Read and extract information from web pages in parallel to lower response time

- Time the webpage-reading and info-extraction steps for perf analysis
- Deduplicate the webpages to read that were gathered across separate Google searches
- Use aiohttp to make API requests non-blocking, paired with asyncio to parallelize all the online search webpage read and extract calls
2026-03-02 21:19:12 +00:00 · 2024-03-08 16:41:19 +05:30
parent b7fad04870
commit ca2f962e95
2 changed files with 49 additions and 43 deletions
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -7,7 +7,7 @@ import pytest
 from scipy.stats import linregress

 from khoj.processor.embeddings import EmbeddingsModel
-from khoj.processor.tools.online_search import search_with_olostep
+from khoj.processor.tools.online_search import read_webpage_with_olostep
 from khoj.utils import helpers


@@ -90,7 +90,7 @@ def test_olostep_api():
    website = "https://en.wikipedia.org/wiki/Great_Chicago_Fire"

    # Act
-    response = search_with_olostep(website)
+    response = read_webpage_with_olostep(website)

    # Assert
    assert (