Read and extract information from web pages in parallel to lower response time

- Time the webpage-reading and info-extraction steps for perf
  analysis
- Deduplicate the webpages to read that were gathered across separate
  Google searches
- Use aiohttp to make API requests non-blocking, and pair it with asyncio
  to parallelize all the online search webpage read and extract calls
This commit is contained in:
Debanjum Singh Solanky
2024-03-08 16:41:19 +05:30
parent b7fad04870
commit ca2f962e95
2 changed files with 49 additions and 43 deletions

View File

@@ -7,7 +7,7 @@ import pytest
from scipy.stats import linregress
from khoj.processor.embeddings import EmbeddingsModel
from khoj.processor.tools.online_search import search_with_olostep
from khoj.processor.tools.online_search import read_webpage_with_olostep
from khoj.utils import helpers
@@ -90,7 +90,7 @@ def test_olostep_api():
website = "https://en.wikipedia.org/wiki/Great_Chicago_Fire"
# Act
response = search_with_olostep(website)
response = read_webpage_with_olostep(website)
# Assert
assert (