Parallelize simple webpage read and extractor

Similar to what is being done in search_online with olostep
Author: Debanjum Singh Solanky
Date:   2024-03-14 16:34:04 +05:30
Parent: 1167f6ddf9
Commit: 71b6905008

@@ -100,12 +100,16 @@ def search_with_google(subquery: str):
 async def read_webpages(query: str, conversation_history: dict, location: LocationData):
     "Infer web pages to read from the query and extract relevant information from them"
     logger.info(f"Inferring web pages to read")
     urls = await infer_webpage_urls(query, conversation_history, location)
-    results: Dict[str, Dict[str, str]] = defaultdict(dict)
-    for url in urls:
-        _, result = await read_webpage_and_extract_content(query, url)
-        results[url]["webpages"] = result
-    return results
+    logger.info(f"Reading web pages at: {urls}")
+    tasks = [read_webpage_and_extract_content(query, url) for url in urls]
+    results = await asyncio.gather(*tasks)
+    response: Dict[str, Dict[str, str]] = defaultdict(dict)
+    response[query]["webpages"] = [web_extract for _, web_extract in results if web_extract is not None]
+    return response
 
 
 async def read_webpage_and_extract_content(subquery: str, url: str) -> Tuple[str, Union[None, str]]:
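
For context: the change swaps a sequential await inside a for loop for a single asyncio.gather over one coroutine per URL, so the page reads run concurrently instead of one after another. Below is a minimal, self-contained sketch of that fan-out pattern; fetch_and_extract and read_all are hypothetical stand-ins for illustration, not functions from this codebase.

import asyncio
from collections import defaultdict
from typing import Dict, List, Optional, Tuple


async def fetch_and_extract(query: str, url: str) -> Tuple[str, Optional[str]]:
    # Hypothetical stand-in for read_webpage_and_extract_content: a real
    # version would fetch the page and return None on extraction failure.
    await asyncio.sleep(0.1)  # simulate network I/O
    return url, f"extract of {url} for '{query}'"


async def read_all(query: str, urls: List[str]) -> Dict[str, Dict[str, List[str]]]:
    # Fan out: create one coroutine per URL, then run them all concurrently.
    # asyncio.gather returns results in the same order as the input tasks.
    tasks = [fetch_and_extract(query, url) for url in urls]
    results = await asyncio.gather(*tasks)
    # Aggregate under the query key, dropping failed (None) extracts,
    # mirroring the filtering in the commit above.
    response: Dict[str, Dict[str, List[str]]] = defaultdict(dict)
    response[query]["webpages"] = [extract for _, extract in results if extract is not None]
    return response


print(asyncio.run(read_all("demo query", ["https://example.com/a", "https://example.com/b"])))

One trade-off worth noting: asyncio.gather propagates the first raised exception by default, so a single failed page read aborts the whole batch; passing return_exceptions=True instead returns exceptions as results, at the cost of having to filter them out afterwards.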