Parallelize simple webpage read and extractor

Similar to what is being done with search_online with olostep
This commit is contained in:
Debanjum Singh Solanky
2024-03-14 16:34:04 +05:30
parent 1167f6ddf9
commit 71b6905008

View File

@@ -100,12 +100,16 @@ def search_with_google(subquery: str):
 async def read_webpages(query: str, conversation_history: dict, location: LocationData):
     "Infer web pages to read from the query and extract relevant information from them"
+    logger.info(f"Inferring web pages to read")
     urls = await infer_webpage_urls(query, conversation_history, location)
-    results: Dict[str, Dict[str, str]] = defaultdict(dict)
-    for url in urls:
-        _, result = await read_webpage_and_extract_content(query, url)
-        results[url]["webpages"] = result
-    return results
+    logger.info(f"Reading web pages at: {urls}")
+    tasks = [read_webpage_and_extract_content(query, url) for url in urls]
+    results = await asyncio.gather(*tasks)
+
+    response: Dict[str, Dict[str, str]] = defaultdict(dict)
+    response[query]["webpages"] = [web_extract for _, web_extract in results if web_extract is not None]
+    return response

 async def read_webpage_and_extract_content(subquery: str, url: str) -> Tuple[str, Union[None, str]]: