mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 21:29:11 +00:00
Fix yield for scraping direct web page
This commit is contained in:
@@ -195,33 +195,34 @@ async def execute_information_collection(
|
|||||||
this_iteration.onlineContext = online_results
|
this_iteration.onlineContext = online_results
|
||||||
|
|
||||||
elif this_iteration.data_source == ConversationCommand.Webpage:
|
elif this_iteration.data_source == ConversationCommand.Webpage:
|
||||||
async for result in read_webpages(
|
try:
|
||||||
this_iteration.query,
|
async for result in read_webpages(
|
||||||
conversation_history,
|
this_iteration.query,
|
||||||
location,
|
conversation_history,
|
||||||
user,
|
location,
|
||||||
subscribed,
|
user,
|
||||||
send_status_func,
|
subscribed,
|
||||||
uploaded_image_url=uploaded_image_url,
|
send_status_func,
|
||||||
agent=agent,
|
uploaded_image_url=uploaded_image_url,
|
||||||
):
|
agent=agent,
|
||||||
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
):
|
||||||
yield result[ChatEvent.STATUS]
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
||||||
else:
|
yield result[ChatEvent.STATUS]
|
||||||
direct_web_pages: Dict[str, Dict] = result # type: ignore
|
else:
|
||||||
|
direct_web_pages: Dict[str, Dict] = result # type: ignore
|
||||||
|
|
||||||
webpages = []
|
webpages = []
|
||||||
for web_query in direct_web_pages:
|
for web_query in direct_web_pages:
|
||||||
if online_results.get(web_query):
|
if online_results.get(web_query):
|
||||||
online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
|
online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
|
||||||
else:
|
else:
|
||||||
online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}
|
online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}
|
||||||
|
|
||||||
for webpage in direct_web_pages[web_query]["webpages"]:
|
for webpage in direct_web_pages[web_query]["webpages"]:
|
||||||
webpages.append(webpage["link"])
|
webpages.append(webpage["link"])
|
||||||
yield send_status_func(f"**Read web pages**: {webpages}")
|
this_iteration.onlineContext = online_results
|
||||||
|
except Exception as e:
|
||||||
this_iteration.onlineContext = online_results
|
logger.error(f"Error reading webpages: {e}", exc_info=True)
|
||||||
|
|
||||||
# TODO: Fix summarize later
|
# TODO: Fix summarize later
|
||||||
# elif this_iteration.data_source == ConversationCommand.Summarize:
|
# elif this_iteration.data_source == ConversationCommand.Summarize:
|
||||||
|
|||||||
Reference in New Issue
Block a user