From 351fb31a34fd3575e54efa11f7345d3733244413 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 7 Apr 2024 09:07:06 +0530 Subject: [PATCH] Add webpage search to socket codepath, add a feature page for online search --- documentation/docs/features/online_search.md | 17 +++++++++++++++++ src/khoj/routers/api_chat.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 documentation/docs/features/online_search.md diff --git a/documentation/docs/features/online_search.md b/documentation/docs/features/online_search.md new file mode 100644 index 00000000..894d515d --- /dev/null +++ b/documentation/docs/features/online_search.md @@ -0,0 +1,17 @@ +# Online Search + +By default, Khoj will try to infer which information-sourcing tools are required to answer your question. Sometimes, you'll have questions that require outside information the LLM's knowledge doesn't cover. In that case, it will use the `online` search feature. + +For example, these queries would trigger an online search: +- What's the latest news about the Israel-Palestine war? +- Where can I find the best pizza in New York City? +- Deadline for filing taxes 2024. +- Give me a summary of this article: https://en.wikipedia.org/wiki/Haitian_Revolution + +Try it out yourself! https://app.khoj.dev + +## Self-Hosting + +The general online search function currently requires an API key from Serper.dev. You can grab one here: https://serper.dev/, and then add it as an environment variable with the name `SERPER_DEV_API_KEY`. + +Without any API keys, Khoj will use the `requests` library to directly read any webpages you give it a link to. This means that you can use Khoj to read any webpage that you have access to on your local network. 
diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 9336fd6f..57f26163 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -386,6 +386,20 @@ async def websocket_endpoint( ) continue + if ConversationCommand.Webpage in conversation_commands: + try: + await send_status_update("**Operation**: Directly searching web pages...") + online_results = await read_webpages(defiltered_query, meta_log, location) + webpages = [] + for query in online_results: + for webpage in online_results[query]["webpages"]: + webpages.append(webpage["link"]) + await send_status_update(f"**Web pages read**: {webpages}") + except ValueError as e: + logger.warning( + f"Error directly reading webpages: {e}. Attempting to respond without online results", exc_info=True + ) + if ConversationCommand.Image in conversation_commands: update_telemetry_state( request=websocket,