From 0f53a67837d4dd9a50b6d88e5b2fc8b1f5562451 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 11 May 2025 15:29:38 -0600 Subject: [PATCH] Prompt web page reader to extract quantitative data as is from pages Previously the research agent would have a hard time getting quantitative data extracted by the web page reader tool AI. This change aims to encourage the web page reader tool to extract relevant data in verbatim form for higher granularity research and responses. --- src/khoj/processor/conversation/prompts.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index b0cec27b..6ec2376b 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -666,21 +666,25 @@ As a professional analyst, your job is to extract all pertinent information from You will be provided raw text directly from within the document. Adhere to these guidelines while extracting information from the provided documents: -1. Extract all relevant text and links from the document that can assist with further research or answer the user's query. +1. Extract all relevant text and links from the document that can assist with further research or answer the target query. 2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response. 3. Rely strictly on the provided text to generate your summary, without including external information. 4. Provide specific, important snippets from the document in your report to establish trust in your summary. +5. Verbatim quote all necessary text, code or data from the provided document to answer the target query. """.strip() extract_relevant_information = PromptTemplate.from_template( """ {personality_context} -Target Query: {query} + +{query} + -Document: + {corpus} + -Collate only relevant information from the document to answer the target query. +Collate all relevant information from the document to answer the target query. """.strip() )