From e24ca9ec28a3d9a5ca6b3bf0ae5f0566347f71e5 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 26 May 2024 17:18:38 +0530 Subject: [PATCH] Pass file path of each doc reference in references returned by API - Pass file path of reference along with the compiled reference in list of references returned by chat API converts - Update the structure of references from list of strings to list of dictionaries (containing 'compiled' and 'file' keys) - Pull out the compiled reference from the new references data struct wherever it was being used --- src/interface/desktop/chat.html | 5 ++++- src/interface/obsidian/src/chat_view.ts | 3 ++- src/khoj/interface/web/chat.html | 5 ++++- src/khoj/processor/conversation/offline/chat_model.py | 2 +- src/khoj/processor/conversation/openai/gpt.py | 2 +- src/khoj/processor/conversation/utils.py | 2 +- src/khoj/routers/api.py | 10 ++++++---- src/khoj/routers/api_chat.py | 2 +- src/khoj/routers/helpers.py | 8 ++++---- 9 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index 68ef9513..b1b31b71 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -62,7 +62,10 @@ return `${time_string}, ${date_string}`; } - function generateReference(reference, index) { + function generateReference(referenceJson, index) { + let reference = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson; + let referenceFile = referenceJson.hasOwnProperty("file") ? 
referenceJson.file : null; + // Escape reference for HTML rendering let escaped_ref = reference.replaceAll('"', '&quot;'); diff --git a/src/interface/obsidian/src/chat_view.ts b/src/interface/obsidian/src/chat_view.ts index bd1d992d..755285e4 100644 --- a/src/interface/obsidian/src/chat_view.ts +++ b/src/interface/obsidian/src/chat_view.ts @@ -247,7 +247,8 @@ export class KhojChatView extends KhojPaneView { return referenceButton; } - generateReference(messageEl: Element, reference: string, index: number) { + generateReference(messageEl: Element, referenceJson: any, index: number) { + let reference: string = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson; // Escape reference for HTML rendering let escaped_ref = reference.replace(/"/g, "&quot;") diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index f8f43770..ca4117c7 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -103,7 +103,10 @@ To get started, just start typing below. You can also type / to see a list of co return `${time_string}, ${date_string}`; } - function generateReference(reference, index) { + function generateReference(referenceJson, index) { + let reference = referenceJson.hasOwnProperty("compiled") ? referenceJson.compiled : referenceJson; + let referenceFile = referenceJson.hasOwnProperty("file") ? 
referenceJson.file : null; + // Escape reference for HTML rendering let escaped_ref = reference.replaceAll('"', '&quot;'); diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index a559df22..edc2d9f0 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -142,7 +142,7 @@ def converse_offline( # Initialize Variables assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured" offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size) - compiled_references_message = "\n\n".join({f"{item}" for item in references}) + compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references}) current_date = datetime.now().strftime("%Y-%m-%d") diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index c25f05fd..8360a32e 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -127,7 +127,7 @@ def converse( """ # Initialize Variables current_date = datetime.now().strftime("%Y-%m-%d") - compiled_references = "\n\n".join({f"# {item}" for item in references}) + compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references}) conversation_primer = prompts.query_prompt.format(query=user_query) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 775848c8..5d68d17d 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -96,7 +96,7 @@ def save_to_conversation_log( user: KhojUser, meta_log: Dict, user_message_time: str = None, - compiled_references: List[str] = [], + compiled_references: List[Dict[str, Any]] = [], online_results: Dict[str, Any] = {}, inferred_queries: List[str] = [], intent_type: str = "remember", diff 
--git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 70245f72..067d18b3 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -342,14 +342,14 @@ async def extract_references_and_questions( # Collate search results as context for GPT with timer("Searching knowledge base took", logger): - result_list = [] + search_results = [] logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}") if send_status_func: inferred_queries_str = "\n- " + "\n- ".join(inferred_queries) await send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}") for query in inferred_queries: n_items = min(n, 3) if using_offline_chat else n - result_list.extend( + search_results.extend( await execute_search( user, f"{query} {filters_in_query}", @@ -360,8 +360,10 @@ async def extract_references_and_questions( dedupe=False, ) ) - result_list = text_search.deduplicated_search_responses(result_list) - compiled_references = [item.additional["compiled"] for item in result_list] + search_results = text_search.deduplicated_search_responses(search_results) + compiled_references = [ + {"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results + ] return compiled_references, inferred_queries, defiltered_query diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index d7783528..30534b13 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -434,7 +434,7 @@ async def websocket_endpoint( if compiled_references: headings = "\n- " + "\n- ".join( - set([" ".join(c.split("Path: ")[1:]).split("\n ")[0] for c in compiled_references]) + set([" ".join(c.get("compiled", c).split("Path: ")[1:]).split("\n ")[0] for c in compiled_references]) ) await send_status_update(f"**📜 Found Relevant Notes**: {headings}") diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index cb3f5491..85852331 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py 
@@ -400,7 +400,7 @@ async def generate_better_image_prompt( q: str, conversation_history: str, location_data: LocationData, - note_references: List[str], + note_references: List[Dict[str, Any]], online_results: Optional[dict] = None, ) -> str: """ @@ -415,7 +415,7 @@ async def generate_better_image_prompt( else: location_prompt = "Unknown" - user_references = "\n\n".join([f"# {item}" for item in note_references]) + user_references = "\n\n".join([f"# {item['compiled']}" for item in note_references]) simplified_online_results = {} @@ -550,7 +550,7 @@ def generate_chat_response( q: str, meta_log: dict, conversation: Conversation, - compiled_references: List[str] = [], + compiled_references: List[Dict] = [], online_results: Dict[str, Dict] = {}, inferred_queries: List[str] = [], conversation_commands: List[ConversationCommand] = [ConversationCommand.Default], @@ -634,7 +634,7 @@ async def text_to_image( user: KhojUser, conversation_log: dict, location_data: LocationData, - references: List[str], + references: List[Dict[str, Any]], online_results: Dict[str, Any], send_status_func: Optional[Callable] = None, ) -> Tuple[Optional[str], int, Optional[str], str]: