From 7ac241b766b22e95da8087d17573dab8d5863431 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 22 Oct 2024 00:34:49 -0700 Subject: [PATCH] Improve format of notes, online context passed to chat models in prompt Improve separation of note snippets and show their origin files in the notes prompt to have more readable, contextualized text shared with the model. Previously the references dict was being directly passed as a string. The documents don't look well formatted and are less intelligible. - Passing the file path along with note snippets will help contextualize the notes better. - Better formatting should help with making notes more readable by the chat model. --- src/khoj/processor/conversation/anthropic/anthropic_chat.py | 3 +-- src/khoj/processor/conversation/google/gemini_chat.py | 3 +-- src/khoj/processor/conversation/openai/gpt.py | 3 +-- src/khoj/processor/conversation/prompts.py | 4 ++++ src/khoj/processor/conversation/utils.py | 6 +++++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index cb51abb4..5fb900c9 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -142,9 +142,8 @@ def converse_anthropic( """ # Initialize Variables current_date = datetime.now() - compiled_references = "\n\n".join({f"# {item}" for item in references}) - conversation_primer = prompts.query_prompt.format(query=user_query) + compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: system_prompt = prompts.custom_personality.format( diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 7359b3eb..f7cfad31 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ 
b/src/khoj/processor/conversation/google/gemini_chat.py @@ -139,9 +139,8 @@ def converse_gemini( """ # Initialize Variables current_date = datetime.now() - compiled_references = "\n\n".join({f"# {item}" for item in references}) - conversation_primer = prompts.query_prompt.format(query=user_query) + compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: system_prompt = prompts.custom_personality.format( diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index ad02b10e..293bdacd 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -143,9 +143,8 @@ def converse( """ # Initialize Variables current_date = datetime.now() - compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references}) - conversation_primer = prompts.query_prompt.format(query=user_query) + compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: system_prompt = prompts.custom_personality.format( diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index ad164c8d..fb6a105b 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -118,6 +118,7 @@ Use my personal notes and our past conversations to inform your response. Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations. User's Notes: +----- {references} """.strip() ) @@ -127,6 +128,7 @@ notes_conversation_offline = PromptTemplate.from_template( Use my personal notes and our past conversations to inform your response. User's Notes: +----- {references} """.strip() ) @@ -184,6 +186,7 @@ Use this up-to-date information from the internet to inform your response. 
Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the online data or past conversations. Information from the internet: +----- {online_results} """.strip() ) @@ -193,6 +196,7 @@ online_search_conversation_offline = PromptTemplate.from_template( Use this up-to-date information from the internet to inform your response. Information from the internet: +----- {online_results} """.strip() ) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index e841c484..56e9e9db 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -178,7 +178,11 @@ def generate_chatml_messages_with_context( # Extract Chat History for Context chatml_messages: List[ChatMessage] = [] for chat in conversation_log.get("chat", []): - message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n" + references = "\n\n".join( + {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} + ) + message_notes = f"\n\n Notes:\n{references}" if chat.get("context") else "\n" + role = "user" if chat["by"] == "you" else "assistant" message_content = chat["message"] + message_notes