Extract question history construction into a shared function for reuse

Minor logic update: inferred queries from image generation turns are now excluded for the Gemini and Anthropic models as well, instead of just for the OpenAI models. Apart from that, the extracted function should be functionally the same.
2026-04-20 01:24:31 +00:00 · 2025-05-29 10:59:13 -07:00
parent da663e184c
commit d511cbfa34
5 changed files with 47 additions and 28 deletions
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@@ -17,6 +17,7 @@ from khoj.processor.conversation.utils import (
    OperatorRun,
    ResponseWithThought,
    clean_json,
+    construct_question_history,
    construct_structured_message,
    generate_chatml_messages_with_context,
    messages_to_print,
@@ -54,13 +55,7 @@ def extract_questions_anthropic(
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = "".join(
-        [
-            f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
-            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj"
-        ]
-    )
+    chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")

    # Get dates relative to today for prompt creation
    today = datetime.today()
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@@ -16,6 +16,7 @@ from khoj.processor.conversation.google.utils import (
 from khoj.processor.conversation.utils import (
    OperatorRun,
    clean_json,
+    construct_question_history,
    construct_structured_message,
    generate_chatml_messages_with_context,
    messages_to_print,
@@ -54,13 +55,7 @@ def extract_questions_gemini(
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = "".join(
-        [
-            f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
-            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj"
-        ]
-    )
+    chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")

    # Get dates relative to today for prompt creation
    today = datetime.today()
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
    clean_json,
    commit_conversation_trace,
+    construct_question_history,
    generate_chatml_messages_with_context,
    messages_to_print,
 )
@@ -64,13 +65,7 @@ def extract_questions_offline(
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = ""
-
-    if use_history:
-        for chat in conversation_log.get("chat", [])[-4:]:
-            if chat["by"] == "khoj":
-                chat_history += f"Q: {chat['intent']['query']}\n"
-                chat_history += f"Khoj: {chat['message']}\n\n"
+    chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""

    # Get dates relative to today for prompt creation
    today = datetime.today()
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@@ -20,6 +20,7 @@ from khoj.processor.conversation.utils import (
    OperatorRun,
    ResponseWithThought,
    clean_json,
+    construct_question_history,
    construct_structured_message,
    generate_chatml_messages_with_context,
    messages_to_print,
@@ -56,13 +57,7 @@ def extract_questions(
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = "".join(
-        [
-            f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
-            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj" and "to-image" not in chat["intent"].get("type")
-        ]
-    )
+    chat_history = construct_question_history(conversation_log)

    # Get dates relative to today for prompt creation
    today = datetime.today()
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -218,6 +218,45 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
    return chat_history


+def construct_question_history(
+    conversation_log: dict,
+    include_query: bool = True,
+    lookback: int = 4,
+    query_prefix: str = "Q",
+    agent_name: str = "Khoj",
+) -> str:
+    """
+    Constructs a chat history string formatted for query extraction purposes.
+
+    Only entries with by == "khoj" among the last `lookback` entries of
+    conversation_log["chat"] are used. Entries without an intent query are skipped.
+
+    Each emitted turn starts with "{query_prefix}: <user query>".
+    - include_query=True: followed by '{agent_name}: {"queries": [...]}' and "A: <message>".
+      Turns whose intent type contains "to-image" are skipped.
+    - include_query=False: followed by "{agent_name}: <message>" only. No turn is filtered by type.
+
+    Returns an empty string when there is nothing to include or lookback is not positive.
+    """
+    # Guard the slice below: [-0:] would select the whole log rather than nothing
+    if lookback <= 0:
+        return ""
+
+    history_parts = []
+    for chat in conversation_log.get("chat", [])[-lookback:]:
+        if chat["by"] != "khoj":
+            continue
+
+        # Intent may be missing or stored as None
+        intent = chat.get("intent") or {}
+        original_query = intent.get("query")
+        if original_query is None:
+            continue
+
+        message = chat.get("message", "")
+        # Every emitted turn starts with the user's query, labelled with query_prefix
+        query_line = f"{query_prefix}: {original_query}\n"
+
+        if not include_query:
+            history_parts.append(f"{query_line}{agent_name}: {message}\n\n")
+            continue
+
+        # Skip image generation turns. Intent type may be missing or stored as None
+        if "to-image" in (intent.get("type") or ""):
+            continue
+
+        # Default to the original query when no inferred queries are stored
+        inferred_queries_list = intent.get("inferred-queries") or [original_query]
+        # If it's a string (though unlikely based on usage), wrap it in a list
+        if isinstance(inferred_queries_list, str):
+            inferred_queries_list = [inferred_queries_list]
+
+        history_parts.append(f'{query_line}{agent_name}: {{"queries": {inferred_queries_list}}}\nA: {message}\n\n')
+
+    return "".join(history_parts)
+
+
 def construct_tool_chat_history(
    previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
 ) -> Dict[str, list]: