Structure research iteration msgs for more granular context management

Previously, research iterations and conversation logs were added to a
single user message. This prevented truncating each past iteration
separately when context limits were hit, so the whole past research
context had to be dropped instead.

This change splits each research iteration into a separate item in a
message content list.

It uses the ability for message content to be a list, which is
supported by all major AI model APIs like OpenAI, Anthropic and Gemini.

The change in message format seen by pick next tool chat actor:
- New Format
  - System: System Message
  - User/Assistant: Chat History
  - User: Raw Query
  - Assistant: Iteration History
    - Iteration 1
    - Iteration 2
  - User: Query with Pick Next Tool Nudge

- Old Format
  - User: System + Chat History + Previous Iterations Message
  - User: Query

- Collateral Changes
The construct_structured_message function has been updated to always
return a list[dict[str, Any]].

Previously it would only use a list when attached_file_context was set
or a vision model was used with images, for wider compatibility with
other OpenAI-compatible APIs.
This commit is contained in:
Debanjum
2025-05-10 15:44:50 -06:00
parent 0f53a67837
commit a337d9e4b8
3 changed files with 54 additions and 29 deletions

View File

@@ -762,29 +762,32 @@ Assuming you can search the user's notes and the internet.
- User Name: {username}
# Available Tool AIs
Which of the tool AIs listed below would you use to answer the user's question? You **only** have access to the following tool AIs:
You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:
{tools}
# Previous Iterations
{previous_iterations}
# Chat History:
{chat_history}
Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
Your response should always be a valid JSON object. Do not say anything else.
Response format:
{{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
""".strip()
)
plan_function_execution_next_tool = PromptTemplate.from_template(
"""
Given the results of your previous iterations, which tool AI will you use next to answer the target query?
# Target Query:
{query}
""".strip()
)
previous_iteration = PromptTemplate.from_template(
"""
## Iteration {index}:
# Iteration {index}:
- tool: {tool}
- query: {query}
- result: {result}
"""
""".strip()
)
pick_relevant_tools = PromptTemplate.from_template(

View File

@@ -105,9 +105,9 @@ class InformationCollectionIteration:
def construct_iteration_history(
previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
) -> str:
previous_iterations_history = ""
query: str, previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
) -> list[dict]:
previous_iterations_history = []
for idx, iteration in enumerate(previous_iterations):
iteration_data = previous_iteration_prompt.format(
tool=iteration.tool,
@@ -116,8 +116,23 @@ def construct_iteration_history(
index=idx + 1,
)
previous_iterations_history += iteration_data
return previous_iterations_history
previous_iterations_history.append(iteration_data)
return (
[
{
"by": "you",
"message": query,
},
{
"by": "khoj",
"intent": {"type": "remember", "query": query},
"message": previous_iterations_history,
},
]
if previous_iterations_history
else []
)
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
@@ -316,7 +331,11 @@ Khoj: "{chat_response}"
def construct_structured_message(
message: str, images: list[str], model_type: str, vision_enabled: bool, attached_file_context: str = None
message: list[str] | str,
images: list[str],
model_type: str,
vision_enabled: bool,
attached_file_context: str = None,
):
"""
Format messages into appropriate multimedia format for supported chat model types
@@ -326,10 +345,11 @@ def construct_structured_message(
ChatModel.ModelType.GOOGLE,
ChatModel.ModelType.ANTHROPIC,
]:
if not attached_file_context and not (vision_enabled and images):
return message
message = [message] if isinstance(message, str) else message
constructed_messages: List[Any] = [{"type": "text", "text": message}]
constructed_messages: List[dict[str, Any]] = [
{"type": "text", "text": message_part} for message_part in message
]
if not is_none_or_empty(attached_file_context):
constructed_messages.append({"type": "text", "text": attached_file_context})

View File

@@ -108,13 +108,6 @@ async def apick_next_tool(
# Create planning response model with dynamically populated tool enum class
planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
# Construct chat history with user and iteration history with researcher agent for context
chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
if query_images:
query = f"[placeholder for user attached images]\n{query}"
today = datetime.today()
location_data = f"{location}" if location else "Unknown"
agent_chat_model = AgentAdapters.get_agent_chat_model(agent, user) if agent else None
@@ -124,21 +117,30 @@ async def apick_next_tool(
function_planning_prompt = prompts.plan_function_execution.format(
tools=tool_options_str,
chat_history=chat_history,
personality_context=personality_context,
current_date=today.strftime("%Y-%m-%d"),
day_of_week=today.strftime("%A"),
username=user_name or "Unknown",
location=location_data,
previous_iterations=previous_iterations_history,
max_iterations=max_iterations,
)
if query_images:
query = f"[placeholder for user attached images]\n{query}"
# Construct chat history with user and iteration history with researcher agent for context
previous_iterations_history = construct_iteration_history(query, previous_iterations, prompts.previous_iteration)
iteration_chat_log = {"chat": conversation_history.get("chat", []) + previous_iterations_history}
# Plan function execution for the next tool
query = prompts.plan_function_execution_next_tool.format(query=query) if previous_iterations_history else query
try:
with timer("Chat actor: Infer information sources to refer", logger):
response = await send_message_to_model_wrapper(
query=query,
context=function_planning_prompt,
system_message=function_planning_prompt,
conversation_log=iteration_chat_log,
response_type="json_object",
response_schema=planning_response_model,
deepthought=True,