Add trajectory compression to anthropic operator agent

- Add compression parameters to base operator agent for reuse
- Increase default operator iterations
2026-03-02 13:18:18 +00:00 · 2025-05-28 00:28:34 -07:00
parent cb451fa67c
commit d54bfc19e5
3 changed files with 31 additions and 2 deletions
--- a/src/khoj/processor/operator/init.py
+++ b/src/khoj/processor/operator/init.py
@@ -50,7 +50,7 @@ async def operate_environment(
        raise ValueError(f"No vision enabled chat model found. Configure a vision chat model to operate environment.")

    # Initialize Agent
-    max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
+    max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 100))
    operator_agent: OperatorAgent
    if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
        operator_agent = OpenAIOperatorAgent(query, reasoning_model, environment_type, max_iterations, tracer)
--- a/src/khoj/processor/operator/operator_agent_anthropic.py
+++ b/src/khoj/processor/operator/operator_agent_anthropic.py
@@ -6,7 +6,7 @@ from datetime import datetime
 from textwrap import dedent
 from typing import List, Literal, Optional, cast

-from anthropic.types.beta import BetaContentBlock
+from anthropic.types.beta import BetaContentBlock, BetaTextBlock, BetaToolUseBlock

 from khoj.processor.conversation.anthropic.utils import is_reasoning_model
 from khoj.processor.operator.operator_actions import *
@@ -47,6 +47,24 @@ class AnthropicOperatorAgent(OperatorAgent):
        if is_reasoning_model(self.vision_model.name):
            thinking = {"type": "enabled", "budget_tokens": 1024}

+        # Trigger trajectory compression if the message count exceeds the limit
+        if len(self.messages) > self.message_limit:
+            # 1. Prepare messages for compression
+            original_messages = self.messages
+            self.messages = self.messages[: self.compress_length]
+            # ensure last message isn't a tool call request
+            if self.messages[-1].role == "assistant" and any(
+                isinstance(block, BetaToolUseBlock) for block in self.messages[-1].content
+            ):
+                self.messages.pop()
+            # 2. Get summary of operation trajectory
+            await self.summarize(current_state)
+            # 3. Rebuild condensed trajectory
+            primary_task = [original_messages.pop(0)]
+            condensed_trajectory = self.messages[-2:]  # extract summary request, response
+            recent_trajectory = original_messages[self.compress_length :]
+            self.messages = primary_task + condensed_trajectory + recent_trajectory
+
        messages_for_api = self._format_message_for_api(self.messages)
        response = await client.beta.messages.create(
            messages=messages_for_api,
--- a/src/khoj/processor/operator/operator_agent_base.py
+++ b/src/khoj/processor/operator/operator_agent_base.py
@@ -40,6 +40,17 @@ class OperatorAgent(ABC):
        self.messages: List[AgentMessage] = []
        self.summarize_prompt = f"Use the results of our research to provide a comprehensive, self-contained answer for the target query:\n{query}."

+        # Context compression parameters
+        self.context_compress_trigger = 2e3  # heuristic to determine compression trigger
+        # turns after which compression triggered. scales with model max context size. Minimum 5 turns.
+        self.message_limit = 2 * max(
+            5, int(self.vision_model.subscribed_max_prompt_size / self.context_compress_trigger)
+        )
+        # compression ratio determines how many messages to compress down to one
+        # e.g. if 5 messages, a compress ratio of 4/5 means compress the first 4 messages into 1 + keep the last 1 uncompressed
+        self.message_compress_ratio = 4 / 5
+        self.compress_length = int(self.message_limit * self.message_compress_ratio)
+
    @abstractmethod
    async def act(self, current_state: EnvState) -> AgentActResult:
        pass