Improve Khoj research tools, gpt-oss support and AI API usage
Better support for GPT OSS
- Tune reasoning effort, temp, top_p for gpt-oss models
- Extract thoughts of OpenAI-style models like gpt-oss from the API response

Tool use improvements
- Improve view file and code tool prompts. Format other research tool prompts
- Truncate long words in code tool stdout, stderr for context efficiency
- Use instruction instead of query as the code tool argument
- Simplify the view file tool. Limit viewing to 50 lines at a time
- Make regex search tool results look more like grep results
- Update Khoj personality prompts with a better style and capability guide

Web UX improvements
- Wrap long words in the train of thought shown on the web app
- Do not overwrite charts created in previous code tool use during research
- Update web UX on server-side errors or when stop is hit with no task running

Fix AI API usage
- Use subscriber-type-specific context window to generate responses
- Fix max thinking budget for Gemini models to generate the final response
- Fix passing the temp kwarg to the non-streaming OpenAI completion endpoint
- Handle unset reasoning and response chunks from the OpenAI API while streaming
- Fix using non-reasoning OpenAI models via the Responses API
- Fix calculating usage from OpenAI API streaming completions
@@ -359,6 +359,7 @@ export default function Chat() {
                 return;
             } else if (controlMessage.error) {
                 console.error("WebSocket error:", controlMessage.error);
+                setProcessQuerySignal(false);
                 return;
             }
         } catch {
@@ -17,6 +17,14 @@ div.trainOfThought {
     margin: 12px;
 }
 
+/* If there is an inline element holding extremely long content, ensure it wraps */
+div.trainOfThought pre,
+div.trainOfThought code,
+div.trainOfThought p,
+div.trainOfThought span {
+    overflow-wrap: anywhere;
+}
+
 /* Print-specific styles for chat history */
 @media print {
     div.chatHistory {
@@ -326,7 +326,7 @@ async def gemini_chat_completion_with_backoff(
 
     thinking_config = None
     if deepthought and is_reasoning_model(model_name):
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=-1, include_thoughts=True)
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
 
     max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
     if is_reasoning_model(model_name):
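The hunk above swaps an unbounded thinking budget (-1) for a fixed cap so thoughts cannot crowd out the final response. A minimal sketch of the same idea, assuming the google-genai SDK; MAX_REASONING_TOKENS_GEMINI is this codebase's own constant and the value below is a placeholder:

    from google.genai import types as gtypes

    MAX_REASONING_TOKENS_GEMINI = 20_000  # placeholder; the real constant lives in this codebase

    def build_thinking_config(deepthought: bool, is_reasoning: bool):
        # thinking_budget=-1 means "think without bound"; a fixed cap leaves
        # output tokens free for the final response.
        if deepthought and is_reasoning:
            return gtypes.ThinkingConfig(
                thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True
            )
        return None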
@@ -145,6 +145,11 @@ def completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif "gpt-oss" in model_name.lower():
+        model_kwargs["temperature"] = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+        model_kwargs["top_p"] = 1.0
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
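gpt-oss sampling guidance recommends temperature 1 and top_p 1.0, and reasoning_effort is an OpenAI-compatible extension that gpt-oss serving endpoints are assumed to accept (as the diff above does). A sketch of the kwargs assembly under those assumptions:

    def gpt_oss_kwargs(deepthought: bool) -> dict:
        # Sketch: temperature/top_p per gpt-oss sampling guidance;
        # reasoning_effort is assumed supported by the serving endpoint.
        return {
            "temperature": 1,
            "top_p": 1.0,
            "reasoning_effort": "medium" if deepthought else "low",
        }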
@@ -170,8 +175,16 @@ def completion_with_backoff(
                 chunk.type == "chunk"
                 and chunk.chunk.choices
+                and hasattr(chunk.chunk.choices[0].delta, "reasoning_content")
                 and chunk.chunk.choices[0].delta.reasoning_content
             ):
                 thoughts += chunk.chunk.choices[0].delta.reasoning_content
+            elif (
+                chunk.type == "chunk"
+                and chunk.chunk.choices
+                and hasattr(chunk.chunk.choices[0].delta, "reasoning")
+                and chunk.chunk.choices[0].delta.reasoning
+            ):
+                thoughts += chunk.chunk.choices[0].delta.reasoning
             elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
                 tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
             elif chunk.type == "tool_calls.function.arguments.done":
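Different OpenAI-compatible servers surface thoughts under different delta fields (reasoning_content, reasoning), and either may be unset on any given chunk. A small helper sketch of the defensive probing pattern the hunk above uses:

    def extract_thought(delta) -> str:
        # Probe known reasoning fields defensively; attributes may be
        # missing or None depending on the serving backend.
        for field in ("reasoning_content", "reasoning"):
            value = getattr(delta, field, None)
            if value:
                return value
        return ""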
@@ -194,7 +207,6 @@ def completion_with_backoff(
         chunk = client.beta.chat.completions.parse(
             messages=formatted_messages,  # type: ignore
             model=model_name,
-            temperature=temperature,
             timeout=httpx.Timeout(30, read=read_timeout),
             **model_kwargs,
         )
@@ -219,6 +231,10 @@ def completion_with_backoff(
             # Json dump tool calls into aggregated response
             aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
 
+        # Align chunk definition with non-streaming mode for post stream completion usage
+        if hasattr(chunk, "chunk"):
+            chunk = chunk.chunk
+
     # Calculate cost of chat
     input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
     output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
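Normalizing the final chunk makes the usage arithmetic uniform across streaming and non-streaming paths. Note that for streamed OpenAI completions, usage is only reported at all when requested via stream_options; a sketch of that underlying API behavior (model name illustrative):

    from openai import OpenAI

    client = OpenAI()  # assumes OPENAI_API_KEY is set
    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},  # final chunk carries usage
    )
    last = None
    for chunk in stream:
        last = chunk
    if last and last.usage:
        print(last.usage.prompt_tokens, last.usage.completion_tokens)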
@@ -335,6 +351,11 @@ async def chat_completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
+    elif "gpt-oss" in model_name.lower():
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+        model_kwargs["top_p"] = 1.0
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -454,6 +475,7 @@ def responses_completion_with_backoff(
         temperature = 1
         reasoning_effort = "medium" if deepthought else "low"
         model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+        model_kwargs["include"] = ["reasoning.encrypted_content"]
         # Remove unsupported params for reasoning models
         model_kwargs.pop("top_p", None)
         model_kwargs.pop("stop", None)
@@ -468,7 +490,6 @@ def responses_completion_with_backoff(
             temperature=temperature,
             timeout=httpx.Timeout(30, read=read_timeout),  # type: ignore
             store=False,
-            include=["reasoning.encrypted_content"],
             **model_kwargs,
         )
     if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output:
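Moving include into model_kwargs makes both reasoning-only options conditional, so non-reasoning models called via the Responses API never receive them. A sketch of the resulting kwargs builder:

    def responses_reasoning_kwargs(is_reasoning: bool, deepthought: bool) -> dict:
        # Non-reasoning models reject these options on the Responses API,
        # which is the failure the fix above avoids.
        if not is_reasoning:
            return {}
        return {
            "reasoning": {"effort": "medium" if deepthought else "low", "summary": "auto"},
            "include": ["reasoning.encrypted_content"],
        }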
@@ -941,6 +962,14 @@ async def astream_thought_processor(
         ):
             tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
 
+        # Handlle openai reasoning style response with thoughts. Used by gpt-oss.
+        if (
+            len(tchunk.choices) > 0
+            and hasattr(tchunk.choices[0].delta, "reasoning")
+            and tchunk.choices[0].delta.reasoning
+        ):
+            tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning
+
         # Handlle llama.cpp server style response with thoughts.
         elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
             tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")
@@ -1071,6 +1100,10 @@ async def ain_stream_thought_processor(
             yield chunk
             continue
 
+        if chunk.choices[0].delta.content is None:
+            # If delta content is None, we can't process it, just yield the chunk
+            continue
+
         buf += chunk.choices[0].delta.content
 
         if mode == "detect_start":
@@ -4,21 +4,27 @@ from langchain_core.prompts import PromptTemplate
 ## --
 personality = PromptTemplate.from_template(
     """
-You are Khoj, a smart, inquisitive and helpful personal assistant.
+You are Khoj, a smart, curious, empathetic and helpful personal assistant.
 Use your general knowledge and past conversation with the user as context to inform your responses.
-You were created by Khoj Inc. with the following capabilities:
-
-- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
-- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
-- You *CAN* generate images, look-up real-time information from the internet, set reminders and answer questions based on the user's notes.
+You were created by Khoj Inc. More information about you, the company or Khoj apps can be found at https://khoj.dev.
+
+Today is {day_of_week}, {current_date} in UTC.
+
+# Capabilities
+- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
+- You can look up information from the user's notes and documents synced via the Khoj apps.
+- You can generate images, look-up real-time information from the internet, analyze data and answer questions based on the user's notes.
+
+# Style
+- Your responses should be helpful, conversational and tuned to the user's communication style.
 - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
   - inline math mode : \\( and \\)
   - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
 - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
-- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim.
-
-Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
-Today is {day_of_week}, {current_date} in UTC.
+- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
+  For example: "The weather today is sunny [1](https://weather.com)."
+- Mention generated assets like images by reference, e.g . Do not manually output raw, b64 encoded bytes in your response.
+- Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code.
     """.strip()
 )
@@ -26,18 +32,23 @@ custom_personality = PromptTemplate.from_template(
     """
 You are {name}, a personal agent on Khoj.
 Use your general knowledge and past conversation with the user as context to inform your responses.
-You were created by Khoj Inc. with the following capabilities:
-
-- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
-- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
-- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
-  - inline math mode : `\\(` and `\\)`
-  - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
-- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
+You were created on the Khoj platform. More information about you, the company or Khoj apps can be found at https://khoj.dev.
 
 Today is {day_of_week}, {current_date} in UTC.
 
-Instructions:\n{bio}
+# Base Capabilities
+- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
+
+# Style
+- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
+  - inline math mode : `\\(` and `\\)`
+  - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
+- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
+  For example: "The weather today is sunny [1](https://weather.com)."
+- Mention generated assets like images by reference, e.g . Do not manually output raw, b64 encoded bytes in your response.
+
+# Instructions:\n{bio}
     """.strip()
 )
@@ -876,8 +887,8 @@ Khoj:
 python_code_generation_prompt = PromptTemplate.from_template(
     """
 You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
-- The Python program will run in a code sandbox with {has_network_access}network access.
-- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
+- The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
+- You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
 - Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
 - Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
 - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
@@ -992,9 +1003,9 @@ Chat History:
 ---
 {chat_history}
 
-User Query:
+User Instructions:
 ---
-{query}
+{instructions}
 """.strip()
 )
@@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple):
 
 
 async def run_code(
-    query: str,
+    instructions: str,
     conversation_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -63,12 +63,12 @@ async def run_code(
 ):
     # Generate Code
     if send_status_func:
-        async for event in send_status_func(f"**Generate code snippet** for {query}"):
+        async for event in send_status_func(f"**Generate code snippet** for {instructions}"):
             yield {ChatEvent.STATUS: event}
     try:
         with timer("Chat actor: Generate programs to execute", logger):
             generated_code = await generate_python_code(
-                query,
+                instructions,
                 conversation_history,
                 context,
                 location_data,
@@ -79,7 +79,7 @@ async def run_code(
                 query_files,
             )
     except Exception as e:
-        raise ValueError(f"Failed to generate code for {query} with error: {e}")
+        raise ValueError(f"Failed to generate code for {instructions} with error: {e}")
 
     # Prepare Input Data
     input_data = []
@@ -101,21 +101,21 @@ async def run_code(
         code = result.pop("code")
         cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
         logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
-        yield {query: {"code": code, "results": result}}
+        yield {instructions: {"code": code, "results": result}}
     except asyncio.TimeoutError as e:
         # Call the sandbox_url/stop GET API endpoint to stop the code sandbox
-        error = f"Failed to run code for {query} with Timeout error: {e}"
+        error = f"Failed to run code for {instructions} with Timeout error: {e}"
         try:
             await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
         except Exception as e:
            error += f"\n\nFailed to stop code sandbox with error: {e}"
         raise ValueError(error)
     except Exception as e:
-        raise ValueError(f"Failed to run code for {query} with error: {e}")
+        raise ValueError(f"Failed to run code for {instructions} with error: {e}")
 
 
 async def generate_python_code(
-    q: str,
+    instructions: str,
     chat_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -142,7 +142,7 @@ async def generate_python_code(
     network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else ""
 
     code_generation_prompt = prompts.python_code_generation_prompt.format(
-        query=q,
+        instructions=instructions,
         chat_history=chat_history_str,
         context=context,
         has_network_access=network_access_context,
@@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
 
     # Identify new files created during execution
     new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
 
     # Read newly created files in parallel
-    download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files]
+    def read_format(f):
+        return "bytes" if Path(f.name).suffix in image_file_ext else "text"
+
+    download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files]
     downloaded_files = await asyncio.gather(*download_tasks)
     for f, content in zip(new_files, downloaded_files):
         if isinstance(content, bytes):
@@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
             b64_data = base64.b64encode(content).decode("utf-8")
         elif Path(f.name).suffix in image_file_ext:
-            b64_data = base64.b64encode(content).decode("utf-8")
+            # Ignore image files as they are extracted from execution results below for inline display
+            continue
         else:
             # Text files - encode utf-8 string as base64
             b64_data = content
         output_files.append({"filename": f.name, "b64_data": b64_data})
 
     # Collect output files from execution results
+    # Repect ordering of output result types to disregard text output associated with images
     output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
     for idx, result in enumerate(execution.results):
+        if getattr(result, "chart", None):
+            continue
         for result_type in output_result_types:
             if b64_data := getattr(result, result_type, None):
                 output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
                 break
 
     # collect logs
     success = not execution.error and not execution.logs.stderr
     stdout = "\n".join(execution.logs.stdout)
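The format argument to files.read (as used in the hunk above) selects between text and raw bytes; images must be fetched as bytes before base64 encoding. A standalone sketch of the extension check, with an assumed extension set standing in for the app's image_file_ext:

    from pathlib import Path

    IMAGE_FILE_EXT = {".png", ".jpg", ".jpeg", ".svg", ".webp"}  # assumed set

    def read_format(filename: str) -> str:
        # Images need raw bytes for base64 encoding; everything else is text.
        return "bytes" if Path(filename).suffix in IMAGE_FILE_EXT else "text"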
@@ -1526,6 +1526,8 @@ async def chat_ws(
                     ack_type = "interrupt_acknowledged"
                     await websocket.send_text(json.dumps({"type": ack_type}))
                 else:
+                    ack_type = "interrupt_acknowledged"
+                    await websocket.send_text(json.dumps({"type": ack_type}))
                     logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}")
                     continue
@@ -1704,8 +1706,8 @@ async def process_chat_request(
         logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
         raise
     except Exception as e:
-        logger.error(f"Error processing chat request: {e}", exc_info=True)
+        await websocket.send_text(json.dumps({"error": "Internal server error"}))
+        logger.error(f"Error processing chat request: {e}", exc_info=True)
         raise
@@ -1625,6 +1625,7 @@ async def agenerate_chat_response(
         deepthought = True
 
     chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
+    max_prompt_size = await ConversationAdapters.aget_max_context_size(chat_model, user)
     vision_available = chat_model.vision_enabled
     if not vision_available and query_images:
         vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
@@ -1656,7 +1657,7 @@ async def agenerate_chat_response(
             model=chat_model_name,
             api_key=api_key,
             api_base_url=openai_chat_config.api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1687,7 +1688,7 @@ async def agenerate_chat_response(
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1717,7 +1718,7 @@ async def agenerate_chat_response(
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
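All three provider branches now share one subscriber-aware size instead of the raw model limit. A sketch of what aget_max_context_size could look like; the helper name comes from the diff, while the tier numbers and ais_user_subscribed are placeholders:

    async def aget_max_context_size(chat_model, user):
        # Hypothetical: subscribers get a larger slice of the model's context
        # window; both tiers respect the model's own limit when it is set.
        subscribed = await ais_user_subscribed(user)  # assumed helper
        tier_cap = 60_000 if subscribed else 20_000   # placeholder numbers
        if chat_model.max_prompt_size:
            return min(chat_model.max_prompt_size, tier_cap)
        return tier_cap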
@@ -2915,35 +2916,34 @@ async def view_file_content(
     raw_text = file_object.raw_text
 
-    # Apply line range filtering if specified
-    if start_line is None and end_line is None:
-        filtered_text = raw_text
-    else:
-        lines = raw_text.split("\n")
-        start_line = start_line or 1
-        end_line = end_line or len(lines)
+    lines = raw_text.split("\n")
+    start_line = start_line or 1
+    end_line = end_line or len(lines)
 
-        # Validate line range
-        if start_line < 1 or end_line < 1 or start_line > end_line:
-            error_msg = f"Invalid line range: {start_line}-{end_line}"
-            logger.warning(error_msg)
-            yield [{"query": query, "file": path, "compiled": error_msg}]
-            return
-        if start_line > len(lines):
-            error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
-            logger.warning(error_msg)
-            yield [{"query": query, "file": path, "compiled": error_msg}]
-            return
+    # Validate line range
+    if start_line < 1 or end_line < 1 or start_line > end_line:
+        error_msg = f"Invalid line range: {start_line}-{end_line}"
+        logger.warning(error_msg)
+        yield [{"query": query, "file": path, "compiled": error_msg}]
+        return
+    if start_line > len(lines):
+        error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
+        logger.warning(error_msg)
+        yield [{"query": query, "file": path, "compiled": error_msg}]
+        return
 
-        # Convert from 1-based to 0-based indexing and ensure bounds
-        start_idx = max(0, start_line - 1)
-        end_idx = min(len(lines), end_line)
+    # Convert from 1-based to 0-based indexing and ensure bounds
+    start_idx = max(0, start_line - 1)
+    end_idx = min(len(lines), end_line)
 
-        selected_lines = lines[start_idx:end_idx]
-        filtered_text = "\n".join(selected_lines)
+    # Limit to first 50 lines if more than 50 lines are requested
+    truncation_message = ""
+    if end_idx - start_idx > 50:
+        truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
+        end_idx = start_idx + 50
 
-    # Truncate the text if it's too long
-    if len(filtered_text) > 10000:
-        filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
+    selected_lines = lines[start_idx:end_idx]
+    filtered_text = "\n".join(selected_lines) + truncation_message
 
     # Format the result as a document reference
     document_results = [
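The rewritten view logic boils down to a pure windowing function; a sketch using the same constants as the hunk above:

    def window_lines(lines: list[str], start_line: int, end_line: int, cap: int = 50) -> str:
        # Convert 1-based, inclusive line numbers to a 0-based slice, then
        # cap the window so at most `cap` lines are returned in one view.
        start_idx = max(0, start_line - 1)
        end_idx = min(len(lines), end_line)
        suffix = ""
        if end_idx - start_idx > cap:
            suffix = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
            end_idx = start_idx + cap
        return "\n".join(lines[start_idx:end_idx]) + suffix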
@@ -3022,6 +3022,7 @@ async def grep_files(
     file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
 
     line_matches = []
+    line_matches_count = 0
     for file_object in file_matches:
         lines = file_object.raw_text.split("\n")
         matched_line_numbers = []
@@ -3030,6 +3031,7 @@ async def grep_files(
         for i, line in enumerate(lines, 1):
             if regex.search(line):
                 matched_line_numbers.append(i)
+        line_matches_count += len(matched_line_numbers)
 
         # Build context for each match
         for line_num in matched_line_numbers:
@@ -3046,10 +3048,10 @@ async def grep_files(
 
                 if current_line_num == line_num:
                     # This is the matching line, mark it
-                    context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
+                    context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
                 else:
                     # This is a context line
-                    context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
+                    context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
 
             # Add separator between matches if showing context
             if lines_before > 0 or lines_after > 0:
@@ -3064,7 +3066,7 @@ async def grep_files(
     # Check if no results found
     max_results = 1000
     query = _generate_query(
-        len([m for m in line_matches if ":>" in m]),
+        line_matches_count,
         len(file_matches),
         path_prefix,
         regex_pattern,
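The output now mirrors grep -n with context: matches print as file:line: text and context lines as file-line- text. A self-contained sketch of the formatting convention:

    import re

    def grep_format(filename: str, lines: list[str], pattern: str, context: int = 1) -> list[str]:
        # ":" separators mark matching lines, "-" separators mark context
        # lines, following grep -n -C conventions.
        regex = re.compile(pattern)
        out = []
        for i, line in enumerate(lines, 1):
            if not regex.search(line):
                continue
            for j in range(max(1, i - context), min(len(lines), i + context) + 1):
                sep = ":" if j == i else "-"
                out.append(f"{filename}{sep}{j}{sep} {lines[j - 1]}")
        return out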
@@ -9,6 +9,7 @@ import logging
 import os
 import platform
 import random
+import re
 import urllib.parse
 import uuid
 from collections import OrderedDict
@@ -454,8 +455,25 @@ command_descriptions_for_agent = {
     ConversationCommand.Operator: "Agent can operate a computer to complete tasks.",
 }
 
-e2b_tool_description = "To run a Python script in a E2B sandbox with network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
-terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
+e2b_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral E2B sandbox with network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()
+
+terrarium_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral Terrarium, Pyodide sandbox with no network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()
 
 tool_descriptions_for_llm = {
     ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
@@ -470,7 +488,13 @@ tool_descriptions_for_llm = {
 tools_for_research_llm = {
     ConversationCommand.SearchWeb: ToolDefinition(
         name="search_web",
-        description="To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.",
+        description=dedent(
+            """
+            To search the internet for information. Useful to get a quick, broad overview from the internet.
+            Provide all relevant context to ensure new searches, not in previous iterations, are performed.
+            For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -484,7 +508,13 @@ tools_for_research_llm = {
     ),
     ConversationCommand.ReadWebpage: ToolDefinition(
         name="read_webpage",
-        description="To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
+        description=dedent(
+            """
+            To extract information from webpages. Useful for more detailed research from the internet.
+            Usually used when you know the webpage links to refer to.
+            Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -509,12 +539,12 @@ tools_for_research_llm = {
         schema={
             "type": "object",
             "properties": {
-                "query": {
+                "instructions": {
                     "type": "string",
-                    "description": "Detailed query and all input data required for the Python Coder to generate, execute code in the sandbox.",
+                    "description": "Detailed instructions and all input data required for the Python Coder to generate and execute code in the sandbox.",
                 },
             },
-            "required": ["query"],
+            "required": ["instructions"],
         },
     ),
     ConversationCommand.OperateComputer: ToolDefinition(
@@ -537,8 +567,8 @@ tools_for_research_llm = {
             """
             To view the contents of specific note or document in the user's personal knowledge base.
             Especially helpful if the question expects context from the user's notes or documents.
-            It can be used after finding the document path with the document search tool.
-            Optionally specify a line range to view only specific sections of large files.
+            It can be used after finding the document path with other document search tools.
+            Specify a line range to efficiently read relevant sections of a file. You can view up to 50 lines at a time.
             """
         ).strip(),
         schema={
@@ -613,9 +643,12 @@ tools_for_research_llm = {
             Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
             You need to know all the correct keywords or regex patterns for this tool to be useful.
 
-            REMEMBER:
+            IMPORTANT:
             - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
 
+            TIPS:
+            - The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
+
             An optional path prefix can restrict search to specific files/directories.
             Use lines_before, lines_after to show context around matches.
             """
@@ -862,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000
                 "filename": output_file["filename"],
                 "b64_data": output_file["b64_data"][:max_chars] + "...",
             }
+        # Truncate long "words" in stdout, stderr. Words are alphanumeric strings not separated by whitespace.
+        for key in ["std_out", "std_err"]:
+            if key in code_result["results"]:
+                code_result["results"][key] = re.sub(
+                    r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key]
+                )
 
     return code_results
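The regex above caps any whitespace-free run at 1000 characters, which stops base64 blobs or minified output from flooding the model's context while leaving normal prose untouched. A runnable sketch of the same transform:

    import re

    def truncate_long_words(text: str, limit: int = 1000) -> str:
        # \S{1000,} matches runs of 1000+ non-whitespace characters; keep
        # the first `limit` characters and mark the cut with "...".
        return re.sub(rf"\S{{{limit},}}", lambda m: m.group(0)[:limit] + "...", text)

    blob = "A" * 5000
    assert len(truncate_long_words(blob)) == 1003  # 1000 chars + "..."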
@@ -46,8 +46,8 @@ async def test_grep_files_simple_match(default_user: KhojUser):
     assert len(results) == 1
     result = results[0]
     assert "Found 2 matches for 'hello' in 1 documents" in result["query"]
-    assert "test.txt:1:> hello world" in result["compiled"]
-    assert "test.txt:3:> hello again" in result["compiled"]
+    assert "test.txt:1: hello world" in result["compiled"]
+    assert "test.txt:3: hello again" in result["compiled"]
 
 
 @pytest.mark.django_db
@@ -110,7 +110,7 @@ async def test_grep_files_with_path_prefix(default_user: KhojUser):
     result = results[0]
     assert "Found 1 matches for 'hello' in 1 documents" in result["query"]
     assert "in dir1/" in result["query"]
-    assert "dir1/test1.txt:1:> hello from dir1" in result["compiled"]
+    assert "dir1/test1.txt:1: hello from dir1" in result["compiled"]
     assert "dir2/test2.txt" not in result["compiled"]
@@ -142,9 +142,9 @@ async def test_grep_files_with_context(default_user: KhojUser):
     result = results[0]
     assert "Found 1 matches for 'match' in 1 documents" in result["query"]
     assert "Showing 1 lines before and 1 lines after" in result["query"]
-    assert "test.txt:2: line 2" in result["compiled"]
-    assert "test.txt:3:> line 3 (match)" in result["compiled"]
-    assert "test.txt:4: line 4" in result["compiled"]
+    assert "test.txt-2- line 2" in result["compiled"]
+    assert "test.txt:3: line 3 (match)" in result["compiled"]
+    assert "test.txt-4- line 4" in result["compiled"]
     assert "line 1" not in result["compiled"]
     assert "line 5" not in result["compiled"]
@@ -199,8 +199,8 @@ async def test_grep_files_multiple_files(default_user: KhojUser):
     assert len(results) == 1
     result = results[0]
     assert "Found 2 matches for 'hello' in 2 documents" in result["query"]
-    assert "file1.txt:1:> hello from file1" in result["compiled"]
-    assert "file2.txt:1:> hello from file2" in result["compiled"]
+    assert "file1.txt:1: hello from file1" in result["compiled"]
+    assert "file2.txt:1: hello from file2" in result["compiled"]
 
 
 @pytest.mark.parametrize(
@@ -272,4 +272,4 @@ async def test_grep_files_financial_entries_regex_patterns(
 
     # All patterns should find the sailing entry
     assert f"Found {expected_matches} matches" in result["query"]
-    assert 'ledger.txt:8:> 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
+    assert 'ledger.txt:8: 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]