From e504141c0741e7550418386fb93c41d5e794b9f6 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 18:53:30 -0700 Subject: [PATCH 01/17] Fix to calculate usage from openai api streaming completion During streaming chunk.chunk contains usage data. This regression must have appeared while tuning openai stream processors --- src/khoj/processor/conversation/openai/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 3b706df5..51d71cc0 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -219,6 +219,10 @@ def completion_with_backoff( # Json dump tool calls into aggregated response aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls]) + # Align chunk definition with non-streaming mode for post stream completion usage + if hasattr(chunk, "chunk"): + chunk = chunk.chunk + # Calculate cost of chat input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0 output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0 From 14b4d4b66340b2df891a7061f215d3ca166a1753 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 21:34:04 -0700 Subject: [PATCH 02/17] Fix using non-reasoning openai model via responses API Pass arg to include encrypted reasoning only for reasoning openai models. 
Non reasoning openai models do not accept this arg --- src/khoj/processor/conversation/openai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 51d71cc0..d29796b3 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -458,6 +458,7 @@ def responses_completion_with_backoff( temperature = 1 reasoning_effort = "medium" if deepthought else "low" model_kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + model_kwargs["include"] = ["reasoning.encrypted_content"] # Remove unsupported params for reasoning models model_kwargs.pop("top_p", None) model_kwargs.pop("stop", None) @@ -472,7 +473,6 @@ def responses_completion_with_backoff( temperature=temperature, timeout=httpx.Timeout(30, read=read_timeout), # type: ignore store=False, - include=["reasoning.encrypted_content"], **model_kwargs, ) if not model_response or not isinstance(model_response, OpenAIResponse) or not model_response.output: From 8862394c15fc814d7973828face583153ba2f7c6 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 15:01:56 -0700 Subject: [PATCH 03/17] Handle unset reasoning, response chunk from openai api while streaming --- src/khoj/processor/conversation/openai/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index d29796b3..2b5889c2 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -170,6 +170,7 @@ def completion_with_backoff( chunk.type == "chunk" and chunk.chunk.choices and hasattr(chunk.chunk.choices[0].delta, "reasoning_content") and chunk.chunk.choices[0].delta.reasoning_content ): thoughts += chunk.chunk.choices[0].delta.reasoning_content elif chunk.type == "chunk" and chunk.chunk.choices and 
chunk.chunk.choices[0].delta.tool_calls: @@ -1075,6 +1076,10 @@ async def ain_stream_thought_processor( yield chunk continue + if chunk.choices[0].delta.content is None: + # If delta content is None, we can't process it, just yield the chunk + continue + buf += chunk.choices[0].delta.content if mode == "detect_start": From 34dca8e11401f3a22dd81d46166e0b71d7bbdcad Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 15:10:19 -0700 Subject: [PATCH 04/17] Fix passing temp kwarg to non-streaming openai completion endpoint It is already being passed in model_kwargs, so not required to be passed explicitly as well. This code path isn't being used currently, but better to fix for if/when it is used --- src/khoj/processor/conversation/openai/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 2b5889c2..7db03f52 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -195,7 +195,6 @@ def completion_with_backoff( chunk = client.beta.chat.completions.parse( messages=formatted_messages, # type: ignore model=model_name, - temperature=temperature, timeout=httpx.Timeout(30, read=read_timeout), **model_kwargs, ) From ff73d30106c582472b6aed785520a2304c1e0ccc Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 23:25:11 -0700 Subject: [PATCH 05/17] Fix max thinking budget for gemini models to generate final response --- src/khoj/processor/conversation/google/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/google/utils.py b/src/khoj/processor/conversation/google/utils.py index d3776319..3a64434c 100644 --- a/src/khoj/processor/conversation/google/utils.py +++ b/src/khoj/processor/conversation/google/utils.py @@ -326,7 +326,7 @@ async def gemini_chat_completion_with_backoff( thinking_config = None if deepthought and 
is_reasoning_model(model_name): - thinking_config = gtypes.ThinkingConfig(thinking_budget=-1, include_thoughts=True) + thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True) max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI if is_reasoning_model(model_name): From 222cc19b7ffa197406471a735bc3434f34c68552 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 23:24:00 -0700 Subject: [PATCH 06/17] Use subscriber type specific context window to generate response --- src/khoj/routers/helpers.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index f8807047..ffd384b3 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -1625,6 +1625,7 @@ async def agenerate_chat_response( deepthought = True chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed) + max_prompt_size = await ConversationAdapters.aget_max_context_size(chat_model, user) vision_available = chat_model.vision_enabled if not vision_available and query_images: vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config() @@ -1656,7 +1657,7 @@ async def agenerate_chat_response( model=chat_model_name, api_key=api_key, api_base_url=openai_chat_config.api_base_url, - max_prompt_size=chat_model.max_prompt_size, + max_prompt_size=max_prompt_size, tokenizer_name=chat_model.tokenizer, agent=agent, vision_available=vision_available, @@ -1687,7 +1688,7 @@ async def agenerate_chat_response( model=chat_model.name, api_key=api_key, api_base_url=api_base_url, - max_prompt_size=chat_model.max_prompt_size, + max_prompt_size=max_prompt_size, tokenizer_name=chat_model.tokenizer, agent=agent, vision_available=vision_available, @@ -1717,7 +1718,7 @@ async def agenerate_chat_response( model=chat_model.name, api_key=api_key, api_base_url=api_base_url, - max_prompt_size=chat_model.max_prompt_size, + 
max_prompt_size=max_prompt_size, tokenizer_name=chat_model.tokenizer, agent=agent, vision_available=vision_available, From e0007a31bbc9d890906fcab71c7c6b32b69cb92e Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 23:26:37 -0700 Subject: [PATCH 07/17] Update web UX when server side error or hit stop + no task running - Ack websocket interrupt even when no task running Otherwise chat UX isn't updated to indicate query has stopped processing for this edge case - Mark chat request as not being processed on server side error --- src/interface/web/app/chat/page.tsx | 1 + src/khoj/routers/api_chat.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/interface/web/app/chat/page.tsx b/src/interface/web/app/chat/page.tsx index 666726af..e273455e 100644 --- a/src/interface/web/app/chat/page.tsx +++ b/src/interface/web/app/chat/page.tsx @@ -359,6 +359,7 @@ export default function Chat() { return; } else if (controlMessage.error) { console.error("WebSocket error:", controlMessage.error); + setProcessQuerySignal(false); return; } } catch { diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 411cb73a..88035e9d 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1526,6 +1526,8 @@ async def chat_ws( ack_type = "interrupt_acknowledged" await websocket.send_text(json.dumps({"type": ack_type})) else: + ack_type = "interrupt_acknowledged" + await websocket.send_text(json.dumps({"type": ack_type})) logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}") continue @@ -1704,8 +1706,8 @@ async def process_chat_request( logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}") raise except Exception as e: - logger.error(f"Error processing chat request: {e}", exc_info=True) await websocket.send_text(json.dumps({"error": "Internal server error"})) + logger.error(f"Error processing chat request: {e}", exc_info=True) raise From 
9a8c707f8450068ae26d55eb08ad983e64d83c65 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 20:19:29 -0700 Subject: [PATCH 08/17] Do not overwrite charts created in previous code tool use during research --- src/khoj/processor/tools/run_code.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py index 2e3de666..c981d400 100644 --- a/src/khoj/processor/tools/run_code.py +++ b/src/khoj/processor/tools/run_code.py @@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]: # Identify new files created during execution new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files + # Read newly created files in parallel - download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files] + def read_format(f): + return "bytes" if Path(f.name).suffix in image_file_ext else "text" + + download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files] downloaded_files = await asyncio.gather(*download_tasks) for f, content in zip(new_files, downloaded_files): if isinstance(content, bytes): @@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]: b64_data = base64.b64encode(content).decode("utf-8") elif Path(f.name).suffix in image_file_ext: # Ignore image files as they are extracted from execution results below for inline display - continue + b64_data = base64.b64encode(content).decode("utf-8") else: # Text files - encode utf-8 string as base64 b64_data = content output_files.append({"filename": f.name, "b64_data": b64_data}) - # Collect output files from execution results - # Repect ordering of output result types to disregard text output associated with images - output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"] - for idx, result in 
enumerate(execution.results): - if getattr(result, "chart", None): - continue - for result_type in output_result_types: - if b64_data := getattr(result, result_type, None): - output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data}) - break - # collect logs success = not execution.error and not execution.logs.stderr stdout = "\n".join(execution.logs.stdout) From 452c794e93eafc6abf898cf17cf8accd19360456 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 16:52:59 -0700 Subject: [PATCH 09/17] Make regex search tool results look more like grep results --- src/khoj/routers/helpers.py | 8 +++++--- src/khoj/utils/helpers.py | 5 ++++- tests/test_grep_files.py | 18 +++++++++--------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index ffd384b3..a905aa02 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -3023,6 +3023,7 @@ async def grep_files( file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix) line_matches = [] + line_matches_count = 0 for file_object in file_matches: lines = file_object.raw_text.split("\n") matched_line_numbers = [] @@ -3031,6 +3032,7 @@ async def grep_files( for i, line in enumerate(lines, 1): if regex.search(line): matched_line_numbers.append(i) + line_matches_count += len(matched_line_numbers) # Build context for each match for line_num in matched_line_numbers: @@ -3047,10 +3049,10 @@ async def grep_files( if current_line_num == line_num: # This is the matching line, mark it - context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}") + context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}") else: # This is a context line - context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}") + context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}") # Add separator between 
matches if showing context if lines_before > 0 or lines_after > 0: @@ -3065,7 +3067,7 @@ async def grep_files( # Check if no results found max_results = 1000 query = _generate_query( - len([m for m in line_matches if ":>" in m]), + line_matches_count, len(file_matches), path_prefix, regex_pattern, diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index f8e5a07c..a200e2f4 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -613,9 +613,12 @@ tools_for_research_llm = { Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom". You need to know all the correct keywords or regex patterns for this tool to be useful. - REMEMBER: + IMPORTANT: - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n). + TIPS: + - The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers. + An optional path prefix can restrict search to specific files/directories. Use lines_before, lines_after to show context around matches. 
""" diff --git a/tests/test_grep_files.py b/tests/test_grep_files.py index 22828e19..96d9b3e7 100644 --- a/tests/test_grep_files.py +++ b/tests/test_grep_files.py @@ -46,8 +46,8 @@ async def test_grep_files_simple_match(default_user: KhojUser): assert len(results) == 1 result = results[0] assert "Found 2 matches for 'hello' in 1 documents" in result["query"] - assert "test.txt:1:> hello world" in result["compiled"] - assert "test.txt:3:> hello again" in result["compiled"] + assert "test.txt:1: hello world" in result["compiled"] + assert "test.txt:3: hello again" in result["compiled"] @pytest.mark.django_db @@ -110,7 +110,7 @@ async def test_grep_files_with_path_prefix(default_user: KhojUser): result = results[0] assert "Found 1 matches for 'hello' in 1 documents" in result["query"] assert "in dir1/" in result["query"] - assert "dir1/test1.txt:1:> hello from dir1" in result["compiled"] + assert "dir1/test1.txt:1: hello from dir1" in result["compiled"] assert "dir2/test2.txt" not in result["compiled"] @@ -142,9 +142,9 @@ async def test_grep_files_with_context(default_user: KhojUser): result = results[0] assert "Found 1 matches for 'match' in 1 documents" in result["query"] assert "Showing 1 lines before and 1 lines after" in result["query"] - assert "test.txt:2: line 2" in result["compiled"] - assert "test.txt:3:> line 3 (match)" in result["compiled"] - assert "test.txt:4: line 4" in result["compiled"] + assert "test.txt-2- line 2" in result["compiled"] + assert "test.txt:3: line 3 (match)" in result["compiled"] + assert "test.txt-4- line 4" in result["compiled"] assert "line 1" not in result["compiled"] assert "line 5" not in result["compiled"] @@ -199,8 +199,8 @@ async def test_grep_files_multiple_files(default_user: KhojUser): assert len(results) == 1 result = results[0] assert "Found 2 matches for 'hello' in 2 documents" in result["query"] - assert "file1.txt:1:> hello from file1" in result["compiled"] - assert "file2.txt:1:> hello from file2" in 
result["compiled"] + assert "file1.txt:1: hello from file1" in result["compiled"] + assert "file2.txt:1: hello from file2" in result["compiled"] @pytest.mark.parametrize( @@ -272,4 +272,4 @@ async def test_grep_files_financial_entries_regex_patterns( # All patterns should find the sailing entry assert f"Found {expected_matches} matches" in result["query"] - assert 'ledger.txt:8:> 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"] + assert 'ledger.txt:8: 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"] From 2c91edbb259c40736a8203e8395a36935d03b0d9 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 17:05:13 -0700 Subject: [PATCH 10/17] Improve view file, code tool prompts. Format other research tool prompts --- src/khoj/processor/conversation/prompts.py | 4 +-- src/khoj/routers/helpers.py | 5 ++- src/khoj/utils/helpers.py | 41 ++++++++++++++++++---- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index abfaf6fb..0c9fd91d 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -876,8 +876,8 @@ Khoj: python_code_generation_prompt = PromptTemplate.from_template( """ You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query. -- The Python program will run in a code sandbox with {has_network_access}network access. -- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query. +- The Python program will run in an ephemeral code sandbox with {has_network_access}network access. +- You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query. - Do not try display images or plots in the code directly. 
The code should save the image or plot to a file instead. - Write any document, charts etc. to be shared with the user to file. These files can be seen by the user. - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests. diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index a905aa02..292309b4 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -2944,7 +2944,10 @@ async def view_file_content( # Truncate the text if it's too long if len(filtered_text) > 10000: - filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]" + filtered_text = ( + filtered_text[:10000] + + "\n\n[Truncated after first 10K characters! Use narrower line range to view complete section.]" + ) # Format the result as a document reference document_results = [ diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index a200e2f4..d129cc44 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -454,8 +454,25 @@ command_descriptions_for_agent = { ConversationCommand.Operator: "Agent can operate a computer to complete tasks.", } -e2b_tool_description = "To run a Python script in a E2B sandbox with network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests." -terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. 
Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests." +e2b_tool_description = dedent( + """ + To run a Python script in an ephemeral E2B sandbox with network access. + Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. + Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available. + + Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests. + """ +).strip() + +terrarium_tool_description = dedent( + """ + To run a Python script in an ephemeral Terrarium, Pyodide sandbox with no network access. + Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. + Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available. + + Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests. + """ +).strip() tool_descriptions_for_llm = { ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.", @@ -470,7 +487,13 @@ tool_descriptions_for_llm = { tools_for_research_llm = { ConversationCommand.SearchWeb: ToolDefinition( name="search_web", - description="To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.", + description=dedent( + """ + To search the internet for information. Useful to get a quick, broad overview from the internet. 
+ Provide all relevant context to ensure new searches, not in previous iterations, are performed. + For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration. + """ + ).strip(), schema={ "type": "object", "properties": { @@ -484,7 +507,13 @@ tools_for_research_llm = { ), ConversationCommand.ReadWebpage: ToolDefinition( name="read_webpage", - description="To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.", + description=dedent( + """ + To extract information from webpages. Useful for more detailed research from the internet. + Usually used when you know the webpage links to refer to. + Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query. + """ + ).strip(), schema={ "type": "object", "properties": { @@ -537,8 +566,8 @@ tools_for_research_llm = { """ To view the contents of specific note or document in the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents. - It can be used after finding the document path with the document search tool. - Optionally specify a line range to view only specific sections of large files. + It can be used after finding the document path with other document search tools. + Specify a line range to view only specific sections of files. Especially useful to read large files. 
""" ).strip(), schema={ From c5a9c814792de71e8b084ad9d3b084a895bf2037 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 17:06:14 -0700 Subject: [PATCH 11/17] Update khoj personality prompts with better style, capability guide - Add more color to personality and communication style - Split prompt into capabilities and style sections - Remove directives in personality meant for older, less smart models. - Discourage model from unnecessarily sharing code snippets in final response unless explicitly requested. --- src/khoj/processor/conversation/prompts.py | 47 +++++++++++++--------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 0c9fd91d..04967f46 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -4,21 +4,27 @@ from langchain_core.prompts import PromptTemplate ## -- personality = PromptTemplate.from_template( """ -You are Khoj, a smart, inquisitive and helpful personal assistant. +You are Khoj, a smart, curious, empathetic and helpful personal assistant. Use your general knowledge and past conversation with the user as context to inform your responses. -You were created by Khoj Inc. with the following capabilities: -- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. -- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. -- You *CAN* generate images, look-up real-time information from the internet, set reminders and answer questions based on the user's notes. +You were created by Khoj Inc. More information about you, the company or Khoj apps can be found at https://khoj.dev. + +Today is {day_of_week}, {current_date} in UTC. 
+ +# Capabilities +- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. +- You can look up information from the user's notes and documents synced via the Khoj apps. +- You can generate images, look-up real-time information from the internet, analyze data and answer questions based on the user's notes. + +# Style +- Your responses should be helpful, conversational and tuned to the user's communication style. - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following - inline math mode : \\( and \\) - display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\] -- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay". -- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim. - -Note: More information about you, the company or Khoj apps can be found at https://khoj.dev. -Today is {day_of_week}, {current_date} in UTC. +- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim. + For example: "The weather today is sunny [1](https://weather.com)." +- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). Do not manually output raw, b64 encoded bytes in your response. +- Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code. 
""".strip() ) @@ -26,18 +32,23 @@ custom_personality = PromptTemplate.from_template( """ You are {name}, a personal agent on Khoj. Use your general knowledge and past conversation with the user as context to inform your responses. -You were created by Khoj Inc. with the following capabilities: -- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. -- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. -- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following - - inline math mode : `\\(` and `\\)` - - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]` -- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay". +You were created on the Khoj platform. More information about you, the company or Khoj apps can be found at https://khoj.dev. Today is {day_of_week}, {current_date} in UTC. -Instructions:\n{bio} +# Base Capabilities +- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. + +# Style +- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following + - inline math mode : `\\(` and `\\)` + - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]` +- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim. + For example: "The weather today is sunny [1](https://weather.com)." +- Mention generated assets like images by reference, e.g ![chart](/visualization/image.png). 
Do not manually output raw, b64 encoded bytes in your response. + +# Instructions:\n{bio} """.strip() ) From f5a4d106d1718852555c7a84a417f153ad8f3682 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 00:04:29 -0700 Subject: [PATCH 12/17] Use instruction instead of query as code tool argument --- src/khoj/processor/conversation/prompts.py | 4 ++-- src/khoj/processor/tools/run_code.py | 18 +++++++++--------- src/khoj/utils/helpers.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 04967f46..52a492af 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -1003,9 +1003,9 @@ Chat History: --- {chat_history} -User Query: +User Instructions: --- -{query} +{instructions} """.strip() ) diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py index c981d400..47f6304f 100644 --- a/src/khoj/processor/tools/run_code.py +++ b/src/khoj/processor/tools/run_code.py @@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple): async def run_code( - query: str, + instructions: str, conversation_history: List[ChatMessageModel], context: str, location_data: LocationData, @@ -63,12 +63,12 @@ async def run_code( ): # Generate Code if send_status_func: - async for event in send_status_func(f"**Generate code snippet** for {query}"): + async for event in send_status_func(f"**Generate code snippet** for {instructions}"): yield {ChatEvent.STATUS: event} try: with timer("Chat actor: Generate programs to execute", logger): generated_code = await generate_python_code( - query, + instructions, conversation_history, context, location_data, @@ -79,7 +79,7 @@ async def run_code( query_files, ) except Exception as e: - raise ValueError(f"Failed to generate code for {query} with error: {e}") + raise ValueError(f"Failed to generate code for {instructions} with error: {e}") # Prepare Input Data 
input_data = [] @@ -101,21 +101,21 @@ async def run_code( code = result.pop("code") cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"] logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----") - yield {query: {"code": code, "results": result}} + yield {instructions: {"code": code, "results": result}} except asyncio.TimeoutError as e: # Call the sandbox_url/stop GET API endpoint to stop the code sandbox - error = f"Failed to run code for {query} with Timeout error: {e}" + error = f"Failed to run code for {instructions} with Timeout error: {e}" try: await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5) except Exception as e: error += f"\n\nFailed to stop code sandbox with error: {e}" raise ValueError(error) except Exception as e: - raise ValueError(f"Failed to run code for {query} with error: {e}") + raise ValueError(f"Failed to run code for {instructions} with error: {e}") async def generate_python_code( - q: str, + instructions: str, chat_history: List[ChatMessageModel], context: str, location_data: LocationData, @@ -142,7 +142,7 @@ async def generate_python_code( network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else "" code_generation_prompt = prompts.python_code_generation_prompt.format( - query=q, + instructions=instructions, chat_history=chat_history_str, context=context, has_network_access=network_access_context, diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index d129cc44..8f50e3b7 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -538,12 +538,12 @@ tools_for_research_llm = { schema={ "type": "object", "properties": { - "query": { + "instructions": { "type": "string", - "description": "Detailed query and all input data required for the Python Coder to generate, execute code in the sandbox.", + "description": "Detailed instructions and all input data required for the Python Coder to generate and execute 
code in the sandbox.", }, }, - "required": ["query"], + "required": ["instructions"], }, ), ConversationCommand.OperateComputer: ToolDefinition( From f483a626b8cd6f1e62c52017256494a4c75500f8 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 16:37:32 -0700 Subject: [PATCH 13/17] Simplify view file tool. Limit viewing upto 50 lines at a time We were previously truncating by characters. Limiting by max lines allows model to control line ranges they request --- src/khoj/routers/helpers.py | 52 +++++++++++++++++-------------------- src/khoj/utils/helpers.py | 2 +- 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 292309b4..927f73ef 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -2916,38 +2916,34 @@ async def view_file_content( raw_text = file_object.raw_text # Apply line range filtering if specified - if start_line is None and end_line is None: - filtered_text = raw_text - else: - lines = raw_text.split("\n") - start_line = start_line or 1 - end_line = end_line or len(lines) + lines = raw_text.split("\n") + start_line = start_line or 1 + end_line = end_line or len(lines) - # Validate line range - if start_line < 1 or end_line < 1 or start_line > end_line: - error_msg = f"Invalid line range: {start_line}-{end_line}" - logger.warning(error_msg) - yield [{"query": query, "file": path, "compiled": error_msg}] - return - if start_line > len(lines): - error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}" - logger.warning(error_msg) - yield [{"query": query, "file": path, "compiled": error_msg}] - return + # Validate line range + if start_line < 1 or end_line < 1 or start_line > end_line: + error_msg = f"Invalid line range: {start_line}-{end_line}" + logger.warning(error_msg) + yield [{"query": query, "file": path, "compiled": error_msg}] + return + if start_line > len(lines): + error_msg = f"Start line {start_line} exceeds total 
number of lines {len(lines)}" + logger.warning(error_msg) + yield [{"query": query, "file": path, "compiled": error_msg}] + return - # Convert from 1-based to 0-based indexing and ensure bounds - start_idx = max(0, start_line - 1) - end_idx = min(len(lines), end_line) + # Convert from 1-based to 0-based indexing and ensure bounds + start_idx = max(0, start_line - 1) + end_idx = min(len(lines), end_line) - selected_lines = lines[start_idx:end_idx] - filtered_text = "\n".join(selected_lines) + # Limit to first 50 lines if more than 50 lines are requested + truncation_message = "" + if end_idx - start_idx > 50: + truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]" + end_idx = start_idx + 50 - # Truncate the text if it's too long - if len(filtered_text) > 10000: - filtered_text = ( - filtered_text[:10000] - + "\n\n[Truncated after first 10K characters! Use narrower line range to view complete section.]" - ) + selected_lines = lines[start_idx:end_idx] + filtered_text = "\n".join(selected_lines) + truncation_message # Format the result as a document reference document_results = [ diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 8f50e3b7..23c6b6ce 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -567,7 +567,7 @@ tools_for_research_llm = { To view the contents of specific note or document in the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents. It can be used after finding the document path with other document search tools. - Specify a line range to view only specific sections of files. Especially useful to read large files. + Specify a line range to efficiently read relevant sections of a file. You can view up to 50 lines at a time. 
""" ).strip(), schema={ From 83d725d2d84ee7fe033532b3951cbb214945ac64 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 23:26:45 -0700 Subject: [PATCH 14/17] Extract thoughts of openai style models like gpt-oss from api response They use delta.reasoning instead of delta.reasoning_content to share model reasoning --- src/khoj/processor/conversation/openai/utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 7db03f52..1d0ecdab 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -173,6 +173,13 @@ def completion_with_backoff( and chunk.chunk.choices[0].delta.reasoning_content ): thoughts += chunk.chunk.choices[0].delta.reasoning_content + elif ( + chunk.type == "chunk" + and chunk.chunk.choices + and hasattr(chunk.chunk.choices[0].delta, "reasoning") + and chunk.chunk.choices[0].delta.reasoning + ): + thoughts += chunk.chunk.choices[0].delta.reasoning elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls: tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls] elif chunk.type == "tool_calls.function.arguments.done": @@ -945,6 +952,14 @@ async def astream_thought_processor( ): tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content + # Handlle openai reasoning style response with thoughts. Used by gpt-oss. + if ( + len(tchunk.choices) > 0 + and hasattr(tchunk.choices[0].delta, "reasoning") + and tchunk.choices[0].delta.reasoning + ): + tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning + # Handlle llama.cpp server style response with thoughts. 
elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"): tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content") From dbc333061012c28590eecf5e559f69f7d2861439 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 23:29:42 -0700 Subject: [PATCH 15/17] Tune reasoning effort, temp, top_p for gpt-oss models --- src/khoj/processor/conversation/openai/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 1d0ecdab..f67ec2c5 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -145,6 +145,11 @@ def completion_with_backoff( # See https://qwenlm.github.io/blog/qwen3/#advanced-usages if not deepthought: add_qwen_no_think_tag(formatted_messages) + elif "gpt-oss" in model_name.lower(): + model_kwargs["temperature"] = 1 + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs["top_p"] = 1.0 read_timeout = 300 if is_local_api(api_base_url) else 60 if os.getenv("KHOJ_LLM_SEED"): @@ -346,6 +351,11 @@ async def chat_completion_with_backoff( # See https://qwenlm.github.io/blog/qwen3/#advanced-usages if not deepthought: add_qwen_no_think_tag(formatted_messages) + elif "gpt-oss" in model_name.lower(): + temperature = 1 + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs["top_p"] = 1.0 read_timeout = 300 if is_local_api(api_base_url) else 60 if os.getenv("KHOJ_LLM_SEED"): From fb0347a388a339afcf4083be6290e0bb6caa5c31 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 23:31:45 -0700 Subject: [PATCH 16/17] Truncate long words in stdout, stderr for context efficiency Avoid long base64 images etc. in stdout, stderr to result in context limits being hit. 
--- src/khoj/utils/helpers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 23c6b6ce..5efafe15 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -9,6 +9,7 @@ import logging import os import platform import random +import re import urllib.parse import uuid from collections import OrderedDict @@ -894,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000 "filename": output_file["filename"], "b64_data": output_file["b64_data"][:max_chars] + "...", } + # Truncate long "words" in stdout, stderr. Words are alphanumeric strings not separated by whitespace. + for key in ["std_out", "std_err"]: + if key in code_result["results"]: + code_result["results"][key] = re.sub( + r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key] + ) + return code_results From 13d26ae8b835f05dcc4d1df9c2e777b38bbc67be Mon Sep 17 00:00:00 2001 From: Debanjum Date: Wed, 20 Aug 2025 12:32:24 -0700 Subject: [PATCH 17/17] Wrap long words in train of thought shown on web app --- .../web/app/components/chatHistory/chatHistory.module.css | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/interface/web/app/components/chatHistory/chatHistory.module.css b/src/interface/web/app/components/chatHistory/chatHistory.module.css index 119620d4..cbc197f7 100644 --- a/src/interface/web/app/components/chatHistory/chatHistory.module.css +++ b/src/interface/web/app/components/chatHistory/chatHistory.module.css @@ -17,6 +17,14 @@ div.trainOfThought { margin: 12px; } +/* If there is an inline element holding extremely long content, ensure it wraps */ +div.trainOfThought pre, +div.trainOfThought code, +div.trainOfThought p, +div.trainOfThought span { + overflow-wrap: anywhere; +} + /* Print-specific styles for chat history */ @media print { div.chatHistory {