From acdc3f947077caf6b9b8b0958269515c38d88daf Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 16 Aug 2024 05:20:24 -0500 Subject: [PATCH] Unwrap any json in md code block, when parsing chat actor responses This is a more robust way to extract the JSON output requested from gemma-2 (2B, 9B) models, which tend to return JSON wrapped in markdown codeblocks. Other models should remain unaffected by this change. Also removed the request to not wrap JSON in codeblocks from prompts, as the code now unwraps it automatically when present. --- pyproject.toml | 2 +- src/khoj/processor/conversation/offline/chat_model.py | 3 +++ src/khoj/processor/conversation/prompts.py | 2 +- src/khoj/routers/helpers.py | 6 ++++++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4b651dad..edbbb655 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dependencies = [ "pymupdf >= 1.23.5", "django == 5.0.7", "authlib == 1.2.1", - "llama-cpp-python == 0.2.82", + "llama-cpp-python == 0.2.88", "itsdangerous == 2.1.2", "httpx == 0.25.0", "pgvector == 0.2.4", diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index ec4c7367..1251dcec 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -103,6 +103,9 @@ def extract_questions_offline( .replace("']", '"]') .replace("', '", '", "') ) + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json"): + response = response[7:-3] questions: List[str] = json.loads(questions_str) questions = filter_questions(questions) except: diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 6a8db9db..ffd7d094 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -587,7 +587,7 @@ You are Khoj, an 
advanced google search assistant. You are tasked with construct - Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi. What Google searches, if any, will you need to perform to answer the user's question? -Provide search queries as a list of strings in a JSON object. Do not wrap the json in a codeblock. +Provide search queries as a list of strings in a JSON object. Current Date: {current_date} User's Location: {location} {username} diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 4e4f5a56..4da60717 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -279,6 +279,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di try: response = response.strip() + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json"): + response = response[7:-3] response = json.loads(response) response = [q.strip() for q in response["source"] if q.strip()] if not isinstance(response, list) or not response or len(response) == 0: @@ -401,6 +404,9 @@ async def generate_online_subqueries( # Validate that the response is a non-empty, JSON-serializable list try: response = response.strip() + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json") and response.endswith("```"): + response = response[7:-3] response = json.loads(response) response = [q.strip() for q in response["queries"] if q.strip()] if not isinstance(response, list) or not response or len(response) == 0: