From d511cbfa3443d8f12a77bd732009ddd78a97a43d Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 29 May 2025 10:59:13 -0700 Subject: [PATCH] Extract constructing question history into shared function for reuse Minor logic update to only include non image inferred queries for gemini, anthropic models as well instead of just for openai models. Apart from that the extracted function should be functionally same. --- .../conversation/anthropic/anthropic_chat.py | 9 +---- .../conversation/google/gemini_chat.py | 9 +---- .../conversation/offline/chat_model.py | 9 +---- src/khoj/processor/conversation/openai/gpt.py | 9 +---- src/khoj/processor/conversation/utils.py | 39 +++++++++++++++++++ 5 files changed, 47 insertions(+), 28 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 2e52a9f2..655a5baa 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -17,6 +17,7 @@ from khoj.processor.conversation.utils import ( OperatorRun, ResponseWithThought, clean_json, + construct_question_history, construct_structured_message, generate_chatml_messages_with_context, messages_to_print, @@ -54,13 +55,7 @@ def extract_questions_anthropic( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - chat_history = "".join( - [ - f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' - for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" - ] - ) + chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant") # Get dates relative to today for prompt creation today = datetime.today() diff --git 
a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 17bb9b76..c440b541 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -16,6 +16,7 @@ from khoj.processor.conversation.google.utils import ( from khoj.processor.conversation.utils import ( OperatorRun, clean_json, + construct_question_history, construct_structured_message, generate_chatml_messages_with_context, messages_to_print, @@ -54,13 +55,7 @@ def extract_questions_gemini( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - chat_history = "".join( - [ - f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' - for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" - ] - ) + chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant") # Get dates relative to today for prompt creation today = datetime.today() diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index 8a02dc63..0ecf62fd 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model from khoj.processor.conversation.utils import ( clean_json, commit_conversation_trace, + construct_question_history, generate_chatml_messages_with_context, messages_to_print, ) @@ -64,13 +65,7 @@ def extract_questions_offline( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - chat_history 
def construct_question_history(
    conversation_log: dict,
    include_query: bool = True,
    lookback: int = 4,
    query_prefix: str = "Q",
    agent_name: str = "Khoj",
) -> str:
    """
    Constructs a chat history string formatted for query extraction purposes.

    Only the last `lookback` entries of `conversation_log["chat"]` are inspected,
    and only entries whose "by" field is "khoj" contribute a turn. Each such entry
    carries the user's query under `intent.query`; entries without one are skipped.

    Output per turn when `include_query` is True (turns whose intent type contains
    "to-image" are skipped entirely):

        {query_prefix}: <user query>
        {agent_name}: {"queries": [<inferred queries>]}
        A: <agent message>

    Output per turn when `include_query` is False (no image filtering):

        {query_prefix}: <user query>
        {agent_name}: <agent message>

    Args:
        conversation_log: Dict with an optional "chat" list of message dicts.
        include_query: Whether to include the inferred search queries of each turn.
        lookback: Number of trailing chat entries to consider. Values <= 0 yield "".
        query_prefix: Label put in front of the user's query (e.g. "Q", "User").
        agent_name: Label put in front of the agent's line (e.g. "Khoj", "Assistant").

    Returns:
        The concatenated turns, or an empty string when nothing qualifies.
    """
    # Guard explicitly: `chats[-0:]` is `chats[0:]`, i.e. the *whole* conversation,
    # which is the opposite of what a zero lookback asks for.
    if lookback <= 0:
        return ""

    history_parts = []
    for chat in conversation_log.get("chat", [])[-lookback:]:
        if chat["by"] != "khoj":
            continue

        # The stored intent may be absent or an explicit null.
        intent = chat.get("intent") or {}
        original_query = intent.get("query")
        if original_query is None:
            continue

        message = chat.get("message", "")
        # The user's side of the turn; every caller's prompt expects it before the agent line.
        user_turn = f"{query_prefix}: {original_query}\n"

        if not include_query:
            history_parts.append(f"{user_turn}{agent_name}: {message}\n\n")
            continue

        # Image generation turns carry no useful search queries, so leave them out.
        # `type` may be stored as null, hence the `or ""` before the membership test.
        if "to-image" in (intent.get("type") or ""):
            continue

        # Default to the original query when no queries were inferred for this turn.
        inferred_queries_list = intent.get("inferred-queries")
        if not inferred_queries_list:
            inferred_queries_list = [original_query]
        # A bare string (unlikely based on usage) is wrapped so the output stays a list.
        elif isinstance(inferred_queries_list, str):
            inferred_queries_list = [inferred_queries_list]

        history_parts.append(
            f'{user_turn}{agent_name}: {{"queries": {inferred_queries_list}}}\nA: {message}\n\n'
        )

    return "".join(history_parts)