diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 2e52a9f2..655a5baa 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -17,6 +17,7 @@ from khoj.processor.conversation.utils import ( OperatorRun, ResponseWithThought, clean_json, + construct_question_history, construct_structured_message, generate_chatml_messages_with_context, messages_to_print, @@ -54,13 +55,7 @@ def extract_questions_anthropic( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - chat_history = "".join( - [ - f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' - for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" - ] - ) + chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant") # Get dates relative to today for prompt creation today = datetime.today() diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 17bb9b76..c440b541 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -16,6 +16,7 @@ from khoj.processor.conversation.google.utils import ( from khoj.processor.conversation.utils import ( OperatorRun, clean_json, + construct_question_history, construct_structured_message, generate_chatml_messages_with_context, messages_to_print, @@ -54,13 +55,7 @@ def extract_questions_gemini( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - 
chat_history = "".join( - [ - f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' - for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" - ] - ) + chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant") # Get dates relative to today for prompt creation today = datetime.today() diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index 8a02dc63..0ecf62fd 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model from khoj.processor.conversation.utils import ( clean_json, commit_conversation_trace, + construct_question_history, generate_chatml_messages_with_context, messages_to_print, ) @@ -64,13 +65,7 @@ def extract_questions_offline( username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else "" # Extract Past User Message and Inferred Questions from Conversation Log - chat_history = "" - - if use_history: - for chat in conversation_log.get("chat", [])[-4:]: - if chat["by"] == "khoj": - chat_history += f"Q: {chat['intent']['query']}\n" - chat_history += f"Khoj: {chat['message']}\n\n" + chat_history = construct_question_history(conversation_log, include_query=False) if use_history else "" # Get dates relative to today for prompt creation today = datetime.today() diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index d5fcd0a1..b3c440ff 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -20,6 +20,7 @@ from khoj.processor.conversation.utils import ( OperatorRun, ResponseWithThought, clean_json, + 
def construct_question_history(
    conversation_log: dict,
    include_query: bool = True,
    lookback: int = 4,
    query_prefix: str = "Q",
    agent_name: str = "Khoj",
) -> str:
    """
    Construct a chat history string formatted for query extraction prompts.

    Only turns authored by "khoj" within the last `lookback` entries of
    `conversation_log["chat"]` are rendered. Turns without an intent query
    are skipped.

    Args:
        conversation_log: Conversation dict holding a "chat" list of turns.
        include_query: When True, render the inferred queries as a JSON-like
            `{"queries": [...]}` line followed by an `A: <message>` line and
            skip turns whose intent type contains "to-image". When False,
            render only the user query and the agent message.
        lookback: Number of most recent chat entries to consider.
            Zero or a negative value yields an empty history.
        query_prefix: Label put before the user's original query.
        agent_name: Label put before the agent's line.

    Returns:
        The formatted history, or an empty string when nothing qualifies.
    """
    if lookback <= 0:
        # Slicing with [-0:] would select the whole log instead of nothing
        return ""

    history_parts = []
    for chat in (conversation_log.get("chat") or [])[-lookback:]:
        if chat.get("by") != "khoj":
            continue

        # Intent may be absent or explicitly None on older or partial entries
        intent = chat.get("intent") or {}
        original_query = intent.get("query")
        if original_query is None:
            continue

        message = chat.get("message", "")

        if not include_query:
            history_parts.append(f"{query_prefix}: {original_query}\n{agent_name}: {message}\n\n")
            continue

        # Intent type can be stored as None, so coerce before the membership test
        intent_type = intent.get("type") or ""
        if "to-image" in intent_type:
            continue

        # Default to the original query when no queries were inferred
        inferred_queries_list = intent.get("inferred-queries") or [original_query]
        # If it's a string (though unlikely based on usage), wrap it in a list
        if isinstance(inferred_queries_list, str):
            inferred_queries_list = [inferred_queries_list]

        history_parts.append(
            f"{query_prefix}: {original_query}\n"
            f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
            f"A: {message}\n\n"
        )

    return "".join(history_parts)