Extract constructing question history into shared function for reuse

Minor logic update to only include non-image inferred queries for the
Gemini and Anthropic models as well, instead of just for the OpenAI models.

Apart from that, the extracted function should be functionally the same.
This commit is contained in:
Debanjum
2025-05-29 10:59:13 -07:00
parent da663e184c
commit d511cbfa34
5 changed files with 47 additions and 28 deletions

View File

@@ -17,6 +17,7 @@ from khoj.processor.conversation.utils import (
OperatorRun,
ResponseWithThought,
clean_json,
construct_question_history,
construct_structured_message,
generate_chatml_messages_with_context,
messages_to_print,
@@ -54,13 +55,7 @@ def extract_questions_anthropic(
username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = "".join(
[
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj"
]
)
chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")
# Get dates relative to today for prompt creation
today = datetime.today()

View File

@@ -16,6 +16,7 @@ from khoj.processor.conversation.google.utils import (
from khoj.processor.conversation.utils import (
OperatorRun,
clean_json,
construct_question_history,
construct_structured_message,
generate_chatml_messages_with_context,
messages_to_print,
@@ -54,13 +55,7 @@ def extract_questions_gemini(
username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = "".join(
[
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj"
]
)
chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")
# Get dates relative to today for prompt creation
today = datetime.today()

View File

@@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model
from khoj.processor.conversation.utils import (
clean_json,
commit_conversation_trace,
construct_question_history,
generate_chatml_messages_with_context,
messages_to_print,
)
@@ -64,13 +65,7 @@ def extract_questions_offline(
username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = ""
if use_history:
for chat in conversation_log.get("chat", [])[-4:]:
if chat["by"] == "khoj":
chat_history += f"Q: {chat['intent']['query']}\n"
chat_history += f"Khoj: {chat['message']}\n\n"
chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""
# Get dates relative to today for prompt creation
today = datetime.today()

View File

@@ -20,6 +20,7 @@ from khoj.processor.conversation.utils import (
OperatorRun,
ResponseWithThought,
clean_json,
construct_question_history,
construct_structured_message,
generate_chatml_messages_with_context,
messages_to_print,
@@ -56,13 +57,7 @@ def extract_questions(
username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = "".join(
[
f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj" and "to-image" not in chat["intent"].get("type")
]
)
chat_history = construct_question_history(conversation_log)
# Get dates relative to today for prompt creation
today = datetime.today()

View File

@@ -218,6 +218,45 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
return chat_history
def construct_question_history(
    conversation_log: dict,
    include_query: bool = True,
    lookback: int = 4,
    query_prefix: str = "Q",
    agent_name: str = "Khoj",
) -> str:
    """
    Construct a chat history string formatted for query extraction purposes.

    Only turns authored by "khoj" that carry an intent query contribute to the
    history. Each contributing turn is rendered as:

    - include_query=True:
        "<query_prefix>: <query>\\n<agent_name>: {"queries": [...]}\\nA: <message>\\n\\n"
      Turns whose intent type contains "to-image" are left out entirely.
    - include_query=False:
        "<query_prefix>: <query>\\n<agent_name>: <message>\\n\\n"

    Args:
        conversation_log: Dict holding the conversation turns under the "chat" key.
        include_query: Whether to include the inferred queries of each turn.
        lookback: Number of most recent chat entries to inspect. The slice is
            taken before filtering, so it counts entries from every author.
        query_prefix: Label put in front of the user's original query.
        agent_name: Label put in front of the agent's line.

    Returns:
        The formatted history, or an empty string when no turn qualifies.
    """
    history_parts = []
    for chat in conversation_log.get("chat", [])[-lookback:]:
        if chat.get("by") != "khoj":
            continue

        # Treat a missing or null intent the same way: nothing to extract from.
        intent = chat.get("intent") or {}
        original_query = intent.get("query")
        if original_query is None:
            continue

        message = chat.get("message", "")

        if not include_query:
            history_parts.append(f"{query_prefix}: {original_query}\n{agent_name}: {message}\n\n")
            continue

        # The intent type may be stored as null; normalise before the substring check.
        if "to-image" in (intent.get("type") or ""):
            continue

        inferred_queries_list = intent.get("inferred-queries")
        # Ensure inferred_queries_list is a list, defaulting to the original query in a list
        if not inferred_queries_list:
            inferred_queries_list = [original_query]
        # If it's a string (though unlikely based on usage), wrap it in a list
        elif isinstance(inferred_queries_list, str):
            inferred_queries_list = [inferred_queries_list]

        history_parts.append(
            f"{query_prefix}: {original_query}\n"
            f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
            f"A: {message}\n\n"
        )

    return "".join(history_parts)
def construct_tool_chat_history(
previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
) -> Dict[str, list]: