Do not retrieve relevant notes when having a general chat with Khoj

- This improves latency of @general chat by avoiding unnecessary
  compute
- It also avoids passing references in API response when they haven't
  been used to generate the chat response. So interfaces don't have to
  add logic to not render them unnecessarily
This commit is contained in:
Debanjum Singh Solanky
2023-06-02 10:24:40 +05:30
parent 90439a8db1
commit ec280067ef
2 changed files with 18 additions and 12 deletions

View File

@@ -153,9 +153,7 @@ def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo",
 compiled_references = "\n\n".join({f"# {item}" for item in references})

 # Get Conversation Primer appropriate to Conversation Type
-conversation_type = "general" if user_query.startswith("@general") or compiled_references.strip() == "" else "notes"
-logger.debug(f"Conversation Type: {conversation_type}")
-if conversation_type == "general":
+if compiled_references == []:
     conversation_primer = prompts.general_conversation.format(current_date=current_date, query=user_query)
 else:
     conversation_primer = prompts.notes_conversation.format(

View File

@@ -228,6 +228,9 @@ def chat(q: Optional[str] = None):
 model = state.processor_config.conversation.model
 chat_model = state.processor_config.conversation.chat_model
 user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+conversation_type = "general" if q.startswith("@general") else "notes"
+compiled_references = []
+inferred_queries = []

 # Load Conversation History
 chat_session = state.processor_config.conversation.chat_session
@@ -240,16 +243,21 @@ def chat(q: Optional[str] = None):
 else:
     return {"status": "ok", "response": []}

-# Infer search queries from user message
-with timer("Extracting search queries took", logger):
-    inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)
+if conversation_type == "notes":
+    # Infer search queries from user message
+    with timer("Extracting search queries took", logger):
+        inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)

-# Collate search results as context for GPT
-with timer("Searching knowledge base took", logger):
-    result_list = []
-    for query in inferred_queries:
-        result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False))
-    compiled_references = [item.additional["compiled"] for item in result_list]
+    # Collate search results as context for GPT
+    with timer("Searching knowledge base took", logger):
+        result_list = []
+        for query in inferred_queries:
+            result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False))
+        compiled_references = [item.additional["compiled"] for item in result_list]
+
+# Switch to general conversation type if no relevant notes found for the given query
+conversation_type = "notes" if compiled_references else "general"
+logger.debug(f"Conversation Type: {conversation_type}")

 try:
     with timer("Generating chat response took", logger):