Use extract queries actor to improve notes search with offline chat

Previously, we skipped the extract questions step for offline chat because the default offline chat model wasn't good enough at outputting proper JSON to justify the time it took to extract questions. The new default offline chat model outputs valid JSON much more reliably, and with date filters, so the extract questions step is now useful enough to justify its impact on latency.
2026-03-09 21:29:11 +00:00 · 2024-03-21 00:44:13 +05:30
parent 1ebd5c3648
commit 4912c0ee30
3 changed files with 67 additions and 115 deletions
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -1,5 +1,6 @@
 import json
 import logging
-from datetime import datetime
+from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Union
@@ -48,39 +49,36 @@ def extract_questions_offline(
    if use_history:
        for chat in conversation_log.get("chat", [])[-4:]:
-            if chat["by"] == "khoj" and chat["intent"].get("type") != "text-to-image":
+            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type"):
                chat_history += f"Q: {chat['intent']['query']}\n"
-                chat_history += f"A: {chat['message']}\n"
+                chat_history += f"Khoj: {chat['message']}\n\n"
-    current_date = datetime.now().strftime("%Y-%m-%d")
+    today = datetime.today()
-    last_year = datetime.now().year - 1
+    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-    last_christmas_date = f"{last_year}-12-25"
+    last_year = today.year - 1
-    next_christmas_date = f"{datetime.now().year}-12-25"
+    example_questions = prompts.extract_questions_offline.format(
    system_prompt = prompts.system_prompt_extract_questions_gpt4all.format(
        message=(prompts.system_prompt_message_extract_questions_gpt4all)
    )
    example_questions = prompts.extract_questions_gpt4all_sample.format(
        query=text,
        chat_history=chat_history,
-        current_date=current_date,
+        current_date=today.strftime("%Y-%m-%d"),
        yesterday_date=yesterday,
        last_year=last_year,
-        last_christmas_date=last_christmas_date,
+        this_year=today.year,
        next_christmas_date=next_christmas_date,
        location=location,
    )
-    messages = generate_chatml_messages_with_context(example_questions, system_message=system_prompt, model_name=model)
+    messages = generate_chatml_messages_with_context(
        example_questions, model_name=model, loaded_model=offline_chat_model
    )
    state.chat_lock.acquire()
    try:
-        response = offline_chat_model.create_chat_completion(messages, max_tokens=200, top_k=2, temp=0)
+        response = send_message_to_model_offline(messages, loaded_model=offline_chat_model)
        response = response[0]["choices"][0]["message"]["content"]
    finally:
        state.chat_lock.release()
    # Extract, Clean Message from GPT's Response
    try:
        # This will expect to be a list with a single string with a list of questions
-        questions = (
+        questions_str = (
            str(response)
            .strip(empty_escape_sequences)
            .replace("['", '["')
@@ -88,11 +86,8 @@ def extract_questions_offline(
            .replace("</s>", "")
            .replace("']", '"]')
            .replace("', '", '", "')
            .replace('["', "")
            .replace('"]', "")
            .split("? ")
        )
-        questions = [q + "?" for q in questions[:-1]] + [questions[-1]]
+        questions: List[str] = json.loads(questions_str)
        questions = filter_questions(questions)
    except:
        logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
@@ -115,12 +110,12 @@ def filter_questions(questions: List[str]):
        "do not know",
        "do not understand",
    ]
-    filtered_questions = []
+    filtered_questions = set()
    for q in questions:
        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
-            filtered_questions.append(q)
+            filtered_questions.add(q)
-    return filtered_questions
+    return list(filtered_questions)
 def converse_offline(
@@ -171,13 +166,13 @@ def converse_offline(
        conversation_primer = f"{prompts.online_search_conversation.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
    if not is_none_or_empty(compiled_references_message):
-        conversation_primer = f"{prompts.notes_conversation_gpt4all.format(references=compiled_references_message)}\n{conversation_primer}"
+        conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
    # Setup Prompt with Primer or Conversation History
    current_date = datetime.now().strftime("%Y-%m-%d")
    messages = generate_chatml_messages_with_context(
        conversation_primer,
-        prompts.system_prompt_message_gpt4all.format(current_date=current_date),
+        prompts.system_prompt_offline_chat.format(current_date=current_date),
        conversation_log,
        model_name=model,
        loaded_model=offline_chat_model,
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -47,9 +47,9 @@ no_entries_found = PromptTemplate.from_template(
 """.strip()
 )
-## Conversation Prompts for GPT4All Models
+## Conversation Prompts for Offline Chat Models
 ## --
-system_prompt_message_gpt4all = PromptTemplate.from_template(
+system_prompt_offline_chat = PromptTemplate.from_template(
    """
 You are Khoj, a smart, inquisitive and helpful personal assistant.
 - Use your general knowledge and past conversation with the user as context to inform your responses.
@@ -61,40 +61,6 @@ Today is {current_date} in UTC.
    """.strip()
 )
 system_prompt_message_extract_questions_gpt4all = f"""You are Khoj, a kind and intelligent personal assistant. When the user asks you a question, you ask follow-up questions to clarify the necessary information you need in order to answer from the user's perspective.
 - Write the question as if you can search for the answer on the user's personal notes.
 - Try to be as specific as possible. Instead of saying "they" or "it" or "he", use the name of the person or thing you are referring to. For example, instead of saying "Which store did they go to?", say "Which store did Alice and Bob go to?".
 - Add as much context from the previous questions and notes as required into your search queries.
 - Provide search queries as a list of questions
 What follow-up questions, if any, will you need to ask to answer the user's question?
 """
 system_prompt_gpt4all = PromptTemplate.from_template(
    """
 <s>[INST] <<SYS>>
 {message}
 <</SYS>>Hi there! [/INST] Hello! How can I help you today? </s>"""
 )
 system_prompt_extract_questions_gpt4all = PromptTemplate.from_template(
    """
 <s>[INST] <<SYS>>
 {message}
 <</SYS>>[/INST]</s>"""
 )
 user_message_gpt4all = PromptTemplate.from_template(
    """
 <s>[INST] {message} [/INST]
 """.strip()
 )
 khoj_message_gpt4all = PromptTemplate.from_template(
    """
 {message}</s>
 """.strip()
 )
 ## Notes Conversation
 ## --
 notes_conversation = PromptTemplate.from_template(
@@ -107,7 +73,7 @@ Notes:
 """.strip()
 )
-notes_conversation_gpt4all = PromptTemplate.from_template(
+notes_conversation_offline = PromptTemplate.from_template(
    """
 User's Notes:
 {references}
@@ -159,58 +125,49 @@ Query: {query}""".strip()
 )
 ## Summarize Notes
 ## --
 summarize_notes = PromptTemplate.from_template(
    """
 Summarize the below notes about {user_query}:
 {text}
 Summarize the notes in second person perspective:"""
 )
 ## Answer
 ## --
 answer = PromptTemplate.from_template(
    """
 You are a friendly, helpful personal assistant.
 Using the users notes below, answer their following question. If the answer is not contained within the notes, say "I don't know."
 Notes:
 {text}
 Question: {user_query}
 Answer (in second person):"""
 )
 ## Extract Questions
 ## --
-extract_questions_gpt4all_sample = PromptTemplate.from_template(
+extract_questions_offline = PromptTemplate.from_template(
    """
-<s>[INST] <<SYS>>Current Date: {current_date}. User's Location: {location}<</SYS>> [/INST]</s>
+You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes. Construct search queries to retrieve relevant information to answer the user's question.
-<s>[INST] How was my trip to Cambodia? [/INST]
+- You will be provided past questions(Q) and answers(A) for context.
-How was my trip to Cambodia?</s>
+- Add as much context from the previous questions and answers as required into your search queries.
-<s>[INST] Who did I visit the temple with on that trip? [/INST]
+- Break messages into multiple search queries when required to retrieve the relevant information.
-Who did I visit the temple with in Cambodia?</s>
+- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
-<s>[INST] How should I take care of my plants? [/INST]
+
-What kind of plants do I have? What issues do my plants have?</s>
+Current Date: {current_date}
-<s>[INST] How many tennis balls fit in the back of a 2002 Honda Civic? [/INST]
+User's Location: {location}
-What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
+
-<s>[INST] What did I do for Christmas last year? [/INST]
+Examples:
-What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
+Q: How was my trip to Cambodia?
-<s>[INST] How are you feeling today? [/INST]</s>
+Khoj: ["How was my trip to Cambodia?"]
-<s>[INST] Is Alice older than Bob? [/INST]
+
-When was Alice born? What is Bob's age?</s>
+Q: Who did I visit the temple with on that trip?
-<s>[INST] <<SYS>>
+Khoj: ["Who did I visit the temple with in Cambodia?"]
-Use these notes from the user's previous conversations to provide a response:
+
 Q: Which of them is older?
 Khoj: ["When was Alice born?", "What is Bob's age?"]
 Q: Where did John say he was? He mentioned it in our call last week.
 Khoj: ["Where is John? dt>='{last_year}-12-25' dt<'{last_year}-12-26'", "John's location in call notes"]
 Q: How can you help me?
 Khoj: ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]
 Q: What did I do for Christmas last year?
 Khoj: ["What did I do for Christmas {last_year} dt>='{last_year}-12-25' dt<'{last_year}-12-26'"]
 Q: How should I take care of my plants?
 Khoj: ["What kind of plants do I have?", "What issues do my plants have?"]
 Q: Who all did I meet here yesterday?
 Khoj: ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]
 Chat History:
 {chat_history}
-<</SYS>> [/INST]</s>
+What searches will you perform to answer the following question, using the chat history as reference? Respond with relevant search queries as list of strings.
-<s>[INST] {query} [/INST]
+Q: {query}
-"""
+""".strip()
 )
@@ -254,8 +211,8 @@ Q: What is their age difference?
 Khoj: {{"queries": ["What is Bob's age?", "What is Tom's age?"]}}
 A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} years old and Tom is 30 years old.
-Q: What does yesterday's note say?
+Q: Who all did I meet here yesterday?
-Khoj: {{"queries": ["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
+Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
 A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
 {chat_history}
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -327,7 +327,7 @@ async def extract_references_and_questions(
                defiltered_query,
                loaded_model=loaded_model,
                conversation_log=meta_log,
-                should_extract_questions=False,
+                should_extract_questions=True,
                location_data=location_data,
            )
        elif conversation_config and conversation_config.model_type == ChatModelOptions.ModelType.OPENAI: