Improve subqueries for online search and prompt generation for image (#626)

* Improve subqueries for online search and prompt generation for image
- Include conversation history so that subqueries or intermediate prompts are generated with the appropriate context
This commit is contained in:
sabaimran
2024-01-24 04:12:59 -08:00
committed by GitHub
parent dbdca7d8d1
commit da6cd5ddc4
5 changed files with 75 additions and 38 deletions

View File

@@ -467,7 +467,7 @@ class ConversationAdapters:
return await OpenAIProcessorConversationConfig.objects.filter().aexists()
@staticmethod
async def get_default_openai_llm():
async def aget_default_openai_llm():
return await ChatModelOptions.objects.filter(model_type="openai").afirst()
@staticmethod

View File

@@ -113,7 +113,10 @@ Question: {query}
image_generation_improve_prompt = PromptTemplate.from_template(
"""
Generate a detailed prompt to generate an image based on the following description. Update the query below to improve the image generation. Add additional context to the query to improve the image generation.
You are a talented creator. Generate a detailed prompt to generate an image based on the following description. Update the query below to improve the image generation. Add additional context to the query to improve the image generation. Make sure to retain any important information from the query. Use the conversation log to inform your response.
Conversation Log:
{chat_history}
Query: {query}
@@ -132,33 +135,6 @@ Information from the internet: {online_results}
Query: {query}""".strip()
)
online_search_conversation_subqueries = PromptTemplate.from_template(
"""
The user has a question which you can use the internet to respond to. Can you break down the question into subqueries to get the correct answer? Provide search queries as a JSON list of strings
Today's date in UTC: {current_date}
Here are some examples of questions and subqueries:
Q: Posts about vector databases on Hacker News
A: ["site:"news.ycombinator.com vector database"]
Q: What is the weather like in New York and San Francisco?
A: ["weather in new york", "weather in san francisco"]
Q: What is the latest news about Google stock?
A: ["google stock news"]
Q: When is the next lunar eclipse?
A: ["next lunar eclipse"]
Q: How many oranges would fit in NASA's Saturn V rocket?
A: ["volume of an orange", "volume of saturn v rocket"]
This is the user's query:
Q: {query}
A: """.strip()
)
## Summarize Notes
## --
@@ -281,6 +257,54 @@ Q: {text}
"""
)
online_search_conversation_subqueries = PromptTemplate.from_template(
"""
You are Khoj, an extremely smart and helpful search assistant. You are tasked with constructing a search query for Google to answer the user's question.
- You will receive the conversation history as context.
- Add as much context from the previous questions and answers as required into your search queries.
- Break messages into multiple search queries when required to retrieve the relevant information.
- You have access to the whole internet to retrieve information.
What Google searches, if any, will you need to perform to answer the user's question?
Provide search queries as a JSON list of strings
Current Date: {current_date}
History:
User: I like to use Hacker News to get my tech news.
Khoj: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups.
Q: Posts about vector databases on Hacker News
A: ["site:"news.ycombinator.com vector database"]
History:
User: I'm currently living in New York but I'm thinking about moving to San Francisco.
Khoj: New York is a great city to live in. It has a lot of great restaurants and museums. San Francisco is also a great city to live in. It has a lot of great restaurants and museums.
Q: What is the weather like in those cities?
A: ["weather in new york", "weather in san francisco"]
History:
User: I'm thinking of my next vacation idea. Ideally, I want to see something new and exciting.
Khoj: You could time your next trip with the next lunar eclipse, as that would be a novel experience.
Q: When is the next one?
A: ["next lunar eclipse"]
History:
User: I need to transport a lot of oranges to the moon. Are there any rockets that can fit a lot of oranges?
Khoj: NASA's Saturn V rocket frequently makes lunar trips and has a large cargo capacity.
Q: How many oranges would fit in NASA's Saturn V rocket?
A: ["volume of an orange", "volume of saturn v rocket"]
History:
{chat_history}
Q: {query}
A:
"""
)
## Extract Search Type
## --

View File

@@ -13,7 +13,7 @@ SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY")
url = "https://google.serper.dev/search"
async def search_with_google(query: str):
async def search_with_google(query: str, conversation_history: dict):
def _search_with_google(subquery: str):
payload = json.dumps(
{
@@ -42,7 +42,7 @@ async def search_with_google(query: str):
raise ValueError("SERPER_DEV_API_KEY is not set")
# Breakdown the query into subqueries to get the correct answer
subqueries = await generate_online_subqueries(query)
subqueries = await generate_online_subqueries(query, conversation_history)
response_dict = {}

View File

@@ -402,7 +402,7 @@ async def chat(
elif conversation_command == ConversationCommand.Online:
try:
online_results = await search_with_google(defiltered_query)
online_results = await search_with_google(defiltered_query, meta_log)
except ValueError as e:
return StreamingResponse(
iter(["Please set your SERPER_DEV_API_KEY to get started with online searches 🌐"]),
@@ -417,7 +417,7 @@ async def chat(
metadata={"conversation_command": conversation_command.value},
**common.__dict__,
)
image, status_code, improved_image_prompt = await text_to_image(q)
image, status_code, improved_image_prompt = await text_to_image(q, meta_log)
if image is None:
content_obj = {
"image": image,
@@ -538,7 +538,7 @@ async def extract_references_and_questions(
)
elif conversation_config and conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
openai_chat_config = await ConversationAdapters.get_openai_chat_config()
default_openai_llm = await ConversationAdapters.get_default_openai_llm()
default_openai_llm = await ConversationAdapters.aget_default_openai_llm()
api_key = openai_chat_config.api_key
chat_model = default_openai_llm.chat_model
inferred_queries = extract_questions(

View File

@@ -125,14 +125,21 @@ async def agenerate_chat_response(*args):
return await loop.run_in_executor(executor, generate_chat_response, *args)
async def generate_online_subqueries(q: str) -> List[str]:
async def generate_online_subqueries(q: str, conversation_history: dict) -> List[str]:
"""
Generate subqueries from the given query
"""
chat_history = ""
for chat in conversation_history.get("chat", [])[-4:]:
if chat["by"] == "khoj" and chat["intent"].get("type") == "remember":
chat_history += f"User: {chat['intent']['query']}\n"
chat_history += f"Khoj: {chat['message']}\n"
utc_date = datetime.utcnow().strftime("%Y-%m-%d")
online_queries_prompt = prompts.online_search_conversation_subqueries.format(
current_date=utc_date,
query=q,
chat_history=chat_history,
)
response = await send_message_to_model_wrapper(online_queries_prompt)
@@ -151,13 +158,14 @@ async def generate_online_subqueries(q: str) -> List[str]:
return [q]
async def generate_better_image_prompt(q: str) -> str:
async def generate_better_image_prompt(q: str, conversation_history: str) -> str:
"""
Generate a better image prompt from the given query
"""
image_prompt = prompts.image_generation_improve_prompt.format(
query=q,
chat_history=conversation_history,
)
response = await send_message_to_model_wrapper(image_prompt)
@@ -273,7 +281,7 @@ def generate_chat_response(
return chat_response, metadata
async def text_to_image(message: str) -> Tuple[Optional[str], int, Optional[str]]:
async def text_to_image(message: str, conversation_log: dict) -> Tuple[Optional[str], int, Optional[str]]:
status_code = 200
image = None
@@ -283,7 +291,12 @@ async def text_to_image(message: str) -> Tuple[Optional[str], int, Optional[str]
status_code = 501
elif state.openai_client and text_to_image_config.model_type == TextToImageModelConfig.ModelType.OPENAI:
text2image_model = text_to_image_config.model_name
improved_image_prompt = await generate_better_image_prompt(message)
chat_history = ""
for chat in conversation_log.get("chat", [])[-4:]:
if chat["by"] == "khoj" and chat["intent"].get("type") == "remember":
chat_history += f"Q: {chat['intent']['query']}\n"
chat_history += f"A: {chat['message']}\n"
improved_image_prompt = await generate_better_image_prompt(message, chat_history)
try:
response = state.openai_client.images.generate(
prompt=improved_image_prompt, model=text2image_model, response_format="b64_json"