mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Enforce JSON responses from OpenAI chat actors that previously used string lists
- Allow passing the response format type to the OpenAI API via chat actors
- Convert in-context examples to use JSON objects instead of str lists
- Update actors that output str lists to request json_object output instead
- OpenAI's JSON mode forces the model to output a valid JSON object
This commit is contained in:
@@ -34,7 +34,7 @@ def extract_questions(
|
|||||||
# Extract Past User Message and Inferred Questions from Conversation Log
|
# Extract Past User Message and Inferred Questions from Conversation Log
|
||||||
chat_history = "".join(
|
chat_history = "".join(
|
||||||
[
|
[
|
||||||
f'Q: {chat["intent"]["query"]}\nKhoj: {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\nA: {chat["message"]}\n\n'
|
f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
|
||||||
for chat in conversation_log.get("chat", [])[-4:]
|
for chat in conversation_log.get("chat", [])[-4:]
|
||||||
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
|
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
|
||||||
]
|
]
|
||||||
@@ -65,7 +65,7 @@ def extract_questions(
|
|||||||
model_name=model,
|
model_name=model,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
model_kwargs={"stop": ["A: ", "\n"]},
|
model_kwargs={"stop": ["A: ", "\n"], "response_format": {"type": "json_object"}},
|
||||||
openai_api_key=api_key,
|
openai_api_key=api_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -73,7 +73,7 @@ def extract_questions(
|
|||||||
try:
|
try:
|
||||||
response = response.strip()
|
response = response.strip()
|
||||||
response = json.loads(response)
|
response = json.loads(response)
|
||||||
response = [q.strip() for q in response if q.strip()]
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response:
|
if not isinstance(response, list) or not response:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||||
return [text]
|
return [text]
|
||||||
@@ -86,11 +86,7 @@ def extract_questions(
|
|||||||
return questions
|
return questions
|
||||||
|
|
||||||
|
|
||||||
def send_message_to_model(
|
def send_message_to_model(messages, api_key, model, response_type="text"):
|
||||||
messages,
|
|
||||||
api_key,
|
|
||||||
model,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Send message to model
|
Send message to model
|
||||||
"""
|
"""
|
||||||
@@ -100,6 +96,7 @@ def send_message_to_model(
|
|||||||
messages=messages,
|
messages=messages,
|
||||||
model=model,
|
model=model,
|
||||||
openai_api_key=api_key,
|
openai_api_key=api_key,
|
||||||
|
model_kwargs={"response_format": {"type": response_type}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -221,40 +221,40 @@ You are Khoj, an extremely smart and helpful search assistant with the ability t
|
|||||||
- Break messages into multiple search queries when required to retrieve the relevant information.
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
||||||
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
||||||
|
|
||||||
What searches will you need to perform to answer the users question? Respond with only a list of search queries as strings.
|
What searches will you need to perform to answer the users question? Respond with search queries as list of strings in a JSON object.
|
||||||
Current Date: {current_date}
|
Current Date: {current_date}
|
||||||
User's Location: {location}
|
User's Location: {location}
|
||||||
|
|
||||||
Q: How was my trip to Cambodia?
|
Q: How was my trip to Cambodia?
|
||||||
Khoj: ["How was my trip to Cambodia?"]
|
Khoj: {{"queries": ["How was my trip to Cambodia?"]}}
|
||||||
A: The trip was amazing. I went to the Angkor Wat temple and it was beautiful.
|
A: The trip was amazing. I went to the Angkor Wat temple and it was beautiful.
|
||||||
|
|
||||||
Q: Who did i visit that temple with?
|
Q: Who did i visit that temple with?
|
||||||
Khoj: ["Who did I visit the Angkor Wat Temple in Cambodia with?"]
|
Khoj: {{"queries": ["Who did I visit the Angkor Wat Temple in Cambodia with?"]}}
|
||||||
A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
|
A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
|
||||||
|
|
||||||
Q: What national parks did I go to last year?
|
Q: What national parks did I go to last year?
|
||||||
Khoj: ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]
|
Khoj: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
|
||||||
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
||||||
|
|
||||||
Q: How can you help me?
|
Q: How can you help me?
|
||||||
Khoj: ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]
|
Khoj: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
|
||||||
A: I can help you live healthier and happier across work and personal life
|
A: I can help you live healthier and happier across work and personal life
|
||||||
|
|
||||||
Q: How many tennis balls fit in the back of a 2002 Honda Civic?
|
Q: How many tennis balls fit in the back of a 2002 Honda Civic?
|
||||||
Khoj: ["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]
|
Khoj: {{"queries": ["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]}}
|
||||||
A: 1085 tennis balls will fit in the trunk of a Honda Civic
|
A: 1085 tennis balls will fit in the trunk of a Honda Civic
|
||||||
|
|
||||||
Q: Is Bob older than Tom?
|
Q: Is Bob older than Tom?
|
||||||
Khoj: ["When was Bob born?", "What is Tom's age?"]
|
Khoj: {{"queries": ["When was Bob born?", "What is Tom's age?"]}}
|
||||||
A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
|
A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
|
||||||
|
|
||||||
Q: What is their age difference?
|
Q: What is their age difference?
|
||||||
Khoj: ["What is Bob's age?", "What is Tom's age?"]
|
Khoj: {{"queries": ["What is Bob's age?", "What is Tom's age?"]}}
|
||||||
A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} years old and Tom is 30 years old.
|
A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} years old and Tom is 30 years old.
|
||||||
|
|
||||||
Q: What does yesterday's note say?
|
Q: What does yesterday's note say?
|
||||||
Khoj: ["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]
|
Khoj: {{"queries": ["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
||||||
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
||||||
|
|
||||||
{chat_history}
|
{chat_history}
|
||||||
@@ -337,7 +337,7 @@ User: I'm thinking of moving to a new city. I'm trying to decide between New Yor
|
|||||||
AI: Moving to a new city can be challenging. Both New York and San Francisco are great cities to live in. New York is known for its diverse culture and San Francisco is known for its tech scene.
|
AI: Moving to a new city can be challenging. Both New York and San Francisco are great cities to live in. New York is known for its diverse culture and San Francisco is known for its tech scene.
|
||||||
|
|
||||||
Q: What is the population of each of those cities?
|
Q: What is the population of each of those cities?
|
||||||
Khoj: ["online"]
|
Khoj: {{"source": ["online"]}}
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
Chat History:
|
Chat History:
|
||||||
@@ -345,14 +345,14 @@ User: I'm thinking of my next vacation idea. Ideally, I want to see something ne
|
|||||||
AI: Excellent! Taking a vacation is a great way to relax and recharge.
|
AI: Excellent! Taking a vacation is a great way to relax and recharge.
|
||||||
|
|
||||||
Q: Where did Grandma grow up?
|
Q: Where did Grandma grow up?
|
||||||
Khoj: ["notes"]
|
Khoj: {{"source": ["notes"]}}
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
Chat History:
|
Chat History:
|
||||||
|
|
||||||
|
|
||||||
Q: What can you do for me?
|
Q: What can you do for me?
|
||||||
Khoj: ["notes", "online"]
|
Khoj: {{"source": ["notes", "online"]}}
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
Chat History:
|
Chat History:
|
||||||
@@ -360,7 +360,7 @@ User: Good morning
|
|||||||
AI: Good morning! How can I help you today?
|
AI: Good morning! How can I help you today?
|
||||||
|
|
||||||
Q: How can I share my files with Khoj?
|
Q: How can I share my files with Khoj?
|
||||||
Khoj: ["default", "online"]
|
Khoj: {{"source": ["default", "online"]}}
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
Chat History:
|
Chat History:
|
||||||
@@ -368,9 +368,9 @@ User: I want to start a new hobby. I'm thinking of learning to play the guitar.
|
|||||||
AI: Learning to play the guitar is a great hobby. It can be a lot of fun and a great way to express yourself.
|
AI: Learning to play the guitar is a great hobby. It can be a lot of fun and a great way to express yourself.
|
||||||
|
|
||||||
Q: What is the first element of the periodic table?
|
Q: What is the first element of the periodic table?
|
||||||
Khoj: ["general"]
|
Khoj: {{"source": ["general"]}}
|
||||||
|
|
||||||
Now it's your turn to pick the data sources you would like to use to answer the user's question. Provide your response as a list of strings.
|
Now it's your turn to pick the data sources you would like to use to answer the user's question. Respond with data sources as a list of strings in a JSON object.
|
||||||
|
|
||||||
Chat History:
|
Chat History:
|
||||||
{chat_history}
|
{chat_history}
|
||||||
@@ -391,7 +391,7 @@ You are Khoj, an advanced google search assistant. You are tasked with construct
|
|||||||
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev
|
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev
|
||||||
|
|
||||||
What Google searches, if any, will you need to perform to answer the user's question?
|
What Google searches, if any, will you need to perform to answer the user's question?
|
||||||
Provide search queries as a list of strings
|
Provide search queries as a JSON list of strings
|
||||||
Current Date: {current_date}
|
Current Date: {current_date}
|
||||||
User's Location: {location}
|
User's Location: {location}
|
||||||
|
|
||||||
@@ -401,14 +401,14 @@ User: I like to use Hacker News to get my tech news.
|
|||||||
AI: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups.
|
AI: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups.
|
||||||
|
|
||||||
Q: Summarize posts about vector databases on Hacker News since Feb 2024
|
Q: Summarize posts about vector databases on Hacker News since Feb 2024
|
||||||
Khoj: ["site:news.ycombinator.com after:2024/02/01 vector database"]
|
Khoj: {{"queries": ["site:news.ycombinator.com after:2024/02/01 vector database"]}}
|
||||||
|
|
||||||
History:
|
History:
|
||||||
User: I'm currently living in New York but I'm thinking about moving to San Francisco.
|
User: I'm currently living in New York but I'm thinking about moving to San Francisco.
|
||||||
AI: New York is a great city to live in. It has a lot of great restaurants and museums. San Francisco is also a great city to live in. It has good access to nature and a great tech scene.
|
AI: New York is a great city to live in. It has a lot of great restaurants and museums. San Francisco is also a great city to live in. It has good access to nature and a great tech scene.
|
||||||
|
|
||||||
Q: What is the climate like in those cities?
|
Q: What is the climate like in those cities?
|
||||||
Khoj: ["climate in new york city", "climate in san francisco"]
|
Khoj: {{"queries": ["climate in new york city", "climate in san francisco"]}}
|
||||||
|
|
||||||
History:
|
History:
|
||||||
AI: Hey, how is it going?
|
AI: Hey, how is it going?
|
||||||
@@ -416,27 +416,27 @@ User: Going well. Ananya is in town tonight!
|
|||||||
AI: Oh that's awesome! What are your plans for the evening?
|
AI: Oh that's awesome! What are your plans for the evening?
|
||||||
|
|
||||||
Q: She wants to see a movie. Any decent sci-fi movies playing at the local theater?
|
Q: She wants to see a movie. Any decent sci-fi movies playing at the local theater?
|
||||||
Khoj: ["new sci-fi movies in theaters near {location}"]
|
Khoj: {{"queries": ["new sci-fi movies in theaters near {location}"]}}
|
||||||
|
|
||||||
History:
|
History:
|
||||||
User: Can I chat with you over WhatsApp?
|
User: Can I chat with you over WhatsApp?
|
||||||
AI: Yes, you can chat with me using WhatsApp.
|
AI: Yes, you can chat with me using WhatsApp.
|
||||||
|
|
||||||
Q: How
|
Q: How
|
||||||
Khoj: ["site:khoj.dev chat with Khoj on Whatsapp"]
|
Khoj: {{"queries": ["site:khoj.dev chat with Khoj on Whatsapp"]}}
|
||||||
|
|
||||||
History:
|
History:
|
||||||
|
|
||||||
|
|
||||||
Q: How do I share my files with you?
|
Q: How do I share my files with you?
|
||||||
Khoj: ["site:khoj.dev sync files with Khoj"]
|
Khoj: {{"queries": ["site:khoj.dev sync files with Khoj"]}}
|
||||||
|
|
||||||
History:
|
History:
|
||||||
User: I need to transport a lot of oranges to the moon. Are there any rockets that can fit a lot of oranges?
|
User: I need to transport a lot of oranges to the moon. Are there any rockets that can fit a lot of oranges?
|
||||||
AI: NASA's Saturn V rocket frequently makes lunar trips and has a large cargo capacity.
|
AI: NASA's Saturn V rocket frequently makes lunar trips and has a large cargo capacity.
|
||||||
|
|
||||||
Q: How many oranges would fit in NASA's Saturn V rocket?
|
Q: How many oranges would fit in NASA's Saturn V rocket?
|
||||||
Khoj: ["volume of an orange", "volume of saturn v rocket"]
|
Khoj: {{"queries": ["volume of an orange", "volume of saturn v rocket"]}}
|
||||||
|
|
||||||
Now it's your turn to construct a search query for Google to answer the user's question.
|
Now it's your turn to construct a search query for Google to answer the user's question.
|
||||||
History:
|
History:
|
||||||
|
|||||||
@@ -167,12 +167,12 @@ async def aget_relevant_information_sources(query: str, conversation_history: di
|
|||||||
chat_history=chat_history,
|
chat_history=chat_history,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await send_message_to_model_wrapper(relevant_tools_prompt)
|
response = await send_message_to_model_wrapper(relevant_tools_prompt, response_type="json_object")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = response.strip()
|
response = response.strip()
|
||||||
response = json.loads(response)
|
response = json.loads(response)
|
||||||
response = [q.strip() for q in response if q.strip()]
|
response = [q.strip() for q in response["source"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response or len(response) == 0:
|
if not isinstance(response, list) or not response or len(response) == 0:
|
||||||
logger.error(f"Invalid response for determining relevant tools: {response}")
|
logger.error(f"Invalid response for determining relevant tools: {response}")
|
||||||
return tool_options
|
return tool_options
|
||||||
@@ -244,13 +244,13 @@ async def generate_online_subqueries(q: str, conversation_history: dict, locatio
|
|||||||
location=location,
|
location=location,
|
||||||
)
|
)
|
||||||
|
|
||||||
response = await send_message_to_model_wrapper(online_queries_prompt)
|
response = await send_message_to_model_wrapper(online_queries_prompt, response_type="json_object")
|
||||||
|
|
||||||
# Validate that the response is a non-empty, JSON-serializable list
|
# Validate that the response is a non-empty, JSON-serializable list
|
||||||
try:
|
try:
|
||||||
response = response.strip()
|
response = response.strip()
|
||||||
response = json.loads(response)
|
response = json.loads(response)
|
||||||
response = [q.strip() for q in response if q.strip()]
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response or len(response) == 0:
|
if not isinstance(response, list) or not response or len(response) == 0:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
||||||
return [q]
|
return [q]
|
||||||
@@ -324,6 +324,7 @@ async def generate_better_image_prompt(
|
|||||||
async def send_message_to_model_wrapper(
|
async def send_message_to_model_wrapper(
|
||||||
message: str,
|
message: str,
|
||||||
system_message: str = "",
|
system_message: str = "",
|
||||||
|
response_type: str = "text",
|
||||||
):
|
):
|
||||||
conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
|
conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
|
||||||
|
|
||||||
@@ -352,9 +353,7 @@ async def send_message_to_model_wrapper(
|
|||||||
api_key = openai_chat_config.api_key
|
api_key = openai_chat_config.api_key
|
||||||
chat_model = conversation_config.chat_model
|
chat_model = conversation_config.chat_model
|
||||||
openai_response = send_message_to_model(
|
openai_response = send_message_to_model(
|
||||||
messages=truncated_messages,
|
messages=truncated_messages, api_key=api_key, model=chat_model, response_type=response_type
|
||||||
api_key=api_key,
|
|
||||||
model=chat_model,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return openai_response
|
return openai_response
|
||||||
|
|||||||
Reference in New Issue
Block a user