mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Use json5 to parse LLM-generated questions to query docs and web
json5 is more forgiving: it handles double quotes and newlines in raw JSON strings
This commit is contained in:
@@ -1,9 +1,8 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
@@ -110,7 +109,7 @@ def extract_questions_anthropic(
|
||||
# Extract, Clean Message from Claude's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
@@ -104,7 +103,7 @@ def extract_questions_gemini(
|
||||
# Extract, Clean Message from Gemini's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
||||
@@ -5,6 +5,7 @@ from datetime import datetime, timedelta
|
||||
from threading import Thread
|
||||
from typing import Any, Iterator, List, Optional, Union
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
from llama_cpp import Llama
|
||||
|
||||
@@ -13,6 +14,7 @@ from khoj.processor.conversation import prompts
|
||||
from khoj.processor.conversation.offline.utils import download_model
|
||||
from khoj.processor.conversation.utils import (
|
||||
ThreadedGenerator,
|
||||
clean_json,
|
||||
commit_conversation_trace,
|
||||
generate_chatml_messages_with_context,
|
||||
messages_to_print,
|
||||
@@ -114,8 +116,8 @@ def extract_questions_offline(
|
||||
|
||||
# Extract and clean the chat model's response
|
||||
try:
|
||||
response = response.strip(empty_escape_sequences)
|
||||
response = json.loads(response)
|
||||
response = clean_json(empty_escape_sequences)
|
||||
response = pyjson5.loads(response)
|
||||
questions = [q.strip() for q in response["queries"] if q.strip()]
|
||||
questions = filter_questions(questions)
|
||||
except:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
@@ -104,7 +104,7 @@ def extract_questions(
|
||||
# Extract, Clean Message from GPT's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
||||
@@ -27,6 +27,7 @@ from typing import (
|
||||
from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
|
||||
|
||||
import cron_descriptor
|
||||
import pyjson5
|
||||
import pytz
|
||||
import requests
|
||||
from apscheduler.job import Job
|
||||
@@ -541,7 +542,7 @@ async def generate_online_subqueries(
|
||||
# Validate that the response is a non-empty, JSON-serializable list
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = {q.strip() for q in response["queries"] if q.strip()}
|
||||
if not isinstance(response, set) or not response or len(response) == 0:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
||||
|
||||
Reference in New Issue
Block a user