mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 21:29:11 +00:00
Use json5 to parse LLM-generated questions for querying docs and the web
json5 is more forgiving: it handles double quotes and newlines in raw JSON strings
This commit is contained in:
@@ -1,9 +1,8 @@
|
|||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import pyjson5
|
||||||
from langchain.schema import ChatMessage
|
from langchain.schema import ChatMessage
|
||||||
|
|
||||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||||
@@ -110,7 +109,7 @@ def extract_questions_anthropic(
|
|||||||
# Extract, Clean Message from Claude's Response
|
# Extract, Clean Message from Claude's Response
|
||||||
try:
|
try:
|
||||||
response = clean_json(response)
|
response = clean_json(response)
|
||||||
response = json.loads(response)
|
response = pyjson5.loads(response)
|
||||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response:
|
if not isinstance(response, list) or not response:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import pyjson5
|
||||||
from langchain.schema import ChatMessage
|
from langchain.schema import ChatMessage
|
||||||
|
|
||||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||||
@@ -104,7 +103,7 @@ def extract_questions_gemini(
|
|||||||
# Extract, Clean Message from Gemini's Response
|
# Extract, Clean Message from Gemini's Response
|
||||||
try:
|
try:
|
||||||
response = clean_json(response)
|
response = clean_json(response)
|
||||||
response = json.loads(response)
|
response = pyjson5.loads(response)
|
||||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response:
|
if not isinstance(response, list) or not response:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from datetime import datetime, timedelta
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
from typing import Any, Iterator, List, Optional, Union
|
from typing import Any, Iterator, List, Optional, Union
|
||||||
|
|
||||||
|
import pyjson5
|
||||||
from langchain.schema import ChatMessage
|
from langchain.schema import ChatMessage
|
||||||
from llama_cpp import Llama
|
from llama_cpp import Llama
|
||||||
|
|
||||||
@@ -13,6 +14,7 @@ from khoj.processor.conversation import prompts
|
|||||||
from khoj.processor.conversation.offline.utils import download_model
|
from khoj.processor.conversation.offline.utils import download_model
|
||||||
from khoj.processor.conversation.utils import (
|
from khoj.processor.conversation.utils import (
|
||||||
ThreadedGenerator,
|
ThreadedGenerator,
|
||||||
|
clean_json,
|
||||||
commit_conversation_trace,
|
commit_conversation_trace,
|
||||||
generate_chatml_messages_with_context,
|
generate_chatml_messages_with_context,
|
||||||
messages_to_print,
|
messages_to_print,
|
||||||
@@ -114,8 +116,8 @@ def extract_questions_offline(
|
|||||||
|
|
||||||
# Extract and clean the chat model's response
|
# Extract and clean the chat model's response
|
||||||
try:
|
try:
|
||||||
response = response.strip(empty_escape_sequences)
|
response = clean_json(empty_escape_sequences)
|
||||||
response = json.loads(response)
|
response = pyjson5.loads(response)
|
||||||
questions = [q.strip() for q in response["queries"] if q.strip()]
|
questions = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
questions = filter_questions(questions)
|
questions = filter_questions(questions)
|
||||||
except:
|
except:
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
|
import pyjson5
|
||||||
from langchain.schema import ChatMessage
|
from langchain.schema import ChatMessage
|
||||||
|
|
||||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||||
@@ -104,7 +104,7 @@ def extract_questions(
|
|||||||
# Extract, Clean Message from GPT's Response
|
# Extract, Clean Message from GPT's Response
|
||||||
try:
|
try:
|
||||||
response = clean_json(response)
|
response = clean_json(response)
|
||||||
response = json.loads(response)
|
response = pyjson5.loads(response)
|
||||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||||
if not isinstance(response, list) or not response:
|
if not isinstance(response, list) or not response:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from typing import (
|
|||||||
from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
|
from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
|
||||||
|
|
||||||
import cron_descriptor
|
import cron_descriptor
|
||||||
|
import pyjson5
|
||||||
import pytz
|
import pytz
|
||||||
import requests
|
import requests
|
||||||
from apscheduler.job import Job
|
from apscheduler.job import Job
|
||||||
@@ -541,7 +542,7 @@ async def generate_online_subqueries(
|
|||||||
# Validate that the response is a non-empty, JSON-serializable list
|
# Validate that the response is a non-empty, JSON-serializable list
|
||||||
try:
|
try:
|
||||||
response = clean_json(response)
|
response = clean_json(response)
|
||||||
response = json.loads(response)
|
response = pyjson5.loads(response)
|
||||||
response = {q.strip() for q in response["queries"] if q.strip()}
|
response = {q.strip() for q in response["queries"] if q.strip()}
|
||||||
if not isinstance(response, set) or not response or len(response) == 0:
|
if not isinstance(response, set) or not response or len(response) == 0:
|
||||||
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
||||||
|
|||||||
Reference in New Issue
Block a user