Add reference notes to result response from GPT when streaming is completed
- NOTE: results are still not being saved to conversation history
@@ -89,10 +89,21 @@
                 }

                 const chunk = decoder.decode(value, { stream: true });
+
+                if (chunk.startsWith("### compiled references:")) {
+                    const rawReference = chunk.split("### compiled references:")[1];
+                    const rawReferenceAsJson = JSON.parse(rawReference);
+                    let polishedReference = rawReferenceAsJson.map((reference, index) => generateReference(reference, index))
+                        .join("<sup>,</sup>");
+
+                    new_response_text.innerHTML += polishedReference;
+                } else {
                 new_response_text.innerHTML += chunk;
                 console.log(`Received ${chunk.length} bytes of data`);
                 console.log(`Chunk: ${chunk}`);
+                document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight;
                 readStream();
+                }
             });
         }
         readStream();
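The web client splits the reference payload out of the stream by prefix-matching each chunk against the `### compiled references:` sentinel (the `generateReference` helper it maps over is defined elsewhere in the page, not in this commit). The same protocol can be exercised outside the browser; below is a minimal Python sketch of such a consumer, where the server URL and query parameter are assumptions for illustration:

```python
import json
import requests  # third-party; pip install requests

SENTINEL = "### compiled references:"

# Hypothetical local endpoint and query parameter, for illustration only.
response = requests.get("http://localhost:8000/api/chat", params={"q": "test"}, stream=True)
response.encoding = response.encoding or "utf-8"

answer, references = "", []
for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
    if chunk.startswith(SENTINEL):
        # Trailing chunk: a JSON list of compiled reference strings.
        references = json.loads(chunk[len(SENTINEL):])
    else:
        answer += chunk

print(answer)
print(f"{len(references)} reference(s)")
```

Note that both this sketch and the JavaScript above assume the sentinel arrives at the start of its own read; a chunk boundary landing inside the marker would leak the references into the answer text.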
@@ -172,6 +172,7 @@ def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo",
     logger.debug(f"Conversation Context for GPT: {messages}")
     return chat_completion_with_backoff(
         messages=messages,
+        compiled_references=references,
         model_name=model,
         temperature=temperature,
         openai_api_key=api_key,
@@ -6,6 +6,7 @@ from typing import Any, Optional
 from uuid import UUID
 import asyncio
 from threading import Thread
+import json

 # External Packages
 from langchain.chat_models import ChatOpenAI
@@ -36,8 +37,9 @@ max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}


 class ThreadedGenerator:
-    def __init__(self):
+    def __init__(self, compiled_references):
         self.queue = queue.Queue()
+        self.compiled_references = compiled_references

     def __iter__(self):
         return self
@@ -45,13 +47,15 @@ class ThreadedGenerator:
     def __next__(self):
         item = self.queue.get()
         if item is StopIteration:
-            raise item
+            raise StopIteration
         return item

     def send(self, data):
         self.queue.put(data)

     def close(self):
+        if self.compiled_references and len(self.compiled_references) > 0:
+            self.queue.put(f"### compiled references:{json.dumps(self.compiled_references)}")
         self.queue.put(StopIteration)


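Read together with the `__init__` change above, `close()` now flushes the compiled references into the stream as one final sentinel-prefixed item before signalling the end. A self-contained sketch of that hand-off, using only the standard library (`fake_producer` here is an illustrative stand-in for khoj's actual `llm_thread`):

```python
import json
import queue
from threading import Thread


class ThreadedGenerator:
    """Bridges a producer thread and a consuming iterator via a queue."""

    def __init__(self, compiled_references):
        self.queue = queue.Queue()
        self.compiled_references = compiled_references

    def __iter__(self):
        return self

    def __next__(self):
        item = self.queue.get()
        if item is StopIteration:
            raise StopIteration
        return item

    def send(self, data):
        self.queue.put(data)

    def close(self):
        # Append references as a final, sentinel-prefixed chunk, then stop.
        if self.compiled_references:
            self.queue.put(f"### compiled references:{json.dumps(self.compiled_references)}")
        self.queue.put(StopIteration)


def fake_producer(g):
    # Stand-in for llm_thread: emit a few tokens, then close the stream.
    for token in ["Hello", ", ", "world"]:
        g.send(token)
    g.close()


g = ThreadedGenerator(["note one", "note two"])
Thread(target=fake_producer, args=(g,)).start()
for chunk in g:
    print(repr(chunk))  # tokens, then the "### compiled references:[...]" chunk
```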
@@ -101,8 +105,8 @@ def completion_with_backoff(**kwargs):
     before_sleep=before_sleep_log(logger, logging.DEBUG),
     reraise=True,
 )
-def chat_completion_with_backoff(messages, model_name, temperature, openai_api_key=None):
-    g = ThreadedGenerator()
+def chat_completion_with_backoff(messages, compiled_references, model_name, temperature, openai_api_key=None):
+    g = ThreadedGenerator(compiled_references)
     t = Thread(target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key))
     t.start()
     return g
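`llm_thread` itself is untouched by this commit and not shown here. For orientation only, a hedged sketch of how such a producer typically feeds the generator via a LangChain streaming callback; the handler class and wiring are illustrative assumptions, not khoj's actual implementation:

```python
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chat_models import ChatOpenAI


class StreamingCallbackHandler(BaseCallbackHandler):
    """Forward each generated token to the ThreadedGenerator (illustrative)."""

    def __init__(self, gen):
        super().__init__()
        self.gen = gen

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.gen.send(token)


def llm_thread(g, messages, model_name, temperature, openai_api_key=None):
    try:
        chat = ChatOpenAI(
            streaming=True,
            callbacks=[StreamingCallbackHandler(g)],
            model_name=model_name,
            temperature=temperature,
            openai_api_key=openai_api_key,
        )
        chat(messages)
    finally:
        g.close()  # emits the references sentinel, then StopIteration
```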
@@ -478,7 +478,7 @@ async def chat(
     result_list = []
     for query in inferred_queries:
         result_list.extend(
-            await search(query, request=request, n=5, r=True, score_threshold=-5.0, dedupe=False)
+            await search(query, request=request, n=5, r=False, score_threshold=-5.0, dedupe=False)
         )
     compiled_references = [item.additional["compiled"] for item in result_list]

@@ -501,7 +501,15 @@ async def chat(

     try:
         with timer("Generating chat response took", logger):
-            gpt_response = converse(compiled_references, q, meta_log, model=chat_model, api_key=api_key)
+            gpt_response = converse(
+                compiled_references,
+                q,
+                meta_log,
+                model=chat_model,
+                api_key=api_key,
+                chat_session=chat_session,
+                inferred_queries=inferred_queries,
+            )
     except Exception as e:
         gpt_response = str(e)
