mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 05:29:12 +00:00
Extract events even when http chunk contains partial or mutiple events
Previous logic was more brittle to break with simple unbalanced
'{' or '}' string present in the event data. This method of trying to
identify valid json obj was fairly brittle. It only allowed json
objects or processed event as raw strings.
Now we buffer chunk until we see our unicode magic delimiter and only
then process it.
This is much less likely to break based on event data and the
delimiter is more tunable if we want to reduce rendering breakage
likelihood further
This commit is contained in:
@@ -437,36 +437,6 @@ function finalizeChatBodyResponse(references, newResponseElement) {
|
||||
document.getElementById("chat-input")?.removeAttribute("disabled");
|
||||
}
|
||||
|
||||
function collectJsonsInBufferedMessageChunk(chunk) {
|
||||
// Collect list of JSON objects and raw strings in the chunk
|
||||
// Return the list of objects and the remaining raw string
|
||||
let startIndex = chunk.indexOf('{');
|
||||
if (startIndex === -1) return { objects: [chunk], remainder: '' };
|
||||
const objects = [chunk.slice(0, startIndex)];
|
||||
let openBraces = 0;
|
||||
let currentObject = '';
|
||||
|
||||
for (let i = startIndex; i < chunk.length; i++) {
|
||||
if (chunk[i] === '{') {
|
||||
if (openBraces === 0) startIndex = i;
|
||||
openBraces++;
|
||||
}
|
||||
if (chunk[i] === '}') {
|
||||
openBraces--;
|
||||
if (openBraces === 0) {
|
||||
currentObject = chunk.slice(startIndex, i + 1);
|
||||
objects.push(currentObject);
|
||||
currentObject = '';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
objects: objects,
|
||||
remainder: openBraces > 0 ? chunk.slice(startIndex) : ''
|
||||
};
|
||||
}
|
||||
|
||||
function convertMessageChunkToJson(rawChunk) {
|
||||
// Split the chunk into lines
|
||||
if (rawChunk?.startsWith("{") && rawChunk?.endsWith("}")) {
|
||||
@@ -554,8 +524,8 @@ async function readChatStream(response) {
|
||||
if (!response.body) return;
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
const eventDelimiter = '␃🔚␗';
|
||||
let buffer = '';
|
||||
let netBracketCount = 0;
|
||||
|
||||
while (true) {
|
||||
const { value, done } = await reader.read();
|
||||
@@ -569,14 +539,19 @@ async function readChatStream(response) {
|
||||
|
||||
// Read chunk from stream and append it to the buffer
|
||||
const chunk = decoder.decode(value, { stream: true });
|
||||
console.debug("Raw Chunk:", chunk)
|
||||
// Start buffering chunks until complete event is received
|
||||
buffer += chunk;
|
||||
|
||||
// Check if the buffer contains (0 or more) complete JSON objects
|
||||
netBracketCount += (chunk.match(/{/g) || []).length - (chunk.match(/}/g) || []).length;
|
||||
if (netBracketCount === 0) {
|
||||
let chunks = collectJsonsInBufferedMessageChunk(buffer);
|
||||
chunks.objects.forEach((chunk) => processMessageChunk(chunk));
|
||||
buffer = chunks.remainder;
|
||||
// Once the buffer contains a complete event
|
||||
let newEventIndex;
|
||||
while ((newEventIndex = buffer.indexOf(eventDelimiter)) !== -1) {
|
||||
// Extract the event from the buffer
|
||||
const event = buffer.slice(0, newEventIndex);
|
||||
buffer = buffer.slice(newEventIndex + eventDelimiter.length);
|
||||
|
||||
// Process the event
|
||||
if (event) processMessageChunk(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user