mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Only send start llm response chat event once, after thoughts streamed
A previous regression resulted in the start-LLM-response event being sent with every (non-thought) message chunk. It should only be sent once, after thoughts have streamed and before the first normal message chunk is streamed. The regression was probably introduced with the changes to stream thoughts. This should fix the chat streaming latency logs.
This commit is contained in:
@@ -1397,6 +1397,7 @@ async def event_generator(
|
||||
)
|
||||
|
||||
full_response = ""
|
||||
message_start = True
|
||||
async for item in llm_response:
|
||||
# Should not happen with async generator. Skip.
|
||||
if item is None or not isinstance(item, ResponseWithThought):
|
||||
@@ -1410,10 +1411,11 @@ async def event_generator(
|
||||
async for result in send_event(ChatEvent.THOUGHT, item.thought):
|
||||
yield result
|
||||
continue
|
||||
|
||||
# Start sending response
|
||||
async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
|
||||
yield result
|
||||
elif message_start:
|
||||
message_start = False
|
||||
async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
|
||||
yield result
|
||||
|
||||
try:
|
||||
async for result in send_event(ChatEvent.MESSAGE, message):
|
||||
|
||||
Reference in New Issue
Block a user