From 70cfaf72e91a872da0725aee5ba77c902faca8b5 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 22 Jul 2025 18:29:01 -0500 Subject: [PATCH] Only send start llm response chat event once, after thoughts streamed A previous regression resulted in the start llm response event being sent with every (non-thought) message chunk. It should only be sent once, after thoughts have streamed and before the first normal message chunk is streamed. The regression was probably introduced with the changes to stream thoughts. This should fix the chat streaming latency logs. --- src/khoj/routers/api_chat.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index f438ec61..d792eadc 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1397,6 +1397,7 @@ async def event_generator( ) full_response = "" + message_start = True async for item in llm_response: # Should not happen with async generator. Skip. if item is None or not isinstance(item, ResponseWithThought): @@ -1410,10 +1411,11 @@ async def event_generator( async for result in send_event(ChatEvent.THOUGHT, item.thought): yield result continue - # Start sending response - async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""): - yield result + elif message_start: + message_start = False + async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""): + yield result try: async for result in send_event(ChatEvent.MESSAGE, message):