From 70cfaf72e91a872da0725aee5ba77c902faca8b5 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 22 Jul 2025 18:29:01 -0500 Subject: [PATCH] Only send start llm response chat event once, after thoughts streamed A previous regression resulted in the start llm response event being sent with every (non-thought) message chunk. It should only be sent once, after thoughts have streamed and before the first normal message chunk is streamed. The regression was probably introduced with the changes to stream thoughts. This should fix the chat streaming latency logs. --- src/khoj/routers/api_chat.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index f438ec61..d792eadc 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1397,6 +1397,7 @@ async def event_generator( ) full_response = "" + message_start = True async for item in llm_response: # Should not happen with async generator. Skip. if item is None or not isinstance(item, ResponseWithThought): @@ -1410,10 +1411,11 @@ async def event_generator( async for result in send_event(ChatEvent.THOUGHT, item.thought): yield result continue - # Start sending response - async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""): - yield result + elif message_start: + message_start = False + async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""): + yield result try: async for result in send_event(ChatEvent.MESSAGE, message):