diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py index e436ecda..c2db6a72 100644 --- a/src/khoj/processor/conversation/anthropic/utils.py +++ b/src/khoj/processor/conversation/anthropic/utils.py @@ -144,6 +144,7 @@ async def anthropic_chat_completion_with_backoff( formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt) aggregated_response = "" + response_started = False final_message = None start_time = perf_counter() async with client.messages.stream( @@ -157,7 +158,8 @@ async def anthropic_chat_completion_with_backoff( ) as stream: async for chunk in stream: # Log the time taken to start response - if aggregated_response == "": + if not response_started: + response_started = True logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds") # Skip empty chunks if chunk.type != "content_block_delta": diff --git a/src/khoj/processor/conversation/google/utils.py b/src/khoj/processor/conversation/google/utils.py index ed37a0b3..d465cbda 100644 --- a/src/khoj/processor/conversation/google/utils.py +++ b/src/khoj/processor/conversation/google/utils.py @@ -195,13 +195,15 @@ async def gemini_chat_completion_with_backoff( aggregated_response = "" final_chunk = None + response_started = False start_time = perf_counter() chat_stream: AsyncIterator[gtypes.GenerateContentResponse] = await client.aio.models.generate_content_stream( model=model_name, config=config, contents=formatted_messages ) async for chunk in chat_stream: # Log the time taken to start response - if final_chunk is None: + if not response_started: + response_started = True logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds") # Keep track of the last chunk for usage data final_chunk = chunk diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 7b1c11db..77dee0c4 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -226,6 +226,7 @@ async def chat_completion_with_backoff( aggregated_response = "" final_chunk = None + response_started = False start_time = perf_counter() chat_stream: openai.AsyncStream[ChatCompletionChunk] = await client.chat.completions.create( messages=formatted_messages, # type: ignore @@ -237,7 +238,8 @@ async def chat_completion_with_backoff( ) async for chunk in stream_processor(chat_stream): # Log the time taken to start response - if final_chunk is None: + if not response_started: + response_started = True logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds") # Keep track of the last chunk for usage data final_chunk = chunk