Fix usage calculation for OpenAI API streaming completions

During streaming, the usage data is on `chunk.chunk` rather than on `chunk` itself, so the post-stream usage calculation must unwrap it first. This regression must have appeared while tuning the OpenAI stream processors.
2026-03-09 13:25:11 +00:00 · 2025-08-18 18:53:30 -07:00
parent 573c6a32e1
commit e504141c07
1 changed files with 4 additions and 0 deletions
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -219,6 +219,10 @@ def completion_with_backoff(
            # Json dump tool calls into aggregated response
            aggregated_response = json.dumps([tool_call.__dict__ for tool_call in tool_calls])
    # Align chunk definition with non-streaming mode for post stream completion usage
    if hasattr(chunk, "chunk"):
        chunk = chunk.chunk
    # Calculate cost of chat
    input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
    output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0