mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 13:19:16 +00:00
Track and return cost and usage metrics in chat API response
- Track input and output token usage and cost for interactions
via the chat API with OpenAI, Anthropic and Google chat models
- Get usage metadata from OpenAI using stream_options
- Handle OpenAI proxies that do not support passing usage in the response
- Add new usage and end response events returned by the chat API.
- These can optionally be consumed by clients at a later point
- Update streaming clients to mark a message as completed after the new
`end_response` event, not after the `end_llm_response` event
- Ensure usage data from final response generation step is included
- Pass usage data after the LLM response completes. This allows gathering
token usage and cost for the final response generation step across
streaming and non-streaming modes
This commit is contained in:
@@ -133,7 +133,7 @@ export function processMessageChunk(
|
||||
console.log(`Started streaming: ${new Date()}`);
|
||||
} else if (chunk.type === "end_llm_response") {
|
||||
console.log(`Completed streaming: ${new Date()}`);
|
||||
|
||||
} else if (chunk.type === "end_response") {
|
||||
// Append any references after all the data has been streamed
|
||||
if (codeContext) currentMessage.codeContext = codeContext;
|
||||
if (onlineContext) currentMessage.onlineContext = onlineContext;
|
||||
|
||||
Reference in New Issue
Block a user