Track and return cost and usage metrics in chat API response

- Track input and output token usage and cost for interactions
  via the chat API with OpenAI, Anthropic, and Google chat models

- Get usage metadata from OpenAI using stream_options (sketched below)
- Handle OpenAI proxies that do not return usage data in the response

- Add new usage and end_response events returned by the chat API
  - These can optionally be consumed by clients at a later point
  - Update streaming clients to mark a message as completed after the
    new end_response event, not after the end_llm_response event
- Ensure usage data from the final response generation step is included
  - Pass usage data after the LLM response completes. This allows
    gathering token usage and cost for the final response generation
    step in both streaming and non-streaming modes
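
As a rough sketch of the stream_options flow described above: the official openai Node SDK can request a final usage chunk on streaming chat completions, and a client can fall back gracefully when an OpenAI-compatible proxy omits it. The model name and per-token prices below are illustrative assumptions, not values from this commit.

import OpenAI from "openai";

// Illustrative per-token prices in USD; real pricing varies by model (assumption)
const INPUT_PRICE = 0.15 / 1_000_000;
const OUTPUT_PRICE = 0.60 / 1_000_000;

async function chatWithUsage(prompt: string) {
    const client = new OpenAI();  // reads OPENAI_API_KEY from the environment
    const stream = await client.chat.completions.create({
        model: "gpt-4o-mini",  // assumed model name, for illustration only
        messages: [{ role: "user", content: prompt }],
        stream: true,
        // Ask the API to append one final chunk that carries token usage
        stream_options: { include_usage: true },
    });

    let response = "";
    let usage: OpenAI.CompletionUsage | undefined;
    for await (const chunk of stream) {
        // Content chunks carry choices; the final usage chunk has empty choices
        response += chunk.choices[0]?.delta?.content ?? "";
        if (chunk.usage) usage = chunk.usage;
    }

    // Some OpenAI-compatible proxies never send the usage chunk; degrade gracefully
    if (!usage) return { response, cost: undefined };

    const cost = usage.prompt_tokens * INPUT_PRICE
               + usage.completion_tokens * OUTPUT_PRICE;
    return { response, cost };
}

The same accumulate-then-report pattern should carry over to the Anthropic and Google models mentioned above, whose APIs expose token counts on their own response metadata.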
Debanjum committed 2024-11-18 18:23:05 -08:00
commit c53c3db96b (parent 7bdc9590dd)
10 changed files with 139 additions and 38 deletions


@@ -945,7 +945,7 @@ export class KhojChatView extends KhojPaneView {
          console.log("Started streaming", new Date());
      } else if (chunk.type === 'end_llm_response') {
          console.log("Stopped streaming", new Date());
+     } else if (chunk.type === 'end_response') {
          // Automatically respond with voice if the subscribed user has sent voice message
          if (this.chatMessageState.isVoice && this.setting.userInfo?.is_active)
              this.textToSpeech(this.chatMessageState.rawResponse);
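
For context, a streaming client consuming the new events could also record token usage when the usage chunk arrives, alongside the end_response handling shown above. The payload shape below (input_tokens, output_tokens, cost) is a hypothetical illustration; this hunk does not show what the usage event actually carries.

// Hypothetical shape of the new usage chunk, assumed for illustration
interface UsageChunk {
    type: 'usage';
    usage: { input_tokens: number; output_tokens: number; cost?: number };
}

function handleUsageChunk(chunk: UsageChunk) {
    const { input_tokens, output_tokens, cost } = chunk.usage;
    console.log(`Tokens in/out: ${input_tokens}/${output_tokens}`,
                `cost: $${cost?.toFixed(4) ?? 'n/a'}`);
}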