Track Usage Metrics in Chat API. Track Running Cost, Accuracy in Evals (#985)

- Track and return cost and usage metrics in the chat API response.
  Track input and output token usage and the cost of interactions with OpenAI, Anthropic and Google chat models for each call to the Khoj chat API.
- Collect, display and store the cost and accuracy of the eval run currently in progress.
  This provides more insight into eval runs during execution, instead of having to wait until the eval run completes.
2026-03-05 21:29:11 +00:00 · 2024-11-20 12:59:44 -08:00
parent bbd24f1e98 ffbd0ae3a5
commit 6f1adcfe67
12 changed files with 230 additions and 67 deletions
--- a/src/interface/obsidian/src/chat_view.ts
+++ b/src/interface/obsidian/src/chat_view.ts
@@ -945,7 +945,7 @@ export class KhojChatView extends KhojPaneView {
            console.log("Started streaming", new Date());
        } else if (chunk.type === 'end_llm_response') {
            console.log("Stopped streaming", new Date());
-
+        } else if (chunk.type === 'end_response') {
            // Automatically respond with voice if the subscribed user has sent voice message
            if (this.chatMessageState.isVoice && this.setting.userInfo?.is_active)
                this.textToSpeech(this.chatMessageState.rawResponse);
--- a/src/interface/web/app/common/chatFunctions.ts
+++ b/src/interface/web/app/common/chatFunctions.ts
@@ -133,7 +133,7 @@ export function processMessageChunk(
        console.log(`Started streaming: ${new Date()}`);
    } else if (chunk.type === "end_llm_response") {
        console.log(`Completed streaming: ${new Date()}`);
-
+    } else if (chunk.type === "end_response") {
        // Append any references after all the data has been streamed
        if (codeContext) currentMessage.codeContext = codeContext;
        if (onlineContext) currentMessage.onlineContext = onlineContext;