From 7533e3eecfd45afdb7019ce81ade496024c64a3b Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Sun, 31 Aug 2025 12:37:34 -0700
Subject: [PATCH] Use prompt cache key to improve cache hits with openai
 responses api

Using a prompt cache key enables sticky routing to OpenAI servers. This
increases the probability of a chat actor hitting the same server and
reusing cached prompts.

We use a stable hash of the prompt's first 500 characters to uniquely
identify a chat actor prompt.
---
 src/khoj/processor/conversation/openai/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py
index 623b8fe9..76c37422 100644
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -1,3 +1,4 @@
+import hashlib
 import json
 import logging
 import os
@@ -457,6 +458,11 @@ def responses_completion_with_backoff(
 
     model_kwargs = deepcopy(model_kwargs)
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+
+    # Use prompt cache key to increase probability of cache hits
+    if instructions:
+        model_kwargs["prompt_cache_key"] = f"{hashlib.md5(instructions[:500].encode()).hexdigest()}"
+
     # Configure thinking for openai reasoning models
     if is_openai_reasoning_model(model_name, api_base_url):
         temperature = 1