From 4f3fdaf19d45d597cc5426bb68b0d65447564e84 Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Sun, 18 May 2025 14:42:27 -0700
Subject: [PATCH] Increase khoj api response timeout on evals call. Handle no
 decision

---
 tests/evals/eval.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/evals/eval.py b/tests/evals/eval.py
index 926f8b6c..b93d2667 100644
--- a/tests/evals/eval.py
+++ b/tests/evals/eval.py
@@ -46,7 +46,7 @@ BATCH_SIZE = int(
     os.getenv("BATCH_SIZE", int(SAMPLE_SIZE) / 10 if SAMPLE_SIZE else 10)
 )  # Examples to evaluate in each batch
 SLEEP_SECONDS = 3 if KHOJ_MODE == "general" else 1  # Sleep between API calls to avoid rate limiting
-KHOJ_API_TIMEOUT_SECONDS = 600  # Default to 10 minutes
+KHOJ_API_TIMEOUT_SECONDS = 1200  # Default to 20 minutes
 
 
 class Counter:
@@ -365,7 +365,7 @@ def get_agent_response(prompt: str) -> Dict[str, Any]:
             "references": response_json.get("references", {}),
         }
     except requests.exceptions.Timeout:
-        logger.error(f"Timeout error getting agent response for prompt: {prompt[:100]}...")
+        logger.error(f"Timeout error getting agent response for prompt: {prompt[:100]}...{prompt[-100:]}")
     except Exception as e:
         logger.error(f"Error getting agent response: {e}")
     return {"response": "", "usage": {}, "references": {}}
@@ -544,20 +544,21 @@ def process_batch(batch, batch_start, results, dataset_length, response_evaluato
         running_cost.add(query_cost + eval_cost)
 
         # Update running accuracy
-        running_accuracy = 0.0
         if decision is not None:
             running_true_count.add(decision)
             running_total_count.add(1)
-            running_accuracy = running_true_count.get() / running_total_count.get()
+        running_accuracy = running_true_count.get() / running_total_count.get()
 
         ## Log results
-        decision_color = {True: "green", None: "blue", False: "red"}[decision > 0.5]
+        key_for_color_map = None if decision is None else (decision > 0.5)
+        decision_color = {True: "green", None: "blue", False: "red"}[key_for_color_map]
         colored_decision = color_text(str(decision), decision_color)
         result_to_print = f"""
 ---------
 Decision: {colored_decision}
 Accuracy: {running_accuracy:.2%}
 Progress: {running_total_count.get()/dataset_length:.2%}
+Index: {current_index}
 Question: {prompt}
 Expected Answer: {answer}
 Agent Answer: {agent_response}