From eb5bc6d9ebb657ebefcca973026ec799c90cc1a1 Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Thu, 14 Nov 2024 15:49:41 -0800
Subject: [PATCH] Remove Talc search bench from Khoj eval script

---
 tests/eval_frames.py | 36 +-----------------------------------
 1 file changed, 1 insertion(+), 35 deletions(-)

diff --git a/tests/eval_frames.py b/tests/eval_frames.py
index 9998d4c9..7788322d 100644
--- a/tests/eval_frames.py
+++ b/tests/eval_frames.py
@@ -48,38 +48,6 @@ def load_frames_dataset():
         return None
 
 
-def load_talc_dataset():
-    """
-    Load the TALC dataset from Github.
-
-    Normalize it into the FRAMES benchmark structure and the HuggingFace Dataset format.
-    """
-    try:
-        # Load TALC search benchmark from Github
-        raw_url = "https://raw.githubusercontent.com/Talc-AI/search-bench/3fd5b0858e2effa4c1578c7d046bee0a3895c488/data/searchbench_08_30_2024.jsonl"
-        response = requests.get(raw_url)
-        response.raise_for_status()
-
-        # Parse benchmark from raw JSONL response
-        jsonl_data = [json.loads(line) for line in response.text.splitlines()]
-
-        # Rename keys to match FRAMES format
-        formatted_data = [
-            {"Prompt": d["question"], "Answer": d["expected_answer"], "reasoning_types": "talc"} for d in jsonl_data
-        ]
-
-        # Convert benchmark to HF Dataset
-        dataset = Dataset.from_list(formatted_data)
-        dataset = dataset.shuffle() if RANDOMIZE else dataset
-        dataset = dataset.select(range(int(SAMPLE_SIZE))) if SAMPLE_SIZE else dataset
-
-        return dataset
-
-    except Exception as e:
-        logger.error(f"Error loading dataset: {e}")
-        return None
-
-
 def get_agent_response(prompt: str) -> str:
     """Get response from the Khoj API"""
     try:
@@ -208,7 +176,7 @@ def parse_args():
         "--dataset",
         "-d",
         default="frames",
-        choices=["frames", "talc"],
+        choices=["frames"],
         help="Dataset to use for evaluation (default: frames)",
     )
     return parser.parse_args()
@@ -223,8 +191,6 @@ def main():
     with timer(f"Loaded {args.dataset} dataset in", logger):
         if args.dataset == "frames":
             dataset = load_frames_dataset()
-        elif args.dataset == "talc":
-            dataset = load_talc_dataset()
     if dataset is None:
         return