Merge branch 'master' of github.com:khoj-ai/khoj into features/include-full-file-in-convo-with-filter

This commit is contained in:
sabaimran
2024-11-10 19:24:11 -08:00
15 changed files with 222 additions and 989 deletions

View File

@@ -101,10 +101,10 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> Dic
return {"decision": "FALSE", "explanation": f"Evaluation failed: {str(e)}"}
def process_batch(batch, counter, results, dataset_length):
for prompt, answer, reasoning_type in batch:
counter += 1
logger.info(f"Processing example: {counter}/{dataset_length}")
def process_batch(batch, batch_start, results, dataset_length):
for idx, (prompt, answer, reasoning_type) in enumerate(batch):
current_index = batch_start + idx
logger.info(f"Processing example: {current_index}/{dataset_length}")
# Trigger research mode if enabled
prompt = f"/{KHOJ_MODE} {prompt}" if KHOJ_MODE else prompt
@@ -113,12 +113,16 @@ def process_batch(batch, counter, results, dataset_length):
agent_response = get_agent_response(prompt)
# Evaluate response
evaluation = evaluate_response(prompt, agent_response, answer)
if agent_response is None or agent_response.strip() == "":
evaluation["decision"] = False
evaluation["explanation"] = "Agent response is empty. This maybe due to a service error."
else:
evaluation = evaluate_response(prompt, agent_response, answer)
# Store results
results.append(
{
"index": counter,
"index": current_index,
"prompt": prompt,
"ground_truth": answer,
"agent_response": agent_response,
@@ -165,12 +169,13 @@ def main():
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for i in range(0, dataset_length, BATCH_SIZE):
batch_start = i
batch = zip(
dataset["Prompt"][i : i + BATCH_SIZE],
dataset["Answer"][i : i + BATCH_SIZE],
dataset["reasoning_types"][i : i + BATCH_SIZE],
)
futures.append(executor.submit(process_batch, batch, counter, results, dataset_length))
futures.append(executor.submit(process_batch, batch, batch_start, results, dataset_length))
# Wait for all futures to complete
concurrent.futures.wait(futures)