Format server code with ruff recommendations

This commit is contained in:
Debanjum
2025-08-01 00:10:34 -07:00
parent 4a3ed9e5a4
commit c8e07e86e4
65 changed files with 407 additions and 370 deletions

View File

@@ -326,7 +326,7 @@ File statistics:
- Code examples: Yes
- Purpose: Stress testing atomic agent updates
{'Additional padding content. ' * 20}
{"Additional padding content. " * 20}
End of file {i}.
"""

View File

@@ -462,7 +462,7 @@ def evaluate_response_with_gemini(
Ground Truth: {ground_truth}
Provide your evaluation in the following json format:
{"explanation:" "[How you made the decision?)", "decision:" "(TRUE if response contains key information, FALSE otherwise)"}
{"explanation:[How you made the decision?)", "decision:(TRUE if response contains key information, FALSE otherwise)"}
"""
gemini_api_url = (
f"https://generativelanguage.googleapis.com/v1beta/models/{eval_model}:generateContent?key={GEMINI_API_KEY}"
@@ -557,7 +557,7 @@ def process_batch(batch, batch_start, results, dataset_length, response_evaluato
---------
Decision: {colored_decision}
Accuracy: {running_accuracy:.2%}
Progress: {running_total_count.get()/dataset_length:.2%}
Progress: {running_total_count.get() / dataset_length:.2%}
Index: {current_index}
Question: {prompt}
Expected Answer: {answer}

View File

@@ -20,7 +20,7 @@ def test_create_default_agent(default_user: KhojUser):
assert agent.input_tools == []
assert agent.output_modes == []
assert agent.privacy_level == Agent.PrivacyLevel.PUBLIC
assert agent.managed_by_admin == True
assert agent.managed_by_admin
@pytest.mark.anyio
@@ -178,7 +178,7 @@ async def test_multiple_agents_with_knowledge_base_and_users(
default_user2: KhojUser, default_openai_chat_model_option: ChatModel, chat_client, default_user3: KhojUser
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
@@ -290,17 +290,17 @@ async def test_large_knowledge_base_atomic_update(
assert len(final_entries) > initial_entries_count, "Should have more entries after update"
# With 180 files, we should have many entries (each file creates multiple entries)
assert (
len(final_entries) >= expected_file_count
), f"Expected at least {expected_file_count} entries, got {len(final_entries)}"
assert len(final_entries) >= expected_file_count, (
f"Expected at least {expected_file_count} entries, got {len(final_entries)}"
)
# Verify no partial state - all entries should correspond to the final file set
entry_file_paths = {entry.file_path for entry in final_entries}
# All file objects should have corresponding entries
assert file_paths_in_db.issubset(
entry_file_paths
), "All file objects should have corresponding entries - atomic update verification"
assert file_paths_in_db.issubset(entry_file_paths), (
"All file objects should have corresponding entries - atomic update verification"
)
# Additional stress test: verify referential integrity
# Count entries per file to ensure no partial file processing
@@ -333,7 +333,7 @@ async def test_concurrent_agent_updates_atomicity(
test_files = available_files # Use all available files for the stress test
# Create initial agent
agent = await AgentAdapters.aupdate_agent(
await AgentAdapters.aupdate_agent(
default_user2,
"Concurrent Test Agent",
"Test concurrent updates",
@@ -391,14 +391,14 @@ async def test_concurrent_agent_updates_atomicity(
file_object_paths = {fo.file_name for fo in final_file_objects}
# All entries should have corresponding file objects
assert entry_file_paths.issubset(
file_object_paths
), "All entries should have corresponding file objects - indicates atomic update worked"
assert entry_file_paths.issubset(file_object_paths), (
"All entries should have corresponding file objects - indicates atomic update worked"
)
except Exception as e:
# If we get database integrity errors, that's actually expected behavior
# with proper atomic transactions - they should fail cleanly rather than
# allowing partial updates
assert (
"database" in str(e).lower() or "integrity" in str(e).lower()
), f"Expected database/integrity error with concurrent updates, got: {e}"
assert "database" in str(e).lower() or "integrity" in str(e).lower(), (
f"Expected database/integrity error with concurrent updates, got: {e}"
)

View File

@@ -5,7 +5,6 @@ from urllib.parse import quote
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from PIL import Image
from khoj.configure import configure_routes, configure_search_types
from khoj.database.adapters import EntryAdapters
@@ -101,7 +100,7 @@ def test_update_with_invalid_content_type(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.get(f"/api/update?t=invalid_content_type", headers=headers)
response = client.get("/api/update?t=invalid_content_type", headers=headers)
# Assert
assert response.status_code == 422
@@ -114,7 +113,7 @@ def test_regenerate_with_invalid_content_type(client):
headers = {"Authorization": "Bearer kk-secret"}
# Act
response = client.get(f"/api/update?force=true&t=invalid_content_type", headers=headers)
response = client.get("/api/update?force=true&t=invalid_content_type", headers=headers)
# Assert
assert response.status_code == 422
@@ -238,13 +237,13 @@ def test_regenerate_with_valid_content_type(client):
def test_regenerate_with_github_fails_without_pat(client):
# Act
headers = {"Authorization": "Bearer kk-secret"}
response = client.get(f"/api/update?force=true&t=github", headers=headers)
response = client.get("/api/update?force=true&t=github", headers=headers)
# Arrange
files = get_sample_files_data()
# Act
response = client.patch(f"/api/content?t=github", files=files, headers=headers)
response = client.patch("/api/content?t=github", files=files, headers=headers)
# Assert
assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github"
@@ -270,7 +269,7 @@ def test_get_api_config_types(client, sample_org_data, default_user: KhojUser):
text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
# Act
response = client.get(f"/api/content/types", headers=headers)
response = client.get("/api/content/types", headers=headers)
# Assert
assert response.status_code == 200
@@ -286,7 +285,7 @@ def test_get_configured_types_with_no_content_config(fastapi_app: FastAPI):
client = TestClient(fastapi_app)
# Act
response = client.get(f"/api/content/types")
response = client.get("/api/content/types")
# Assert
assert response.status_code == 200
@@ -454,8 +453,8 @@ def test_chat_with_unauthenticated_user(chat_client_with_auth, api_user2: KhojAp
headers = {"Authorization": f"Bearer {api_user2.token}"}
# Act
auth_response = chat_client_with_auth.post(f"/api/chat", json={"q": query}, headers=headers)
no_auth_response = chat_client_with_auth.post(f"/api/chat", json={"q": query})
auth_response = chat_client_with_auth.post("/api/chat", json={"q": query}, headers=headers)
no_auth_response = chat_client_with_auth.post("/api/chat", json={"q": query})
# Assert
assert auth_response.status_code == 200

View File

@@ -77,12 +77,12 @@ class TestTruncateMessage:
# Assert
# The original object has been modified. Verify certain properties
assert (
len(chat_history) == 1
), "Only most recent message should be present as it itself is larger than context size"
assert len(truncated_chat_history[0].content) < len(
copy_big_chat_message.content
), "message content list should be modified"
assert len(chat_history) == 1, (
"Only most recent message should be present as it itself is larger than context size"
)
assert len(truncated_chat_history[0].content) < len(copy_big_chat_message.content), (
"message content list should be modified"
)
assert truncated_chat_history[0].content[-1]["text"] == "Question?", "Query should be preserved"
assert initial_tokens > self.max_prompt_size, "Initial tokens should be greater than max prompt size"
assert final_tokens <= self.max_prompt_size, "Truncated message should be within max prompt size"
@@ -101,9 +101,9 @@ class TestTruncateMessage:
# Assert
# The original object has been modified. Verify certain properties
assert (
len(chat_history) == 1
), "Only most recent message should be present as it itself is larger than context size"
assert len(chat_history) == 1, (
"Only most recent message should be present as it itself is larger than context size"
)
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
assert truncated_chat_history[0].content[0]["text"].endswith("\nQuestion?"), "Query should be preserved"
assert initial_tokens > self.max_prompt_size, "Initial tokens should be greater than max prompt size"
@@ -150,9 +150,9 @@ class TestTruncateMessage:
# The original object has been modified. Verify certain properties
assert initial_tokens > self.max_prompt_size, "Initial tokens should be greater than max prompt size"
assert final_tokens <= self.max_prompt_size, "Final tokens should be within max prompt size"
assert (
len(chat_messages) == 1
), "Only most recent message should be present as it itself is larger than context size"
assert len(chat_messages) == 1, (
"Only most recent message should be present as it itself is larger than context size"
)
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
assert truncated_chat_history[0].content[0]["text"].endswith("\nQuestion?"), "Query should be preserved"
@@ -172,9 +172,9 @@ class TestTruncateMessage:
# The original object has been modified. Verify certain properties
assert initial_tokens > self.max_prompt_size, "Initial tokens should be greater than max prompt size"
assert final_tokens <= self.max_prompt_size, "Final tokens should be within max prompt size"
assert (
len(chat_messages) == 1
), "Only most recent message should be present as it itself is larger than context size"
assert len(chat_messages) == 1, (
"Only most recent message should be present as it itself is larger than context size"
)
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"

View File

@@ -162,15 +162,15 @@ def test_date_extraction():
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected d.m.Y structured date to be extracted"
extracted_dates = DateFilter().extract_dates("CLOCK: [1984-04-01 Sun 09:50]--[1984-04-01 Sun 10:10] => 24:20")
assert extracted_dates == [
datetime(1984, 4, 1, 0, 0, 0)
], "Expected single deduplicated date extracted from logbook entry"
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
"Expected single deduplicated date extracted from logbook entry"
)
extracted_dates = DateFilter().extract_dates("CLOCK: [1984/03/31 mer 09:50]--[1984/04/01 mer 10:10] => 24:20")
expected_dates = [datetime(1984, 4, 1, 0, 0, 0), datetime(1984, 3, 31, 0, 0, 0)]
assert all(
[dt in extracted_dates for dt in expected_dates]
), "Expected multiple different dates extracted from logbook entry"
assert all([dt in extracted_dates for dt in expected_dates]), (
"Expected multiple different dates extracted from logbook entry"
)
def test_natual_date_extraction():
@@ -187,9 +187,9 @@ def test_natual_date_extraction():
assert datetime(1984, 4, 4, 0, 0, 0) in extracted_dates, "Expected natural date to be extracted"
extracted_dates = DateFilter().extract_dates("head 11th april 1984 tail")
assert (
datetime(1984, 4, 11, 0, 0, 0) in extracted_dates
), "Expected natural date with lowercase month to be extracted"
assert datetime(1984, 4, 11, 0, 0, 0) in extracted_dates, (
"Expected natural date with lowercase month to be extracted"
)
extracted_dates = DateFilter().extract_dates("head 23rd april 84 tail")
assert datetime(1984, 4, 23, 0, 0, 0) in extracted_dates, "Expected natural date with 2-digit year to be extracted"
@@ -201,16 +201,16 @@ def test_natual_date_extraction():
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], "Expected partial natural date to be extracted"
extracted_dates = DateFilter().extract_dates("head Apr 1984 tail")
assert extracted_dates == [
datetime(1984, 4, 1, 0, 0, 0)
], "Expected partial natural date with short month to be extracted"
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
"Expected partial natural date with short month to be extracted"
)
extracted_dates = DateFilter().extract_dates("head apr 1984 tail")
assert extracted_dates == [
datetime(1984, 4, 1, 0, 0, 0)
], "Expected partial natural date with lowercase month to be extracted"
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
"Expected partial natural date with lowercase month to be extracted"
)
extracted_dates = DateFilter().extract_dates("head apr 84 tail")
assert extracted_dates == [
datetime(1984, 4, 1, 0, 0, 0)
], "Expected partial natural date with 2-digit year to be extracted"
assert extracted_dates == [datetime(1984, 4, 1, 0, 0, 0)], (
"Expected partial natural date with 2-digit year to be extracted"
)

View File

@@ -1,5 +1,3 @@
import os
from khoj.processor.content.images.image_to_entries import ImageToEntries

View File

@@ -8,7 +8,7 @@ from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntrie
def test_extract_markdown_with_no_headings(tmp_path):
"Convert markdown file with no heading to entry format."
# Arrange
entry = f"""
entry = """
- Bullet point 1
- Bullet point 2
"""
@@ -35,7 +35,7 @@ def test_extract_markdown_with_no_headings(tmp_path):
def test_extract_single_markdown_entry(tmp_path):
"Convert markdown from single file to entry format."
# Arrange
entry = f"""### Heading
entry = """### Heading
\t\r
Body Line 1
"""
@@ -55,7 +55,7 @@ def test_extract_single_markdown_entry(tmp_path):
def test_extract_multiple_markdown_entries(tmp_path):
"Convert multiple markdown from single file to entry format."
# Arrange
entry = f"""
entry = """
### Heading 1
\t\r
Heading 1 Body Line 1
@@ -81,7 +81,7 @@ def test_extract_multiple_markdown_entries(tmp_path):
def test_extract_entries_with_different_level_headings(tmp_path):
"Extract markdown entries with different level headings."
# Arrange
entry = f"""
entry = """
# Heading 1
## Sub-Heading 1.1
# Heading 2
@@ -104,7 +104,7 @@ def test_extract_entries_with_different_level_headings(tmp_path):
def test_extract_entries_with_non_incremental_heading_levels(tmp_path):
"Extract markdown entries when deeper child level before shallower child level."
# Arrange
entry = f"""
entry = """
# Heading 1
#### Sub-Heading 1.1
## Sub-Heading 1.2
@@ -129,7 +129,7 @@ def test_extract_entries_with_non_incremental_heading_levels(tmp_path):
def test_extract_entries_with_text_before_headings(tmp_path):
"Extract markdown entries with some text before any headings."
# Arrange
entry = f"""
entry = """
Text before headings
# Heading 1
body line 1
@@ -149,15 +149,15 @@ body line 2
assert len(entries[1]) == 3
assert entries[1][0].raw == "\nText before headings"
assert entries[1][1].raw == "# Heading 1\nbody line 1"
assert (
entries[1][2].raw == "# Heading 1\n## Heading 2\nbody line 2\n"
), "Ensure raw entry includes heading ancestory"
assert entries[1][2].raw == "# Heading 1\n## Heading 2\nbody line 2\n", (
"Ensure raw entry includes heading ancestory"
)
def test_parse_markdown_file_into_single_entry_if_small(tmp_path):
"Parse markdown file into single entry if it fits within the token limits."
# Arrange
entry = f"""
entry = """
# Heading 1
body line 1
## Subheading 1.1
@@ -180,7 +180,7 @@ body line 1.1
def test_parse_markdown_entry_with_children_as_single_entry_if_small(tmp_path):
"Parse markdown entry with child headings as single entry if it fits within the tokens limits."
# Arrange
entry = f"""
entry = """
# Heading 1
body line 1
## Subheading 1.1
@@ -201,13 +201,13 @@ longer body line 2.1
# Assert
assert len(entries) == 2
assert len(entries[1]) == 3
assert (
entries[1][0].raw == "# Heading 1\nbody line 1\n## Subheading 1.1\nbody line 1.1"
), "First entry includes children headings"
assert entries[1][0].raw == "# Heading 1\nbody line 1\n## Subheading 1.1\nbody line 1.1", (
"First entry includes children headings"
)
assert entries[1][1].raw == "# Heading 2\nbody line 2", "Second entry does not include children headings"
assert (
entries[1][2].raw == "# Heading 2\n## Subheading 2.1\nlonger body line 2.1\n"
), "Third entry is second entries child heading"
assert entries[1][2].raw == "# Heading 2\n## Subheading 2.1\nlonger body line 2.1\n", (
"Third entry is second entries child heading"
)
def test_line_number_tracking_in_recursive_split():
@@ -252,14 +252,16 @@ def test_line_number_tracking_in_recursive_split():
assert entry.uri is not None, f"Entry '{entry}' has a None URI."
assert match is not None, f"URI format is incorrect: {entry.uri}"
assert (
filepath_from_uri == markdown_file_path
), f"File path in URI '{filepath_from_uri}' does not match expected '{markdown_file_path}'"
assert filepath_from_uri == markdown_file_path, (
f"File path in URI '{filepath_from_uri}' does not match expected '{markdown_file_path}'"
)
# Ensure the first non-heading line in the compiled entry matches the line in the file
assert (
cleaned_first_entry_line in line_in_file.strip() or cleaned_first_entry_line in next_line_in_file.strip()
), f"First non-heading line '{cleaned_first_entry_line}' in {entry.raw} does not match line {line_number_from_uri} in file: '{line_in_file}' or next line '{next_line_in_file}'"
), (
f"First non-heading line '{cleaned_first_entry_line}' in {entry.raw} does not match line {line_number_from_uri} in file: '{line_in_file}' or next line '{next_line_in_file}'"
)
# Helper Functions

View File

@@ -343,12 +343,12 @@ Expenses:Food:Dining 10.00 USD""",
"file": "Ledger.org",
},
{
"compiled": f"""2020-04-01 "SuperMercado" "Bananas"
"compiled": """2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
"file": "Ledger.org",
},
{
"compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
"compiled": """2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD""",
"file": "Ledger.org",
},
@@ -389,12 +389,12 @@ Expenses:Food:Dining 10.00 USD""",
"file": "Ledger.md",
},
{
"compiled": f"""2020-04-01 "SuperMercado" "Bananas"
"compiled": """2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
"file": "Ledger.md",
},
{
"compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
"compiled": """2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD""",
"file": "Ledger.md",
},
@@ -452,17 +452,17 @@ async def test_ask_for_clarification_if_not_enough_context_in_question():
# Arrange
context = [
{
"compiled": f"""# Ramya
"compiled": """# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
"file": "Family.md",
},
{
"compiled": f"""# Fang
"compiled": """# Fang
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
"file": "Family.md",
},
{
"compiled": f"""# Aiyla
"compiled": """# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
"file": "Family.md",
},
@@ -497,9 +497,9 @@ async def test_agent_prompt_should_be_used(openai_agent):
"Chat actor should ask be tuned to think like an accountant based on the agent definition"
# Arrange
context = [
{"compiled": f"""I went to the store and bought some bananas for 2.20""", "file": "Ledger.md"},
{"compiled": f"""I went to the store and bought some apples for 1.30""", "file": "Ledger.md"},
{"compiled": f"""I went to the store and bought some oranges for 6.00""", "file": "Ledger.md"},
{"compiled": """I went to the store and bought some bananas for 2.20""", "file": "Ledger.md"},
{"compiled": """I went to the store and bought some apples for 1.30""", "file": "Ledger.md"},
{"compiled": """I went to the store and bought some oranges for 6.00""", "file": "Ledger.md"},
]
expected_responses = ["9.50", "9.5"]
@@ -539,13 +539,13 @@ async def test_websearch_with_operators(chat_client, default_user2):
responses = await generate_online_subqueries(user_query, [], None, default_user2)
# Assert
assert any(
["reddit.com/r/worldnews" in response for response in responses]
), "Expected a search query to include site:reddit.com but got: " + str(responses)
assert any(["reddit.com/r/worldnews" in response for response in responses]), (
"Expected a search query to include site:reddit.com but got: " + str(responses)
)
assert any(
["site:reddit.com" in response for response in responses]
), "Expected a search query to include site:reddit.com but got: " + str(responses)
assert any(["site:reddit.com" in response for response in responses]), (
"Expected a search query to include site:reddit.com but got: " + str(responses)
)
# ----------------------------------------------------------------------------------------------------
@@ -559,9 +559,9 @@ async def test_websearch_khoj_website_for_info_about_khoj(chat_client, default_u
responses = await generate_online_subqueries(user_query, [], None, default_user2)
# Assert
assert any(
["site:khoj.dev" in response for response in responses]
), "Expected search query to include site:khoj.dev but got: " + str(responses)
assert any(["site:khoj.dev" in response for response in responses]), (
"Expected search query to include site:khoj.dev but got: " + str(responses)
)
# ----------------------------------------------------------------------------------------------------
@@ -693,9 +693,9 @@ def test_infer_task_scheduling_request(
for expected_q in expected_qs:
assert expected_q in inferred_query, f"Expected fragment {expected_q} in query: {inferred_query}"
for unexpected_q in unexpected_qs:
assert (
unexpected_q not in inferred_query
), f"Did not expect fragment '{unexpected_q}' in query: '{inferred_query}'"
assert unexpected_q not in inferred_query, (
f"Did not expect fragment '{unexpected_q}' in query: '{inferred_query}'"
)
# ----------------------------------------------------------------------------------------------------

View File

@@ -33,7 +33,7 @@ def create_conversation(message_list, user, agent=None):
@pytest.mark.django_db(transaction=True)
def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
# Act
response = chat_client.post(f"/api/chat", json={"q": "Hello, my name is Testatron. Who are you?"})
response = chat_client.post("/api/chat", json={"q": "Hello, my name is Testatron. Who are you?"})
response_message = response.json()["response"]
# Assert
@@ -50,7 +50,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
def test_chat_with_online_content(chat_client):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
response = chat_client.post(f"/api/chat?", json={"q": q})
response = chat_client.post("/api/chat?", json={"q": q})
response_message = response.json()["response"]
# Assert
@@ -59,9 +59,9 @@ def test_chat_with_online_content(chat_client):
"paulgraham.com/hwh.html",
]
assert response.status_code == 200
assert any(
[expected_response in response_message for expected_response in expected_responses]
), f"Expected links: {expected_responses}. Actual response: {response_message}"
assert any([expected_response in response_message for expected_response in expected_responses]), (
f"Expected links: {expected_responses}. Actual response: {response_message}"
)
# ----------------------------------------------------------------------------------------------------
@@ -70,15 +70,15 @@ def test_chat_with_online_content(chat_client):
def test_chat_with_online_webpage_content(chat_client):
# Act
q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
response = chat_client.post(f"/api/chat", json={"q": q})
response = chat_client.post("/api/chat", json={"q": q})
response_message = response.json()["response"]
# Assert
expected_responses = ["185", "1871", "horse"]
assert response.status_code == 200
assert any(
[expected_response in response_message for expected_response in expected_responses]
), f"Expected links: {expected_responses}. Actual response: {response_message}"
assert any([expected_response in response_message for expected_response in expected_responses]), (
f"Expected links: {expected_responses}. Actual response: {response_message}"
)
# ----------------------------------------------------------------------------------------------------
@@ -93,7 +93,7 @@ def test_answer_from_chat_history(chat_client, default_user2: KhojUser):
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": "What is my name?"})
response = chat_client.post("/api/chat", json={"q": "What is my name?"})
response_message = response.content.decode("utf-8")
# Assert
@@ -120,7 +120,7 @@ def test_answer_from_currently_retrieved_content(chat_client, default_user2: Kho
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": "Where was Xi Li born?"})
response = chat_client.post("/api/chat", json={"q": "Where was Xi Li born?"})
response_message = response.json()["response"]
# Assert
@@ -144,7 +144,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(chat_client_n
create_conversation(message_list, default_user2)
# Act
response = chat_client_no_background.post(f"/api/chat", json={"q": "Where was I born?"})
response = chat_client_no_background.post("/api/chat", json={"q": "Where was I born?"})
response_message = response.json()["response"]
# Assert
@@ -167,7 +167,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(chat_client, d
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": "Where was I born?"})
response = chat_client.post("/api/chat", json={"q": "Where was I born?"})
response_message = response.json()["response"]
# Assert
@@ -192,7 +192,7 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client, default_use
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": "Where was I born?"})
response = chat_client.post("/api/chat", json={"q": "Where was I born?"})
response_message = response.json()["response"]
# Assert
@@ -222,7 +222,7 @@ def test_answer_using_general_command(chat_client, default_user2: KhojUser):
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": query, "stream": True})
response = chat_client.post("/api/chat", json={"q": query, "stream": True})
response_message = response.content.decode("utf-8")
# Assert
@@ -240,7 +240,7 @@ def test_answer_from_retrieved_content_using_notes_command(chat_client, default_
create_conversation(message_list, default_user2)
# Act
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"]
# Assert
@@ -258,7 +258,7 @@ def test_answer_not_known_using_notes_command(chat_client_no_background, default
create_conversation(message_list, default_user2)
# Act
response = chat_client_no_background.post(f"/api/chat", json={"q": query})
response = chat_client_no_background.post("/api/chat", json={"q": query})
response_message = response.json()["response"]
# Assert
@@ -291,7 +291,7 @@ def test_summarize_one_file(chat_client, default_user2: KhojUser):
json={"filename": summarization_file, "conversation_id": str(conversation.id)},
)
query = "/summarize"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
assert response_message != ""
@@ -322,7 +322,7 @@ def test_summarize_extra_text(chat_client, default_user2: KhojUser):
json={"filename": summarization_file, "conversation_id": str(conversation.id)},
)
query = "/summarize tell me about Xiu"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
assert response_message != ""
@@ -349,7 +349,7 @@ def test_summarize_multiple_files(chat_client, default_user2: KhojUser):
)
query = "/summarize"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
@@ -365,7 +365,7 @@ def test_summarize_no_files(chat_client, default_user2: KhojUser):
# Act
query = "/summarize"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
@@ -400,11 +400,11 @@ def test_summarize_different_conversation(chat_client, default_user2: KhojUser):
# Act
query = "/summarize"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation2.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation2.id)})
response_message_conv2 = response.json()["response"]
# now make sure that the file filter is still in conversation 1
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation1.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation1.id)})
response_message_conv1 = response.json()["response"]
# Assert
@@ -430,7 +430,7 @@ def test_summarize_nonexistant_file(chat_client, default_user2: KhojUser):
json={"filename": "imaginary.markdown", "conversation_id": str(conversation.id)},
)
query = urllib.parse.quote("/summarize")
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
assert response_message == "No files selected for summarization. Please add files using the section on the left."
@@ -462,7 +462,7 @@ def test_summarize_diff_user_file(chat_client, default_user: KhojUser, pdf_confi
# Act
query = "/summarize"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert
@@ -477,7 +477,7 @@ def test_summarize_diff_user_file(chat_client, default_user: KhojUser, pdf_confi
def test_answer_requires_current_date_awareness(chat_client):
"Chat actor should be able to answer questions relative to current date using provided notes"
# Act
response = chat_client.post(f"/api/chat", json={"q": "Where did I have lunch today?", "stream": True})
response = chat_client.post("/api/chat", json={"q": "Where did I have lunch today?", "stream": True})
response_message = response.content.decode("utf-8")
# Assert
@@ -496,7 +496,7 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(chat_clien
"Chat director should be able to answer questions that require date aware aggregation across multiple notes"
# Act
query = "How much did I spend on dining this year?"
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"]
# Assert
@@ -518,7 +518,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
# Act
query = "Write a haiku about unit testing. Do not say anything else."
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"]
# Assert
@@ -536,7 +536,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
def test_ask_for_clarification_if_not_enough_context_in_question(chat_client_no_background):
# Act
query = "What is the name of Namitas older son?"
response = chat_client_no_background.post(f"/api/chat", json={"q": query})
response = chat_client_no_background.post("/api/chat", json={"q": query})
response_message = response.json()["response"].lower()
# Assert
@@ -571,7 +571,7 @@ def test_answer_in_chat_history_beyond_lookback_window(chat_client, default_user
# Act
query = "What is my name?"
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"]
# Assert
@@ -604,9 +604,7 @@ def test_answer_in_chat_history_by_conversation_id(chat_client, default_user2: K
# Act
query = "/general What is my favorite color?"
response = chat_client.post(
f"/api/chat", json={"q": query, "conversation_id": str(conversation.id), "stream": True}
)
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id), "stream": True})
response_message = response.content.decode("utf-8")
# Assert
@@ -639,7 +637,7 @@ def test_answer_in_chat_history_by_conversation_id_with_agent(
# Act
query = "/general What did I buy for breakfast?"
response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response = chat_client.post("/api/chat", json={"q": query, "conversation_id": str(conversation.id)})
response_message = response.json()["response"]
# Assert that agent only responds with the summary of spending
@@ -657,7 +655,7 @@ def test_answer_requires_multiple_independent_searches(chat_client):
"Chat director should be able to answer by doing multiple independent searches for required information"
# Act
query = "Is Xi Li older than Namita? Just say the older persons full name"
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"].lower()
# Assert
@@ -681,7 +679,7 @@ def test_answer_using_file_filter(chat_client):
query = (
'Is Xi Li older than Namita? Just say the older persons full name. file:"Namita.markdown" file:"Xi Li.markdown"'
)
response = chat_client.post(f"/api/chat", json={"q": query})
response = chat_client.post("/api/chat", json={"q": query})
response_message = response.json()["response"].lower()
# Assert

View File

@@ -12,7 +12,7 @@ def test_configure_indexing_heading_only_entries(tmp_path):
"""Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
Property drawers not considered Body. Ignore control characters for evaluating if Body empty."""
# Arrange
entry = f"""*** Heading
entry = """*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
@@ -74,7 +74,7 @@ def test_entry_split_when_exceeds_max_tokens():
"Ensure entries with compiled words exceeding max_tokens are split."
# Arrange
tmp_path = "/tmp/test.org"
entry = f"""*** Heading
entry = """*** Heading
\t\r
Body Line
"""
@@ -99,7 +99,7 @@ def test_entry_split_when_exceeds_max_tokens():
def test_entry_split_drops_large_words():
"Ensure entries drops words larger than specified max word length from compiled version."
# Arrange
entry_text = f"""First Line
entry_text = """First Line
dog=1\n\r\t
cat=10
car=4
@@ -124,7 +124,7 @@ book=2
def test_parse_org_file_into_single_entry_if_small(tmp_path):
"Parse org file into single entry if it fits within the token limits."
# Arrange
original_entry = f"""
original_entry = """
* Heading 1
body line 1
** Subheading 1.1
@@ -133,7 +133,7 @@ body line 1.1
data = {
f"{tmp_path}": original_entry,
}
expected_entry = f"""
expected_entry = """
* Heading 1
body line 1
@@ -155,7 +155,7 @@ body line 1.1
def test_parse_org_entry_with_children_as_single_entry_if_small(tmp_path):
"Parse org entry with child headings as single entry only if it fits within the tokens limits."
# Arrange
entry = f"""
entry = """
* Heading 1
body line 1
** Subheading 1.1
@@ -205,7 +205,7 @@ longer body line 2.1
def test_separate_sibling_org_entries_if_all_cannot_fit_in_token_limit(tmp_path):
"Parse org sibling entries as separate entries only if it fits within the tokens limits."
# Arrange
entry = f"""
entry = """
* Heading 1
body line 1
** Subheading 1.1
@@ -267,7 +267,7 @@ body line 3.1
def test_entry_with_body_to_entry(tmp_path):
"Ensure entries with valid body text are loaded."
# Arrange
entry = f"""*** Heading
entry = """*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
@@ -290,7 +290,7 @@ def test_entry_with_body_to_entry(tmp_path):
def test_file_with_entry_after_intro_text_to_entry(tmp_path):
"Ensure intro text before any headings is indexed."
# Arrange
entry = f"""
entry = """
Intro text
* Entry Heading
@@ -312,7 +312,7 @@ Intro text
def test_file_with_no_headings_to_entry(tmp_path):
"Ensure files with no heading, only body text are loaded."
# Arrange
entry = f"""
entry = """
- Bullet point 1
- Bullet point 2
"""
@@ -332,7 +332,7 @@ def test_file_with_no_headings_to_entry(tmp_path):
def test_extract_entries_with_different_level_headings(tmp_path):
"Extract org entries with different level headings."
# Arrange
entry = f"""
entry = """
* Heading 1
** Sub-Heading 1.1
* Heading 2
@@ -396,14 +396,16 @@ def test_line_number_tracking_in_recursive_split():
assert entry.uri is not None, f"Entry '{entry}' has a None URI."
assert match is not None, f"URI format is incorrect: {entry.uri}"
assert (
filepath_from_uri == org_file_path
), f"File path in URI '{filepath_from_uri}' does not match expected '{org_file_path}'"
assert filepath_from_uri == org_file_path, (
f"File path in URI '{filepath_from_uri}' does not match expected '{org_file_path}'"
)
# Ensure the first non-heading line in the compiled entry matches the line in the file
assert (
cleaned_first_entry_line in line_in_file.strip() or cleaned_first_entry_line in next_line_in_file.strip()
), f"First non-heading line '{cleaned_first_entry_line}' in {entry.raw} does not match line {line_number_from_uri} in file: '{line_in_file}' or next line '{next_line_in_file}'"
), (
f"First non-heading line '{cleaned_first_entry_line}' in {entry.raw} does not match line {line_number_from_uri} in file: '{line_in_file}' or next line '{next_line_in_file}'"
)
# Helper Functions

View File

@@ -8,7 +8,7 @@ from khoj.processor.content.org_mode import orgnode
def test_parse_entry_with_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""Body Line 1"""
entry = """Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -30,7 +30,7 @@ def test_parse_entry_with_no_headings(tmp_path):
def test_parse_minimal_entry(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""
entry = """
* Heading
Body Line 1"""
orgfile = create_file(tmp_path, entry)
@@ -54,7 +54,7 @@ Body Line 1"""
def test_parse_complete_entry(tmp_path):
"Test parsing of entry with all important fields"
# Arrange
entry = f"""
entry = """
*** DONE [#A] Heading :Tag1:TAG2:tag3:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
@@ -89,7 +89,7 @@ Body Line 2"""
def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
"Render heading entry with property drawer"
# Arrange
entry_to_render = f"""
entry_to_render = """
*** [#A] Heading1 :tag1:
:PROPERTIES:
:ID: 111-111-111-1111-1111
@@ -116,7 +116,7 @@ def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
def test_all_links_to_entry_rendered(tmp_path):
"Ensure all links to entry rendered in property drawer from entry"
# Arrange
entry = f"""
entry = """
*** [#A] Heading :tag1:
:PROPERTIES:
:ID: 123-456-789-4234-1231
@@ -133,7 +133,7 @@ Body Line 2
# Assert
# SOURCE link rendered with Heading
# ID link rendered with ID
assert f":ID: id:123-456-789-4234-1231" in f"{entries[0]}"
assert ":ID: id:123-456-789-4234-1231" in f"{entries[0]}"
# LINE link rendered with line number
assert f":LINE: file://{orgfile}#line=2" in f"{entries[0]}"
# LINE link rendered with line number
@@ -144,7 +144,7 @@ Body Line 2
def test_parse_multiple_entries(tmp_path):
"Test parsing of multiple entries"
# Arrange
content = f"""
content = """
*** FAILED [#A] Heading1 :tag1:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
@@ -176,12 +176,12 @@ Body 2
# Assert
assert len(entries) == 2
for index, entry in enumerate(entries):
assert entry.heading == f"Heading{index+1}"
assert entry.heading == f"Heading{index + 1}"
assert entry.todo == "FAILED" if index == 0 else "CANCELLED"
assert entry.tags == [f"tag{index+1}"]
assert entry.body == f"- Clocked Log {index+1}\n\nBody {index+1}\n\n"
assert entry.tags == [f"tag{index + 1}"]
assert entry.body == f"- Clocked Log {index + 1}\n\nBody {index + 1}\n\n"
assert entry.priority == "A"
assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}"
assert entry.Property("ID") == f"id:123-456-789-4234-000{index + 1}"
assert entry.closed == datetime.date(1984, 4, index + 1)
assert entry.scheduled == datetime.date(1984, 4, index + 1)
assert entry.deadline == datetime.date(1984, 4, index + 1)
@@ -194,7 +194,7 @@ Body 2
def test_parse_entry_with_empty_title(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE:
entry = """#+TITLE:
Body Line 1"""
orgfile = create_file(tmp_path, entry)
@@ -217,7 +217,7 @@ Body Line 1"""
def test_parse_entry_with_title_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE: test
entry = """#+TITLE: test
Body Line 1"""
orgfile = create_file(tmp_path, entry)
@@ -241,7 +241,7 @@ Body Line 1"""
def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE: title1
entry = """#+TITLE: title1
Body Line 1
#+TITLE: title2 """
orgfile = create_file(tmp_path, entry)
@@ -266,7 +266,7 @@ Body Line 1
def test_parse_org_with_intro_text_before_heading(tmp_path):
"Test parsing of org file with intro text before heading"
# Arrange
body = f"""#+TITLE: Title
body = """#+TITLE: Title
intro body
* Entry Heading
entry body
@@ -290,7 +290,7 @@ entry body
def test_parse_org_with_intro_text_multiple_titles_and_heading(tmp_path):
"Test parsing of org file with intro text, multiple titles and heading entry"
# Arrange
body = f"""#+TITLE: Title1
body = """#+TITLE: Title1
intro body
* Entry Heading
entry body
@@ -314,7 +314,7 @@ entry body
def test_parse_org_with_single_ancestor_heading(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
body = """
* Heading 1
body 1
** Sub Heading 1
@@ -336,7 +336,7 @@ body 1
def test_parse_org_with_multiple_ancestor_headings(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
body = """
* Heading 1
body 1
** Sub Heading 1
@@ -362,7 +362,7 @@ sub sub body 1
def test_parse_org_with_multiple_ancestor_headings_of_siblings(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
body = """
* Heading 1
body 1
** Sub Heading 1

View File

@@ -7,7 +7,7 @@ from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEnt
def test_plaintext_file():
"Convert files with no heading to jsonl."
# Arrange
raw_entry = f"""
raw_entry = """
Hi, I am a plaintext file and I have some plaintext words.
"""
plaintextfile = "test.txt"

View File

@@ -145,9 +145,9 @@ def test_entry_chunking_by_max_tokens(tmp_path, search_config, default_user: Kho
text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)
# Assert
assert (
"Deleted 0 entries. Created 3 new entries for user " in caplog.records[-1].message
), "new entry not split by max tokens"
assert "Deleted 0 entries. Created 3 new entries for user " in caplog.records[-1].message, (
"new entry not split by max tokens"
)
# ----------------------------------------------------------------------------------------------------
@@ -198,9 +198,9 @@ conda activate khoj
)
# Assert
assert (
"Deleted 0 entries. Created 3 new entries for user " in caplog.records[-1].message
), "new entry not split by max tokens"
assert "Deleted 0 entries. Created 3 new entries for user " in caplog.records[-1].message, (
"new entry not split by max tokens"
)
# ----------------------------------------------------------------------------------------------------