Use Black to format Khoj server code and tests

Commit authored by Debanjum Singh Solanky on 2023-02-17 10:04:26 -06:00
parent 6130fddf45
commit 5e83baab21
44 changed files with 1167 additions and 915 deletions

View File

@@ -6,59 +6,67 @@ import pytest
# Internal Packages
from khoj.search_type import image_search, text_search
from khoj.utils.helpers import resolve_absolute_path
from khoj.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
from khoj.utils.rawconfig import (
ContentConfig,
TextContentConfig,
ImageContentConfig,
SearchConfig,
TextSearchConfig,
ImageSearchConfig,
)
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.search_filter.file_filter import FileFilter
@pytest.fixture(scope="session")
def search_config() -> SearchConfig:
    """Build a session-wide SearchConfig with symmetric, asymmetric and image search models."""
    # Models are cached under ~/.khoj/search so repeated test runs avoid re-downloading them
    model_dir = resolve_absolute_path("~/.khoj/search")
    model_dir.mkdir(parents=True, exist_ok=True)

    search_config = SearchConfig()

    search_config.symmetric = TextSearchConfig(
        encoder="sentence-transformers/all-MiniLM-L6-v2",
        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
        model_directory=model_dir / "symmetric/",
    )

    search_config.asymmetric = TextSearchConfig(
        encoder="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
        model_directory=model_dir / "asymmetric/",
    )

    search_config.image = ImageSearchConfig(
        encoder="sentence-transformers/clip-ViT-B-32", model_directory=model_dir / "image/"
    )

    return search_config
@pytest.fixture(scope="session")
def content_config(tmp_path_factory, search_config: SearchConfig):
    """Index the test images and org notes once per session into a ContentConfig."""
    content_dir = tmp_path_factory.mktemp("content")

    # Generate Image Embeddings from Test Images
    content_config = ContentConfig()
    content_config.image = ImageContentConfig(
        input_directories=["tests/data/images"],
        embeddings_file=content_dir.joinpath("image_embeddings.pt"),
        batch_size=1,
        use_xmp_metadata=False,
    )
    image_search.setup(content_config.image, search_config.image, regenerate=False)

    # Generate Notes Embeddings from Test Notes
    content_config.org = TextContentConfig(
        input_files=None,
        input_filter=["tests/data/org/*.org"],
        compressed_jsonl=content_dir.joinpath("notes.jsonl.gz"),
        embeddings_file=content_dir.joinpath("note_embeddings.pt"),
    )
    filters = [DateFilter(), WordFilter(), FileFilter()]
    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)

    return content_config
@pytest.fixture(scope='function')
@pytest.fixture(scope="function")
def new_org_file(content_config: ContentConfig):
# Setup
new_org_file = Path(content_config.org.input_filter[0]).parent / "new_file.org"
@@ -79,9 +87,9 @@ def new_org_file(content_config: ContentConfig):
new_org_file.unlink()
@pytest.fixture(scope="function")
def org_config_with_only_new_file(content_config: ContentConfig, new_org_file: Path):
    """Return a copy of the org content config that indexes only the newly created org file."""
    new_org_config = deepcopy(content_config.org)
    new_org_config.input_files = [f"{new_org_file}"]
    new_org_config.input_filter = None  # disable glob filter so only input_files are indexed
    return new_org_config

View File

@@ -8,10 +8,10 @@ from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl
def test_no_transactions_in_file(tmp_path):
"Handle file with no transactions."
# Arrange
entry = f'''
entry = f"""
- Bullet point 1
- Bullet point 2
'''
"""
beancount_file = create_file(tmp_path, entry)
# Act
@@ -20,7 +20,8 @@ def test_no_transactions_in_file(tmp_path):
# Process Each Entry from All Beancount Files
jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries))
BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -30,11 +31,11 @@ def test_no_transactions_in_file(tmp_path):
def test_single_beancount_transaction_to_jsonl(tmp_path):
"Convert transaction from single file to jsonl."
# Arrange
entry = f'''
entry = f"""
1984-04-01 * "Payee" "Narration"
Expenses:Test:Test 1.00 KES
Assets:Test:Test -1.00 KES
'''
"""
beancount_file = create_file(tmp_path, entry)
# Act
@@ -43,7 +44,8 @@ Assets:Test:Test -1.00 KES
# Process Each Entry from All Beancount Files
jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map))
BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -53,7 +55,7 @@ Assets:Test:Test -1.00 KES
def test_multiple_transactions_to_jsonl(tmp_path):
"Convert multiple transactions from single file to jsonl."
# Arrange
entry = f'''
entry = f"""
1984-04-01 * "Payee" "Narration"
Expenses:Test:Test 1.00 KES
Assets:Test:Test -1.00 KES
@@ -61,7 +63,7 @@ Assets:Test:Test -1.00 KES
1984-04-01 * "Payee" "Narration"
Expenses:Test:Test 1.00 KES
Assets:Test:Test -1.00 KES
'''
"""
beancount_file = create_file(tmp_path, entry)
@@ -71,7 +73,8 @@ Assets:Test:Test -1.00 KES
# Process Each Entry from All Beancount Files
jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map))
BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -95,8 +98,8 @@ def test_get_beancount_files(tmp_path):
expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1]))
# Setup input-files, input-filters
input_files = [tmp_path / 'ledger.bean']
input_filter = [tmp_path / 'group1*.bean', tmp_path / 'group2*.beancount']
input_files = [tmp_path / "ledger.bean"]
input_filter = [tmp_path / "group1*.bean", tmp_path / "group2*.beancount"]
# Act
extracted_org_files = BeancountToJsonl.get_beancount_files(input_files, input_filter)

View File

@@ -6,7 +6,7 @@ from khoj.processor.conversation.gpt import converse, understand, message_to_pro
# Initialize variables for tests
model = "text-davinci-003"  # OpenAI completion model used by the live-API tests below
api_key = None  # Input your OpenAI API key to run the tests below
@@ -14,19 +14,22 @@ api_key = None # Input your OpenAI API key to run the tests below
# ----------------------------------------------------------------------------------------------------
def test_message_to_understand_prompt():
    """message_to_prompt should append the new question to the primer wrapped in restart/start sequences."""
    # Arrange
    understand_primer = 'Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=["companion", "notes", "ledger", "image", "music"]\nsearch(search-type, data);\nsearch-type=["google", "youtube"]\ngenerate(activity);\nactivity=["paint","write", "chat"]\ntrigger-emotion(emotion);\nemotion=["happy","confidence","fear","surprise","sadness","disgust","anger", "curiosity", "calm"]\n\nQ: How are you doing?\nA: activity("chat"); trigger-emotion("surprise")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember("notes", "Brother Antoine when we were at the beach"); trigger-emotion("curiosity");\nQ: what did we talk about last time?\nA: remember("notes", "talk last time"); trigger-emotion("curiosity");\nQ: Let\'s make some drawings!\nA: generate("paint"); trigger-emotion("happy");\nQ: Do you know anything about Lebanon?\nA: search("google", "lebanon"); trigger-emotion("confidence");\nQ: Find a video about a panda rolling in the grass\nA: search("youtube","panda rolling in the grass"); trigger-emotion("happy"); \nQ: Tell me a scary story\nA: generate("write" "A story about some adventure"); trigger-emotion("fear");\nQ: What fiction book was I reading last week about AI starship?\nA: remember("notes", "read fiction book about AI starship last week"); trigger-emotion("curiosity");\nQ: How much did I spend at Subway for dinner last time?\nA: remember("ledger", "last Subway dinner"); trigger-emotion("curiosity");\nQ: I\'m feeling sleepy\nA: activity("chat"); trigger-emotion("calm")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember("music", "popular Sri lankan song that Alex showed recently"); trigger-emotion("curiosity"); \nQ: You\'re pretty funny!\nA: activity("chat"); trigger-emotion("pride")'
    # Expected output is the primer plus the new question framed by the restart/start sequences
    expected_response = 'Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=["companion", "notes", "ledger", "image", "music"]\nsearch(search-type, data);\nsearch-type=["google", "youtube"]\ngenerate(activity);\nactivity=["paint","write", "chat"]\ntrigger-emotion(emotion);\nemotion=["happy","confidence","fear","surprise","sadness","disgust","anger", "curiosity", "calm"]\n\nQ: How are you doing?\nA: activity("chat"); trigger-emotion("surprise")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember("notes", "Brother Antoine when we were at the beach"); trigger-emotion("curiosity");\nQ: what did we talk about last time?\nA: remember("notes", "talk last time"); trigger-emotion("curiosity");\nQ: Let\'s make some drawings!\nA: generate("paint"); trigger-emotion("happy");\nQ: Do you know anything about Lebanon?\nA: search("google", "lebanon"); trigger-emotion("confidence");\nQ: Find a video about a panda rolling in the grass\nA: search("youtube","panda rolling in the grass"); trigger-emotion("happy"); \nQ: Tell me a scary story\nA: generate("write" "A story about some adventure"); trigger-emotion("fear");\nQ: What fiction book was I reading last week about AI starship?\nA: remember("notes", "read fiction book about AI starship last week"); trigger-emotion("curiosity");\nQ: How much did I spend at Subway for dinner last time?\nA: remember("ledger", "last Subway dinner"); trigger-emotion("curiosity");\nQ: I\'m feeling sleepy\nA: activity("chat"); trigger-emotion("calm")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember("music", "popular Sri lankan song that Alex showed recently"); trigger-emotion("curiosity"); \nQ: You\'re pretty funny!\nA: activity("chat"); trigger-emotion("pride")\nQ: When did I last dine at Burger King?\nA:'

    # Act
    actual_response = message_to_prompt(
        "When did I last dine at Burger King?", understand_primer, start_sequence="\nA:", restart_sequence="\nQ:"
    )

    # Assert
    assert actual_response == expected_response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(api_key is None,
reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
@pytest.mark.skipif(
api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_minimal_chat_with_gpt():
# Act
response = converse("What will happen when the stars go out?", model=model, api_key=api_key)
@@ -36,21 +39,29 @@ def test_minimal_chat_with_gpt():
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_chat_with_history():
    """converse should answer using facts stated earlier in the passed conversation history."""
    # Arrange
    ai_prompt = "AI:"
    human_prompt = "Human:"

    conversation_primer = f"""
The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly companion.
{human_prompt} Hello, I am Testatron. Who are you?
{ai_prompt} Hi, I am Khoj, an AI conversational companion created by OpenAI. How can I help you today?"""

    # Act
    # temperature=0 keeps the completion deterministic; max_tokens bounds test cost
    response = converse(
        "Hi Khoj, What is my name?",
        model=model,
        conversation_history=conversation_primer,
        api_key=api_key,
        temperature=0,
        max_tokens=50,
    )

    # Assert
    assert len(response) > 0
@@ -58,12 +69,13 @@ The following is a conversation with an AI assistant. The assistant is helpful,
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_understand_message_using_gpt():
    """understand should classify a dining question as a ledger memory lookup."""
    # Act
    response = understand("When did I last dine at Subway?", model=model, api_key=api_key)

    # Assert
    assert len(response) > 0
    assert response["intent"]["memory-type"] == "ledger"

View File

@@ -14,35 +14,37 @@ def test_cli_minimal_default():
actual_args = cli([])
# Assert
assert actual_args.config_file == resolve_absolute_path(Path('~/.khoj/khoj.yml'))
assert actual_args.config_file == resolve_absolute_path(Path("~/.khoj/khoj.yml"))
assert actual_args.regenerate == False
assert actual_args.no_gui == False
assert actual_args.verbose == 0
# ----------------------------------------------------------------------------------------------------
def test_cli_invalid_config_file_path():
    """cli should accept a path to a non-existent config file but load no config from it."""
    # Arrange
    # Randomized name ensures the file cannot accidentally exist between test runs
    non_existent_config_file = f"non-existent-khoj-{random()}.yml"

    # Act
    actual_args = cli([f"-c={non_existent_config_file}"])

    # Assert
    assert actual_args.config_file == resolve_absolute_path(non_existent_config_file)
    assert actual_args.config is None
# ----------------------------------------------------------------------------------------------------
def test_cli_config_from_file():
    """cli should load config contents, regenerate, no-gui and verbosity flags from arguments."""
    # Act
    actual_args = cli(["-c=tests/data/config.yml", "--regenerate", "--no-gui", "-vvv"])

    # Assert
    assert actual_args.config_file == resolve_absolute_path(Path("tests/data/config.yml"))
    assert actual_args.no_gui == True
    assert actual_args.regenerate == True
    assert actual_args.config is not None
    assert actual_args.config.content_type.org.input_files == [
        Path("~/first_from_config.org"),
        Path("~/second_from_config.org"),
    ]
    assert actual_args.verbose == 3  # -vvv maps to verbosity level 3

View File

@@ -21,6 +21,7 @@ from khoj.search_filter.file_filter import FileFilter
# ----------------------------------------------------------------------------------------------------
client = TestClient(app)
# Test
# ----------------------------------------------------------------------------------------------------
def test_search_with_invalid_content_type():
@@ -98,9 +99,11 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
config.content_type = content_config
config.search_type = search_config
model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
query_expected_image_pairs = [("kitten", "kitten_park.jpg"),
("a horse and dog on a leash", "horse_dog.jpg"),
("A guinea pig eating grass", "guineapig_grass.jpg")]
query_expected_image_pairs = [
("kitten", "kitten_park.jpg"),
("a horse and dog on a leash", "horse_dog.jpg"),
("A guinea pig eating grass", "guineapig_grass.jpg"),
]
for query, expected_image_name in query_expected_image_pairs:
# Act
@@ -135,7 +138,9 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
filters = [WordFilter(), FileFilter()]
model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
model.orgmode_search = text_search.setup(
OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
)
user_query = quote('+"Emacs" file:"*.org"')
# Act
@@ -152,7 +157,9 @@ def test_notes_search_with_only_filters(content_config: ContentConfig, search_co
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
filters = [WordFilter()]
model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
model.orgmode_search = text_search.setup(
OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
)
user_query = quote('How to git install application? +"Emacs"')
# Act
@@ -169,7 +176,9 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
filters = [WordFilter()]
model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
model.orgmode_search = text_search.setup(
OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
)
user_query = quote('How to git install application? -"clone"')
# Act

View File

@@ -10,53 +10,59 @@ from khoj.utils.rawconfig import Entry
def test_date_filter():
entries = [
Entry(compiled='', raw='Entry with no date'),
Entry(compiled='', raw='April Fools entry: 1984-04-01'),
Entry(compiled='', raw='Entry with date:1984-04-02')
Entry(compiled="", raw="Entry with no date"),
Entry(compiled="", raw="April Fools entry: 1984-04-01"),
Entry(compiled="", raw="Entry with date:1984-04-02"),
]
q_with_no_date_filter = 'head tail'
q_with_no_date_filter = "head tail"
ret_query, entry_indices = DateFilter().apply(q_with_no_date_filter, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 1, 2}
q_with_dtrange_non_overlapping_at_boundary = 'head dt>"1984-04-01" dt<"1984-04-02" tail'
ret_query, entry_indices = DateFilter().apply(q_with_dtrange_non_overlapping_at_boundary, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == set()
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<"1984-04-03" tail'
ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {2}
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<"1984-04-02" tail'
ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {1}
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<="1984-04-02" tail'
ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {2}
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<="1984-04-02" tail'
ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {1, 2}
def test_extract_date_range():
    """extract_date_range should convert date filter terms into [start, end) timestamp pairs."""
    assert DateFilter().extract_date_range('head dt>"1984-01-04" dt<"1984-01-07" tail') == [
        datetime(1984, 1, 5, 0, 0, 0).timestamp(),
        datetime(1984, 1, 7, 0, 0, 0).timestamp(),
    ]
    assert DateFilter().extract_date_range('head dt<="1984-01-01"') == [0, datetime(1984, 1, 2, 0, 0, 0).timestamp()]
    assert DateFilter().extract_date_range('head dt>="1984-01-01"') == [datetime(1984, 1, 1, 0, 0, 0).timestamp(), inf]
    assert DateFilter().extract_date_range('head dt:"1984-01-01"') == [
        datetime(1984, 1, 1, 0, 0, 0).timestamp(),
        datetime(1984, 1, 2, 0, 0, 0).timestamp(),
    ]

    # Unparseable date filter specified in query
    assert DateFilter().extract_date_range('head dt:"Summer of 69" tail') is None

    # No date filter specified in query
    assert DateFilter().extract_date_range("head tail") is None

    # Non intersecting date ranges
    assert DateFilter().extract_date_range('head dt>"1984-01-01" dt<"1984-01-01" tail') is None
@@ -66,43 +72,79 @@ def test_parse():
test_now = datetime(1984, 4, 1, 21, 21, 21)
# day variations
assert DateFilter().parse('today', relative_base=test_now) == (datetime(1984, 4, 1, 0, 0, 0), datetime(1984, 4, 2, 0, 0, 0))
assert DateFilter().parse('tomorrow', relative_base=test_now) == (datetime(1984, 4, 2, 0, 0, 0), datetime(1984, 4, 3, 0, 0, 0))
assert DateFilter().parse('yesterday', relative_base=test_now) == (datetime(1984, 3, 31, 0, 0, 0), datetime(1984, 4, 1, 0, 0, 0))
assert DateFilter().parse('5 days ago', relative_base=test_now) == (datetime(1984, 3, 27, 0, 0, 0), datetime(1984, 3, 28, 0, 0, 0))
assert DateFilter().parse("today", relative_base=test_now) == (
datetime(1984, 4, 1, 0, 0, 0),
datetime(1984, 4, 2, 0, 0, 0),
)
assert DateFilter().parse("tomorrow", relative_base=test_now) == (
datetime(1984, 4, 2, 0, 0, 0),
datetime(1984, 4, 3, 0, 0, 0),
)
assert DateFilter().parse("yesterday", relative_base=test_now) == (
datetime(1984, 3, 31, 0, 0, 0),
datetime(1984, 4, 1, 0, 0, 0),
)
assert DateFilter().parse("5 days ago", relative_base=test_now) == (
datetime(1984, 3, 27, 0, 0, 0),
datetime(1984, 3, 28, 0, 0, 0),
)
# week variations
assert DateFilter().parse('last week', relative_base=test_now) == (datetime(1984, 3, 18, 0, 0, 0), datetime(1984, 3, 25, 0, 0, 0))
assert DateFilter().parse('2 weeks ago', relative_base=test_now) == (datetime(1984, 3, 11, 0, 0, 0), datetime(1984, 3, 18, 0, 0, 0))
assert DateFilter().parse("last week", relative_base=test_now) == (
datetime(1984, 3, 18, 0, 0, 0),
datetime(1984, 3, 25, 0, 0, 0),
)
assert DateFilter().parse("2 weeks ago", relative_base=test_now) == (
datetime(1984, 3, 11, 0, 0, 0),
datetime(1984, 3, 18, 0, 0, 0),
)
# month variations
assert DateFilter().parse('next month', relative_base=test_now) == (datetime(1984, 5, 1, 0, 0, 0), datetime(1984, 6, 1, 0, 0, 0))
assert DateFilter().parse('2 months ago', relative_base=test_now) == (datetime(1984, 2, 1, 0, 0, 0), datetime(1984, 3, 1, 0, 0, 0))
assert DateFilter().parse("next month", relative_base=test_now) == (
datetime(1984, 5, 1, 0, 0, 0),
datetime(1984, 6, 1, 0, 0, 0),
)
assert DateFilter().parse("2 months ago", relative_base=test_now) == (
datetime(1984, 2, 1, 0, 0, 0),
datetime(1984, 3, 1, 0, 0, 0),
)
# year variations
assert DateFilter().parse('this year', relative_base=test_now) == (datetime(1984, 1, 1, 0, 0, 0), datetime(1985, 1, 1, 0, 0, 0))
assert DateFilter().parse('20 years later', relative_base=test_now) == (datetime(2004, 1, 1, 0, 0, 0), datetime(2005, 1, 1, 0, 0, 0))
assert DateFilter().parse("this year", relative_base=test_now) == (
datetime(1984, 1, 1, 0, 0, 0),
datetime(1985, 1, 1, 0, 0, 0),
)
assert DateFilter().parse("20 years later", relative_base=test_now) == (
datetime(2004, 1, 1, 0, 0, 0),
datetime(2005, 1, 1, 0, 0, 0),
)
# specific month/date variation
assert DateFilter().parse('in august', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0))
assert DateFilter().parse('on 1983-08-01', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0))
assert DateFilter().parse("in august", relative_base=test_now) == (
datetime(1983, 8, 1, 0, 0, 0),
datetime(1983, 8, 2, 0, 0, 0),
)
assert DateFilter().parse("on 1983-08-01", relative_base=test_now) == (
datetime(1983, 8, 1, 0, 0, 0),
datetime(1983, 8, 2, 0, 0, 0),
)
def test_date_filter_regex():
    """The date regex should capture (operator, date-string) pairs anywhere in the query."""
    dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>"today" dt:"1984-01-01"')
    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

    dtrange_match = re.findall(DateFilter().date_regex, 'head dt>"today" dt:"1984-01-01" multi word tail')
    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

    dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>="today" dt="1984-01-01"')
    assert dtrange_match == [(">=", "today"), ("=", "1984-01-01")]

    # Quoted date strings may contain spaces
    dtrange_match = re.findall(DateFilter().date_regex, 'dt<"multi word date" multi word tail')
    assert dtrange_match == [("<", "multi word date")]

    dtrange_match = re.findall(DateFilter().date_regex, 'head dt<="multi word date"')
    assert dtrange_match == [("<=", "multi word date")]

    # No date terms present
    dtrange_match = re.findall(DateFilter().date_regex, "head tail")
    assert dtrange_match == []

View File

@@ -7,7 +7,7 @@ def test_no_file_filter():
# Arrange
file_filter = FileFilter()
entries = arrange_content()
q_with_no_filter = 'head tail'
q_with_no_filter = "head tail"
# Act
can_filter = file_filter.can_filter(q_with_no_filter)
@@ -15,7 +15,7 @@ def test_no_file_filter():
# Assert
assert can_filter == False
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 1, 2, 3}
@@ -31,7 +31,7 @@ def test_file_filter_with_non_existent_file():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {}
@@ -47,7 +47,7 @@ def test_single_file_filter():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 2}
@@ -63,7 +63,7 @@ def test_file_filter_with_partial_match():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 2}
@@ -79,7 +79,7 @@ def test_file_filter_with_regex_match():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 1, 2, 3}
@@ -95,16 +95,16 @@ def test_multiple_file_filter():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 1, 2, 3}
def arrange_content():
entries = [
Entry(compiled='', raw='First Entry', file= 'file 1.org'),
Entry(compiled='', raw='Second Entry', file= 'file2.org'),
Entry(compiled='', raw='Third Entry', file= 'file 1.org'),
Entry(compiled='', raw='Fourth Entry', file= 'file2.org')
Entry(compiled="", raw="First Entry", file="file 1.org"),
Entry(compiled="", raw="Second Entry", file="file2.org"),
Entry(compiled="", raw="Third Entry", file="file 1.org"),
Entry(compiled="", raw="Fourth Entry", file="file2.org"),
]
return entries

View File

@@ -1,5 +1,6 @@
from khoj.utils import helpers
def test_get_from_null_dict():
# null handling
assert helpers.get_from_dict(dict()) == dict()
@@ -7,39 +8,39 @@ def test_get_from_null_dict():
# key present in nested dictionary
# 1-level dictionary
assert helpers.get_from_dict({'a': 1, 'b': 2}, 'a') == 1
assert helpers.get_from_dict({'a': 1, 'b': 2}, 'c') == None
assert helpers.get_from_dict({"a": 1, "b": 2}, "a") == 1
assert helpers.get_from_dict({"a": 1, "b": 2}, "c") == None
# 2-level dictionary
assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'a') == {'a_a': 1}
assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'a', 'a_a') == 1
assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "a") == {"a_a": 1}
assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "a", "a_a") == 1
# key not present in nested dictionary
# 2-level_dictionary
assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'b', 'b_a') == None
assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "b", "b_a") == None
def test_merge_dicts():
# basic merge of dicts with non-overlapping keys
assert helpers.merge_dicts(priority_dict={'a': 1}, default_dict={'b': 2}) == {'a': 1, 'b': 2}
assert helpers.merge_dicts(priority_dict={"a": 1}, default_dict={"b": 2}) == {"a": 1, "b": 2}
# use default dict items when not present in priority dict
assert helpers.merge_dicts(priority_dict={}, default_dict={'b': 2}) == {'b': 2}
assert helpers.merge_dicts(priority_dict={}, default_dict={"b": 2}) == {"b": 2}
# do not override existing key in priority_dict with default dict
assert helpers.merge_dicts(priority_dict={'a': 1}, default_dict={'a': 2}) == {'a': 1}
assert helpers.merge_dicts(priority_dict={"a": 1}, default_dict={"a": 2}) == {"a": 1}
def test_lru_cache():
# Test initializing cache
cache = helpers.LRU({'a': 1, 'b': 2}, capacity=2)
assert cache == {'a': 1, 'b': 2}
cache = helpers.LRU({"a": 1, "b": 2}, capacity=2)
assert cache == {"a": 1, "b": 2}
# Test capacity overflow
cache['c'] = 3
assert cache == {'b': 2, 'c': 3}
cache["c"] = 3
assert cache == {"b": 2, "c": 3}
# Test delete least recently used item from LRU cache on capacity overflow
cache['b'] # accessing 'b' makes it the most recently used item
cache['d'] = 4 # so 'c' is deleted from the cache instead of 'b'
assert cache == {'b': 2, 'd': 4}
cache["b"] # accessing 'b' makes it the most recently used item
cache["d"] = 4 # so 'c' is deleted from the cache instead of 'b'
assert cache == {"b": 2, "d": 4}

View File

@@ -30,7 +30,8 @@ def test_image_metadata(content_config: ContentConfig):
expected_metadata_image_name_pairs = [
(["Billi Ka Bacha.", "Cat", "Grass"], "kitten_park.jpg"),
(["Pasture.", "Horse", "Dog"], "horse_dog.jpg"),
(["Guinea Pig Eating Celery.", "Rodent", "Whiskers"], "guineapig_grass.jpg")]
(["Guinea Pig Eating Celery.", "Rodent", "Whiskers"], "guineapig_grass.jpg"),
]
test_image_paths = [
Path(content_config.image.input_directories[0] / image_name[1])
@@ -51,23 +52,23 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
# Arrange
output_directory = resolve_absolute_path(web_directory)
model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
query_expected_image_pairs = [("kitten", "kitten_park.jpg"),
("horse and dog in a farm", "horse_dog.jpg"),
("A guinea pig eating grass", "guineapig_grass.jpg")]
query_expected_image_pairs = [
("kitten", "kitten_park.jpg"),
("horse and dog in a farm", "horse_dog.jpg"),
("A guinea pig eating grass", "guineapig_grass.jpg"),
]
# Act
for query, expected_image_name in query_expected_image_pairs:
hits = image_search.query(
query,
count = 1,
model = model.image_search)
hits = image_search.query(query, count=1, model=model.image_search)
results = image_search.collate_results(
hits,
model.image_search.image_names,
output_directory=output_directory,
image_files_url='/static/images',
count=1)
image_files_url="/static/images",
count=1,
)
actual_image_path = output_directory.joinpath(Path(results[0].entry).name)
actual_image = Image.open(actual_image_path)
@@ -86,16 +87,13 @@ def test_image_search_query_truncated(content_config: ContentConfig, search_conf
# Arrange
model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
max_words_supported = 10
query = " ".join(["hello"]*100)
truncated_query = " ".join(["hello"]*max_words_supported)
query = " ".join(["hello"] * 100)
truncated_query = " ".join(["hello"] * max_words_supported)
# Act
try:
with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
image_search.query(
query,
count = 1,
model = model.image_search)
image_search.query(query, count=1, model=model.image_search)
# Assert
except RuntimeError as e:
if "The size of tensor a (102) must match the size of tensor b (77)" in str(e):
@@ -115,17 +113,15 @@ def test_image_search_by_filepath(content_config: ContentConfig, search_config:
# Act
with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
hits = image_search.query(
query,
count = 1,
model = model.image_search)
hits = image_search.query(query, count=1, model=model.image_search)
results = image_search.collate_results(
hits,
model.image_search.image_names,
output_directory=output_directory,
image_files_url='/static/images',
count=1)
image_files_url="/static/images",
count=1,
)
actual_image_path = output_directory.joinpath(Path(results[0].entry).name)
actual_image = Image.open(actual_image_path)
@@ -133,7 +129,9 @@ def test_image_search_by_filepath(content_config: ContentConfig, search_config:
# Assert
# Ensure file search triggered instead of query with file path as string
assert f"Find Images by Image: {resolve_absolute_path(expected_image_path)}" in caplog.text, "File search not triggered"
assert (
f"Find Images by Image: {resolve_absolute_path(expected_image_path)}" in caplog.text
), "File search not triggered"
# Ensure the correct image is returned
assert expected_image == actual_image, "Incorrect image returned by file search"

View File

@@ -8,10 +8,10 @@ from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
"Convert files with no heading to jsonl."
# Arrange
entry = f'''
entry = f"""
- Bullet point 1
- Bullet point 2
'''
"""
markdownfile = create_file(tmp_path, entry)
# Act
@@ -20,7 +20,8 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
# Process Each Entry from All Notes Files
jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
MarkdownToJsonl.convert_markdown_entries_to_maps(entry_nodes, file_to_entries))
MarkdownToJsonl.convert_markdown_entries_to_maps(entry_nodes, file_to_entries)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -30,10 +31,10 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
def test_single_markdown_entry_to_jsonl(tmp_path):
"Convert markdown entry from single file to jsonl."
# Arrange
entry = f'''### Heading
entry = f"""### Heading
\t\r
Body Line 1
'''
"""
markdownfile = create_file(tmp_path, entry)
# Act
@@ -42,7 +43,8 @@ def test_single_markdown_entry_to_jsonl(tmp_path):
# Process Each Entry from All Notes Files
jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map))
MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -52,14 +54,14 @@ def test_single_markdown_entry_to_jsonl(tmp_path):
def test_multiple_markdown_entries_to_jsonl(tmp_path):
"Convert multiple markdown entries from single file to jsonl."
# Arrange
entry = f'''
entry = f"""
### Heading 1
\t\r
Heading 1 Body Line 1
### Heading 2
\t\r
Heading 2 Body Line 2
'''
"""
markdownfile = create_file(tmp_path, entry)
# Act
@@ -68,7 +70,8 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path):
# Process Each Entry from All Notes Files
jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map))
MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -92,8 +95,8 @@ def test_get_markdown_files(tmp_path):
expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1]))
# Setup input-files, input-filters
input_files = [tmp_path / 'notes.md']
input_filter = [tmp_path / 'group1*.md', tmp_path / 'group2*.markdown']
input_files = [tmp_path / "notes.md"]
input_filter = [tmp_path / "group1*.md", tmp_path / "group2*.markdown"]
# Act
extracted_org_files = MarkdownToJsonl.get_markdown_files(input_files, input_filter)
@@ -106,10 +109,10 @@ def test_get_markdown_files(tmp_path):
def test_extract_entries_with_different_level_headings(tmp_path):
"Extract markdown entries with different level headings."
# Arrange
entry = f'''
entry = f"""
# Heading 1
## Heading 2
'''
"""
markdownfile = create_file(tmp_path, entry)
# Act

View File

@@ -9,23 +9,25 @@ from khoj.utils.rawconfig import Entry
def test_configure_heading_entry_to_jsonl(tmp_path):
'''Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
Property drawers not considered Body. Ignore control characters for evaluating if Body empty.'''
"""Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
Property drawers not considered Body. Ignore control characters for evaluating if Body empty."""
# Arrange
entry = f'''*** Heading
entry = f"""*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t \r
'''
"""
orgfile = create_file(tmp_path, entry)
for index_heading_entries in [True, False]:
# Act
# Extract entries into jsonl from specified Org files
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(OrgToJsonl.convert_org_nodes_to_entries(
*OrgToJsonl.extract_org_entries(org_files=[orgfile]),
index_heading_entries=index_heading_entries))
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
OrgToJsonl.convert_org_nodes_to_entries(
*OrgToJsonl.extract_org_entries(org_files=[orgfile]), index_heading_entries=index_heading_entries
)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -40,10 +42,10 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
def test_entry_split_when_exceeds_max_words(tmp_path):
"Ensure entries with compiled words exceeding max_words are split."
# Arrange
entry = f'''*** Heading
entry = f"""*** Heading
\t\r
Body Line 1
'''
"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -53,9 +55,9 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
# Split each entry from specified Org files by max words
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
TextToJsonl.split_entries_by_max_tokens(
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map),
max_tokens = 2)
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=2
)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -65,15 +67,15 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
def test_entry_split_drops_large_words(tmp_path):
"Ensure entries drops words larger than specified max word length from compiled version."
# Arrange
entry_text = f'''*** Heading
entry_text = f"""*** Heading
\t\r
Body Line 1
'''
"""
entry = Entry(raw=entry_text, compiled=entry_text)
# Act
# Split entry by max words and drop words larger than max word length
processed_entry = TextToJsonl.split_entries_by_max_tokens([entry], max_word_length = 5)[0]
processed_entry = TextToJsonl.split_entries_by_max_tokens([entry], max_word_length=5)[0]
# Assert
# "Heading" dropped from compiled version because its over the set max word limit
@@ -83,13 +85,13 @@ def test_entry_split_drops_large_words(tmp_path):
def test_entry_with_body_to_jsonl(tmp_path):
"Ensure entries with valid body text are loaded."
# Arrange
entry = f'''*** Heading
entry = f"""*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r
Body Line 1
'''
"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -97,7 +99,9 @@ def test_entry_with_body_to_jsonl(tmp_path):
entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=[orgfile])
# Process Each Entry from All Notes Files
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map))
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -107,10 +111,10 @@ def test_entry_with_body_to_jsonl(tmp_path):
def test_file_with_no_headings_to_jsonl(tmp_path):
"Ensure files with no heading, only body text are loaded."
# Arrange
entry = f'''
entry = f"""
- Bullet point 1
- Bullet point 2
'''
"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -120,7 +124,7 @@ def test_file_with_no_headings_to_jsonl(tmp_path):
# Process Each Entry from All Notes Files
entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
assert len(jsonl_data) == 1
@@ -143,8 +147,8 @@ def test_get_org_files(tmp_path):
expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, orgfile1]))
# Setup input-files, input-filters
input_files = [tmp_path / 'orgfile1.org']
input_filter = [tmp_path / 'group1*.org', tmp_path / 'group2*.org']
input_files = [tmp_path / "orgfile1.org"]
input_filter = [tmp_path / "group1*.org", tmp_path / "group2*.org"]
# Act
extracted_org_files = OrgToJsonl.get_org_files(input_files, input_filter)
@@ -157,10 +161,10 @@ def test_get_org_files(tmp_path):
def test_extract_entries_with_different_level_headings(tmp_path):
"Extract org entries with different level headings."
# Arrange
entry = f'''
entry = f"""
* Heading 1
** Heading 2
'''
"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -169,8 +173,8 @@ def test_extract_entries_with_different_level_headings(tmp_path):
# Assert
assert len(entries) == 2
assert f'{entries[0]}'.startswith("* Heading 1")
assert f'{entries[1]}'.startswith("** Heading 2")
assert f"{entries[0]}".startswith("* Heading 1")
assert f"{entries[1]}".startswith("** Heading 2")
# Helper Functions

View File

@@ -10,7 +10,7 @@ from khoj.processor.org_mode import orgnode
def test_parse_entry_with_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f'''Body Line 1'''
entry = f"""Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -18,7 +18,7 @@ def test_parse_entry_with_no_headings(tmp_path):
# Assert
assert len(entries) == 1
assert entries[0].heading == f'{orgfile}'
assert entries[0].heading == f"{orgfile}"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
@@ -32,9 +32,9 @@ def test_parse_entry_with_no_headings(tmp_path):
def test_parse_minimal_entry(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f'''
entry = f"""
* Heading
Body Line 1'''
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -56,7 +56,7 @@ Body Line 1'''
def test_parse_complete_entry(tmp_path):
"Test parsing of entry with all important fields"
# Arrange
entry = f'''
entry = f"""
*** DONE [#A] Heading :Tag1:TAG2:tag3:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
@@ -67,7 +67,7 @@ CLOCK: [1984-04-01 Sun 09:00]--[1984-04-01 Sun 12:00] => 3:00
- Clocked Log 1
:END:
Body Line 1
Body Line 2'''
Body Line 2"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -81,45 +81,45 @@ Body Line 2'''
assert entries[0].body == "- Clocked Log 1\nBody Line 1\nBody Line 2"
assert entries[0].priority == "A"
assert entries[0].Property("ID") == "id:123-456-789-4234-1231"
assert entries[0].closed == datetime.date(1984,4,1)
assert entries[0].scheduled == datetime.date(1984,4,1)
assert entries[0].deadline == datetime.date(1984,4,1)
assert entries[0].logbook == [(datetime.datetime(1984,4,1,9,0,0), datetime.datetime(1984,4,1,12,0,0))]
assert entries[0].closed == datetime.date(1984, 4, 1)
assert entries[0].scheduled == datetime.date(1984, 4, 1)
assert entries[0].deadline == datetime.date(1984, 4, 1)
assert entries[0].logbook == [(datetime.datetime(1984, 4, 1, 9, 0, 0), datetime.datetime(1984, 4, 1, 12, 0, 0))]
# ----------------------------------------------------------------------------------------------------
def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
"Render heading entry with property drawer"
# Arrange
entry_to_render = f'''
entry_to_render = f"""
*** [#A] Heading1 :tag1:
:PROPERTIES:
:ID: 111-111-111-1111-1111
:END:
\t\r \n
'''
"""
orgfile = create_file(tmp_path, entry_to_render)
expected_entry = f'''*** [#A] Heading1 :tag1:
expected_entry = f"""*** [#A] Heading1 :tag1:
:PROPERTIES:
:LINE: file:{orgfile}::2
:ID: id:111-111-111-1111-1111
:SOURCE: [[file:{orgfile}::*Heading1]]
:END:
'''
"""
# Act
parsed_entries = orgnode.makelist(orgfile)
# Assert
assert f'{parsed_entries[0]}' == expected_entry
assert f"{parsed_entries[0]}" == expected_entry
# ----------------------------------------------------------------------------------------------------
def test_all_links_to_entry_rendered(tmp_path):
"Ensure all links to entry rendered in property drawer from entry"
# Arrange
entry = f'''
entry = f"""
*** [#A] Heading :tag1:
:PROPERTIES:
:ID: 123-456-789-4234-1231
@@ -127,7 +127,7 @@ def test_all_links_to_entry_rendered(tmp_path):
Body Line 1
*** Heading2
Body Line 2
'''
"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -135,23 +135,23 @@ Body Line 2
# Assert
# SOURCE link rendered with Heading
assert f':SOURCE: [[file:{orgfile}::*{entries[0].heading}]]' in f'{entries[0]}'
assert f":SOURCE: [[file:{orgfile}::*{entries[0].heading}]]" in f"{entries[0]}"
# ID link rendered with ID
assert f':ID: id:123-456-789-4234-1231' in f'{entries[0]}'
assert f":ID: id:123-456-789-4234-1231" in f"{entries[0]}"
# LINE link rendered with line number
assert f':LINE: file:{orgfile}::2' in f'{entries[0]}'
assert f":LINE: file:{orgfile}::2" in f"{entries[0]}"
# ----------------------------------------------------------------------------------------------------
def test_source_link_to_entry_escaped_for_rendering(tmp_path):
"Test SOURCE link renders with square brackets in filename, heading escaped for org-mode rendering"
# Arrange
entry = f'''
entry = f"""
*** [#A] Heading[1] :tag1:
:PROPERTIES:
:ID: 123-456-789-4234-1231
:END:
Body Line 1'''
Body Line 1"""
orgfile = create_file(tmp_path, entry, filename="test[1].org")
# Act
@@ -162,15 +162,15 @@ Body Line 1'''
# parsed heading from entry
assert entries[0].heading == "Heading[1]"
# ensure SOURCE link has square brackets in filename, heading escaped in rendered entries
escaped_orgfile = f'{orgfile}'.replace("[1]", "\\[1\\]")
assert f':SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]' in f'{entries[0]}'
escaped_orgfile = f"{orgfile}".replace("[1]", "\\[1\\]")
assert f":SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]" in f"{entries[0]}"
# ----------------------------------------------------------------------------------------------------
def test_parse_multiple_entries(tmp_path):
"Test parsing of multiple entries"
# Arrange
content = f'''
content = f"""
*** FAILED [#A] Heading1 :tag1:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
@@ -193,7 +193,7 @@ CLOCK: [1984-04-02 Mon 09:00]--[1984-04-02 Mon 12:00] => 3:00
:END:
Body 2
'''
"""
orgfile = create_file(tmp_path, content)
# Act
@@ -208,18 +208,20 @@ Body 2
assert entry.body == f"- Clocked Log {index+1}\nBody {index+1}\n\n"
assert entry.priority == "A"
assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}"
assert entry.closed == datetime.date(1984,4,index+1)
assert entry.scheduled == datetime.date(1984,4,index+1)
assert entry.deadline == datetime.date(1984,4,index+1)
assert entry.logbook == [(datetime.datetime(1984,4,index+1,9,0,0), datetime.datetime(1984,4,index+1,12,0,0))]
assert entry.closed == datetime.date(1984, 4, index + 1)
assert entry.scheduled == datetime.date(1984, 4, index + 1)
assert entry.deadline == datetime.date(1984, 4, index + 1)
assert entry.logbook == [
(datetime.datetime(1984, 4, index + 1, 9, 0, 0), datetime.datetime(1984, 4, index + 1, 12, 0, 0))
]
# ----------------------------------------------------------------------------------------------------
def test_parse_entry_with_empty_title(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f'''#+TITLE:
Body Line 1'''
entry = f"""#+TITLE:
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -227,7 +229,7 @@ Body Line 1'''
# Assert
assert len(entries) == 1
assert entries[0].heading == f'{orgfile}'
assert entries[0].heading == f"{orgfile}"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
@@ -241,8 +243,8 @@ Body Line 1'''
def test_parse_entry_with_title_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f'''#+TITLE: test
Body Line 1'''
entry = f"""#+TITLE: test
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
@@ -250,7 +252,7 @@ Body Line 1'''
# Assert
assert len(entries) == 1
assert entries[0].heading == 'test'
assert entries[0].heading == "test"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
@@ -264,9 +266,9 @@ Body Line 1'''
def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f'''#+TITLE: title1
entry = f"""#+TITLE: title1
Body Line 1
#+TITLE: title2 '''
#+TITLE: title2 """
orgfile = create_file(tmp_path, entry)
# Act
@@ -274,7 +276,7 @@ Body Line 1
# Assert
assert len(entries) == 1
assert entries[0].heading == 'title1 title2'
assert entries[0].heading == "title1 title2"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1\n"
assert entries[0].priority == ""

View File

@@ -14,7 +14,9 @@ from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
# Test
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig):
def test_asymmetric_setup_with_missing_file_raises_error(
org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
):
# Arrange
# Ensure file mentioned in org.input-files is missing
single_new_file = Path(org_config_with_only_new_file.input_files[0])
@@ -27,10 +29,12 @@ def test_asymmetric_setup_with_missing_file_raises_error(org_config_with_only_ne
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_setup_with_empty_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig):
def test_asymmetric_setup_with_empty_file_raises_error(
org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
):
# Act
# Generate notes embeddings during asymmetric setup
with pytest.raises(ValueError, match=r'^No valid entries found*'):
with pytest.raises(ValueError, match=r"^No valid entries found*"):
text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=True)
@@ -52,15 +56,9 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC
query = "How to git install application?"
# Act
hits, entries = text_search.query(
query,
model = model.notes_search,
rank_results=True)
hits, entries = text_search.query(query, model=model.notes_search, rank_results=True)
results = text_search.collate_results(
hits,
entries,
count=1)
results = text_search.collate_results(hits, entries, count=1)
# Assert
# Actual_data should contain "Khoj via Emacs" entry
@@ -76,12 +74,14 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent
new_file_to_index = Path(org_config_with_only_new_file.input_files[0])
with open(new_file_to_index, "w") as f:
f.write(f"* Entry more than {max_tokens} words\n")
for index in range(max_tokens+1):
for index in range(max_tokens + 1):
f.write(f"{index} ")
# Act
# reload embeddings, entries, notes model after adding new org-mode file
initial_notes_model = text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False)
initial_notes_model = text_search.setup(
OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False
)
# Assert
# verify newly added org-mode entry is split by max tokens
@@ -92,18 +92,20 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path):
# Arrange
initial_notes_model= text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
assert len(initial_notes_model.entries) == 10
assert len(initial_notes_model.corpus_embeddings) == 10
# append org-mode entry to first org input file in config
content_config.org.input_files = [f'{new_org_file}']
content_config.org.input_files = [f"{new_org_file}"]
with open(new_org_file, "w") as f:
f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")
# regenerate notes jsonl, model embeddings and model to include entry from new file
regenerated_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
regenerated_notes_model = text_search.setup(
OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True
)
# Act
# reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
@@ -137,7 +139,7 @@ def test_incremental_update(content_config: ContentConfig, search_config: Search
# Act
# update embeddings, entries with the newly added note
content_config.org.input_files = [f'{new_org_file}']
content_config.org.input_files = [f"{new_org_file}"]
initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
# Assert

View File

@@ -7,7 +7,7 @@ def test_no_word_filter():
# Arrange
word_filter = WordFilter()
entries = arrange_content()
q_with_no_filter = 'head tail'
q_with_no_filter = "head tail"
# Act
can_filter = word_filter.can_filter(q_with_no_filter)
@@ -15,7 +15,7 @@ def test_no_word_filter():
# Assert
assert can_filter == False
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 1, 2, 3}
@@ -31,7 +31,7 @@ def test_word_exclude_filter():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {0, 2}
@@ -47,7 +47,7 @@ def test_word_include_filter():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {2, 3}
@@ -63,16 +63,16 @@ def test_word_include_and_exclude_filter():
# Assert
assert can_filter == True
assert ret_query == 'head tail'
assert ret_query == "head tail"
assert entry_indices == {2}
def arrange_content():
entries = [
Entry(compiled='', raw='Minimal Entry'),
Entry(compiled='', raw='Entry with exclude_word'),
Entry(compiled='', raw='Entry with include_word'),
Entry(compiled='', raw='Entry with include_word and exclude_word')
Entry(compiled="", raw="Minimal Entry"),
Entry(compiled="", raw="Entry with exclude_word"),
Entry(compiled="", raw="Entry with include_word"),
Entry(compiled="", raw="Entry with include_word and exclude_word"),
]
return entries