Use Black to format Khoj server code and tests
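For context on the diff below: Black normalizes string quotes to double quotes wherever that does not add escapes, drops the spaces around "=" in keyword arguments, and splits calls that exceed the configured line length into one argument per line with a trailing comma. A small illustrative sketch of the transformation (condensed from lines in this diff; the exact line-length setting is an assumption, not recorded in the commit):

# Before Black:
search_config.symmetric = TextSearchConfig(encoder = "sentence-transformers/all-MiniLM-L6-v2", cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2", model_directory = model_dir / 'symmetric/')

# After Black, assuming the call is longer than the configured line length:
search_config.symmetric = TextSearchConfig(
    encoder="sentence-transformers/all-MiniLM-L6-v2",
    cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
    model_directory=model_dir / "symmetric/",
)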
@@ -6,59 +6,67 @@ import pytest
 # Internal Packages
 from khoj.search_type import image_search, text_search
 from khoj.utils.helpers import resolve_absolute_path
-from khoj.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
+from khoj.utils.rawconfig import (
+    ContentConfig,
+    TextContentConfig,
+    ImageContentConfig,
+    SearchConfig,
+    TextSearchConfig,
+    ImageSearchConfig,
+)
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.word_filter import WordFilter
 from khoj.search_filter.file_filter import FileFilter


-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def search_config() -> SearchConfig:
-    model_dir = resolve_absolute_path('~/.khoj/search')
+    model_dir = resolve_absolute_path("~/.khoj/search")
     model_dir.mkdir(parents=True, exist_ok=True)
     search_config = SearchConfig()

     search_config.symmetric = TextSearchConfig(
-        encoder = "sentence-transformers/all-MiniLM-L6-v2",
-        cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
-        model_directory = model_dir / 'symmetric/'
+        encoder="sentence-transformers/all-MiniLM-L6-v2",
+        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
+        model_directory=model_dir / "symmetric/",
     )

     search_config.asymmetric = TextSearchConfig(
-        encoder = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
-        cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
-        model_directory = model_dir / 'asymmetric/'
+        encoder="sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+        cross_encoder="cross-encoder/ms-marco-MiniLM-L-6-v2",
+        model_directory=model_dir / "asymmetric/",
     )

     search_config.image = ImageSearchConfig(
-        encoder = "sentence-transformers/clip-ViT-B-32",
-        model_directory = model_dir / 'image/'
+        encoder="sentence-transformers/clip-ViT-B-32", model_directory=model_dir / "image/"
     )

     return search_config


-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
 def content_config(tmp_path_factory, search_config: SearchConfig):
-    content_dir = tmp_path_factory.mktemp('content')
+    content_dir = tmp_path_factory.mktemp("content")

     # Generate Image Embeddings from Test Images
     content_config = ContentConfig()
     content_config.image = ImageContentConfig(
-        input_directories = ['tests/data/images'],
-        embeddings_file = content_dir.joinpath('image_embeddings.pt'),
-        batch_size = 1,
-        use_xmp_metadata = False)
+        input_directories=["tests/data/images"],
+        embeddings_file=content_dir.joinpath("image_embeddings.pt"),
+        batch_size=1,
+        use_xmp_metadata=False,
+    )

     image_search.setup(content_config.image, search_config.image, regenerate=False)

     # Generate Notes Embeddings from Test Notes
     content_config.org = TextContentConfig(
-        input_files = None,
-        input_filter = ['tests/data/org/*.org'],
-        compressed_jsonl = content_dir.joinpath('notes.jsonl.gz'),
-        embeddings_file = content_dir.joinpath('note_embeddings.pt'))
+        input_files=None,
+        input_filter=["tests/data/org/*.org"],
+        compressed_jsonl=content_dir.joinpath("notes.jsonl.gz"),
+        embeddings_file=content_dir.joinpath("note_embeddings.pt"),
+    )

     filters = [DateFilter(), WordFilter(), FileFilter()]
     text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
@@ -66,7 +74,7 @@ def content_config(tmp_path_factory, search_config: SearchConfig):
     return content_config


-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
 def new_org_file(content_config: ContentConfig):
     # Setup
     new_org_file = Path(content_config.org.input_filter[0]).parent / "new_file.org"
@@ -79,9 +87,9 @@ def new_org_file(content_config: ContentConfig):
     new_org_file.unlink()


-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
 def org_config_with_only_new_file(content_config: ContentConfig, new_org_file: Path):
     new_org_config = deepcopy(content_config.org)
-    new_org_config.input_files = [f'{new_org_file}']
+    new_org_config.input_files = [f"{new_org_file}"]
     new_org_config.input_filter = None
     return new_org_config
@@ -8,10 +8,10 @@ from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl
 def test_no_transactions_in_file(tmp_path):
     "Handle file with no transactions."
     # Arrange
-    entry = f'''
+    entry = f"""
 - Bullet point 1
 - Bullet point 2
-'''
+"""
     beancount_file = create_file(tmp_path, entry)

     # Act
@@ -20,7 +20,8 @@ def test_no_transactions_in_file(tmp_path):

     # Process Each Entry from All Beancount Files
     jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
-        BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries))
+        BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -30,11 +31,11 @@ def test_no_transactions_in_file(tmp_path):
 def test_single_beancount_transaction_to_jsonl(tmp_path):
     "Convert transaction from single file to jsonl."
     # Arrange
-    entry = f'''
+    entry = f"""
 1984-04-01 * "Payee" "Narration"
 Expenses:Test:Test 1.00 KES
 Assets:Test:Test -1.00 KES
-'''
+"""
     beancount_file = create_file(tmp_path, entry)

     # Act
@@ -43,7 +44,8 @@ Assets:Test:Test -1.00 KES

     # Process Each Entry from All Beancount Files
     jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
-        BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map))
+        BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -53,7 +55,7 @@ Assets:Test:Test -1.00 KES
 def test_multiple_transactions_to_jsonl(tmp_path):
     "Convert multiple transactions from single file to jsonl."
     # Arrange
-    entry = f'''
+    entry = f"""
 1984-04-01 * "Payee" "Narration"
 Expenses:Test:Test 1.00 KES
 Assets:Test:Test -1.00 KES
@@ -61,7 +63,7 @@ Assets:Test:Test -1.00 KES
 1984-04-01 * "Payee" "Narration"
 Expenses:Test:Test 1.00 KES
 Assets:Test:Test -1.00 KES
-'''
+"""

     beancount_file = create_file(tmp_path, entry)

@@ -71,7 +73,8 @@ Assets:Test:Test -1.00 KES

     # Process Each Entry from All Beancount Files
     jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
-        BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map))
+        BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -95,8 +98,8 @@ def test_get_beancount_files(tmp_path):
     expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1]))

     # Setup input-files, input-filters
-    input_files = [tmp_path / 'ledger.bean']
-    input_filter = [tmp_path / 'group1*.bean', tmp_path / 'group2*.beancount']
+    input_files = [tmp_path / "ledger.bean"]
+    input_filter = [tmp_path / "group1*.bean", tmp_path / "group2*.beancount"]

     # Act
     extracted_org_files = BeancountToJsonl.get_beancount_files(input_files, input_filter)
@@ -6,7 +6,7 @@ from khoj.processor.conversation.gpt import converse, understand, message_to_prompt


 # Initialize variables for tests
-model = 'text-davinci-003'
+model = "text-davinci-003"
 api_key = None  # Input your OpenAI API key to run the tests below


@@ -14,19 +14,22 @@ api_key = None  # Input your OpenAI API key to run the tests below
 # ----------------------------------------------------------------------------------------------------
 def test_message_to_understand_prompt():
     # Arrange
-    understand_primer = "Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=[\"companion\", \"notes\", \"ledger\", \"image\", \"music\"]\nsearch(search-type, data);\nsearch-type=[\"google\", \"youtube\"]\ngenerate(activity);\nactivity=[\"paint\",\"write\", \"chat\"]\ntrigger-emotion(emotion);\nemotion=[\"happy\",\"confidence\",\"fear\",\"surprise\",\"sadness\",\"disgust\",\"anger\", \"curiosity\", \"calm\"]\n\nQ: How are you doing?\nA: activity(\"chat\"); trigger-emotion(\"surprise\")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember(\"notes\", \"Brother Antoine when we were at the beach\"); trigger-emotion(\"curiosity\");\nQ: what did we talk about last time?\nA: remember(\"notes\", \"talk last time\"); trigger-emotion(\"curiosity\");\nQ: Let's make some drawings!\nA: generate(\"paint\"); trigger-emotion(\"happy\");\nQ: Do you know anything about Lebanon?\nA: search(\"google\", \"lebanon\"); trigger-emotion(\"confidence\");\nQ: Find a video about a panda rolling in the grass\nA: search(\"youtube\",\"panda rolling in the grass\"); trigger-emotion(\"happy\"); \nQ: Tell me a scary story\nA: generate(\"write\" \"A story about some adventure\"); trigger-emotion(\"fear\");\nQ: What fiction book was I reading last week about AI starship?\nA: remember(\"notes\", \"read fiction book about AI starship last week\"); trigger-emotion(\"curiosity\");\nQ: How much did I spend at Subway for dinner last time?\nA: remember(\"ledger\", \"last Subway dinner\"); trigger-emotion(\"curiosity\");\nQ: I'm feeling sleepy\nA: activity(\"chat\"); trigger-emotion(\"calm\")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember(\"music\", \"popular Sri lankan song that Alex showed recently\"); trigger-emotion(\"curiosity\"); \nQ: You're pretty funny!\nA: activity(\"chat\"); trigger-emotion(\"pride\")"
-    expected_response = "Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=[\"companion\", \"notes\", \"ledger\", \"image\", \"music\"]\nsearch(search-type, data);\nsearch-type=[\"google\", \"youtube\"]\ngenerate(activity);\nactivity=[\"paint\",\"write\", \"chat\"]\ntrigger-emotion(emotion);\nemotion=[\"happy\",\"confidence\",\"fear\",\"surprise\",\"sadness\",\"disgust\",\"anger\", \"curiosity\", \"calm\"]\n\nQ: How are you doing?\nA: activity(\"chat\"); trigger-emotion(\"surprise\")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember(\"notes\", \"Brother Antoine when we were at the beach\"); trigger-emotion(\"curiosity\");\nQ: what did we talk about last time?\nA: remember(\"notes\", \"talk last time\"); trigger-emotion(\"curiosity\");\nQ: Let's make some drawings!\nA: generate(\"paint\"); trigger-emotion(\"happy\");\nQ: Do you know anything about Lebanon?\nA: search(\"google\", \"lebanon\"); trigger-emotion(\"confidence\");\nQ: Find a video about a panda rolling in the grass\nA: search(\"youtube\",\"panda rolling in the grass\"); trigger-emotion(\"happy\"); \nQ: Tell me a scary story\nA: generate(\"write\" \"A story about some adventure\"); trigger-emotion(\"fear\");\nQ: What fiction book was I reading last week about AI starship?\nA: remember(\"notes\", \"read fiction book about AI starship last week\"); trigger-emotion(\"curiosity\");\nQ: How much did I spend at Subway for dinner last time?\nA: remember(\"ledger\", \"last Subway dinner\"); trigger-emotion(\"curiosity\");\nQ: I'm feeling sleepy\nA: activity(\"chat\"); trigger-emotion(\"calm\")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember(\"music\", \"popular Sri lankan song that Alex showed recently\"); trigger-emotion(\"curiosity\"); \nQ: You're pretty funny!\nA: activity(\"chat\"); trigger-emotion(\"pride\")\nQ: When did I last dine at Burger King?\nA:"
+    understand_primer = 'Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=["companion", "notes", "ledger", "image", "music"]\nsearch(search-type, data);\nsearch-type=["google", "youtube"]\ngenerate(activity);\nactivity=["paint","write", "chat"]\ntrigger-emotion(emotion);\nemotion=["happy","confidence","fear","surprise","sadness","disgust","anger", "curiosity", "calm"]\n\nQ: How are you doing?\nA: activity("chat"); trigger-emotion("surprise")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember("notes", "Brother Antoine when we were at the beach"); trigger-emotion("curiosity");\nQ: what did we talk about last time?\nA: remember("notes", "talk last time"); trigger-emotion("curiosity");\nQ: Let\'s make some drawings!\nA: generate("paint"); trigger-emotion("happy");\nQ: Do you know anything about Lebanon?\nA: search("google", "lebanon"); trigger-emotion("confidence");\nQ: Find a video about a panda rolling in the grass\nA: search("youtube","panda rolling in the grass"); trigger-emotion("happy"); \nQ: Tell me a scary story\nA: generate("write" "A story about some adventure"); trigger-emotion("fear");\nQ: What fiction book was I reading last week about AI starship?\nA: remember("notes", "read fiction book about AI starship last week"); trigger-emotion("curiosity");\nQ: How much did I spend at Subway for dinner last time?\nA: remember("ledger", "last Subway dinner"); trigger-emotion("curiosity");\nQ: I\'m feeling sleepy\nA: activity("chat"); trigger-emotion("calm")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember("music", "popular Sri lankan song that Alex showed recently"); trigger-emotion("curiosity"); \nQ: You\'re pretty funny!\nA: activity("chat"); trigger-emotion("pride")'
+    expected_response = 'Extract information from each chat message\n\nremember(memory-type, data);\nmemory-type=["companion", "notes", "ledger", "image", "music"]\nsearch(search-type, data);\nsearch-type=["google", "youtube"]\ngenerate(activity);\nactivity=["paint","write", "chat"]\ntrigger-emotion(emotion);\nemotion=["happy","confidence","fear","surprise","sadness","disgust","anger", "curiosity", "calm"]\n\nQ: How are you doing?\nA: activity("chat"); trigger-emotion("surprise")\nQ: Do you remember what I told you about my brother Antoine when we were at the beach?\nA: remember("notes", "Brother Antoine when we were at the beach"); trigger-emotion("curiosity");\nQ: what did we talk about last time?\nA: remember("notes", "talk last time"); trigger-emotion("curiosity");\nQ: Let\'s make some drawings!\nA: generate("paint"); trigger-emotion("happy");\nQ: Do you know anything about Lebanon?\nA: search("google", "lebanon"); trigger-emotion("confidence");\nQ: Find a video about a panda rolling in the grass\nA: search("youtube","panda rolling in the grass"); trigger-emotion("happy"); \nQ: Tell me a scary story\nA: generate("write" "A story about some adventure"); trigger-emotion("fear");\nQ: What fiction book was I reading last week about AI starship?\nA: remember("notes", "read fiction book about AI starship last week"); trigger-emotion("curiosity");\nQ: How much did I spend at Subway for dinner last time?\nA: remember("ledger", "last Subway dinner"); trigger-emotion("curiosity");\nQ: I\'m feeling sleepy\nA: activity("chat"); trigger-emotion("calm")\nQ: What was that popular Sri lankan song that Alex showed me recently?\nA: remember("music", "popular Sri lankan song that Alex showed recently"); trigger-emotion("curiosity"); \nQ: You\'re pretty funny!\nA: activity("chat"); trigger-emotion("pride")\nQ: When did I last dine at Burger King?\nA:'

     # Act
-    actual_response = message_to_prompt("When did I last dine at Burger King?", understand_primer, start_sequence="\nA:", restart_sequence="\nQ:")
+    actual_response = message_to_prompt(
+        "When did I last dine at Burger King?", understand_primer, start_sequence="\nA:", restart_sequence="\nQ:"
+    )

     # Assert
     assert actual_response == expected_response


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.skipif(api_key is None,
-                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
+@pytest.mark.skipif(
+    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
+)
 def test_minimal_chat_with_gpt():
     # Act
     response = converse("What will happen when the stars go out?", model=model, api_key=api_key)
@@ -36,21 +39,29 @@ def test_minimal_chat_with_gpt():


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.skipif(api_key is None,
-                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
+@pytest.mark.skipif(
+    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
+)
 def test_chat_with_history():
     # Arrange
-    ai_prompt="AI:"
-    human_prompt="Human:"
+    ai_prompt = "AI:"
+    human_prompt = "Human:"

-    conversation_primer = f'''
+    conversation_primer = f"""
 The following is a conversation with an AI assistant. The assistant is helpful, creative, clever, and very friendly companion.

 {human_prompt} Hello, I am Testatron. Who are you?
-{ai_prompt} Hi, I am Khoj, an AI conversational companion created by OpenAI. How can I help you today?'''
+{ai_prompt} Hi, I am Khoj, an AI conversational companion created by OpenAI. How can I help you today?"""

     # Act
-    response = converse("Hi Khoj, What is my name?", model=model, conversation_history=conversation_primer, api_key=api_key, temperature=0, max_tokens=50)
+    response = converse(
+        "Hi Khoj, What is my name?",
+        model=model,
+        conversation_history=conversation_primer,
+        api_key=api_key,
+        temperature=0,
+        max_tokens=50,
+    )

     # Assert
     assert len(response) > 0
@@ -58,12 +69,13 @@ The following is a conversation with an AI assistant. The assistant is helpful,


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.skipif(api_key is None,
-                    reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys")
+@pytest.mark.skipif(
+    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
+)
 def test_understand_message_using_gpt():
     # Act
     response = understand("When did I last dine at Subway?", model=model, api_key=api_key)

     # Assert
     assert len(response) > 0
-    assert response['intent']['memory-type'] == 'ledger'
+    assert response["intent"]["memory-type"] == "ledger"
@@ -14,35 +14,37 @@ def test_cli_minimal_default():
     actual_args = cli([])

     # Assert
-    assert actual_args.config_file == resolve_absolute_path(Path('~/.khoj/khoj.yml'))
+    assert actual_args.config_file == resolve_absolute_path(Path("~/.khoj/khoj.yml"))
     assert actual_args.regenerate == False
     assert actual_args.no_gui == False
     assert actual_args.verbose == 0


 # ----------------------------------------------------------------------------------------------------
 def test_cli_invalid_config_file_path():
     # Arrange
     non_existent_config_file = f"non-existent-khoj-{random()}.yml"

     # Act
-    actual_args = cli([f'-c={non_existent_config_file}'])
+    actual_args = cli([f"-c={non_existent_config_file}"])

     # Assert
     assert actual_args.config_file == resolve_absolute_path(non_existent_config_file)
     assert actual_args.config == None


 # ----------------------------------------------------------------------------------------------------
 def test_cli_config_from_file():
     # Act
-    actual_args = cli(['-c=tests/data/config.yml',
-                       '--regenerate',
-                       '--no-gui',
-                       '-vvv'])
+    actual_args = cli(["-c=tests/data/config.yml", "--regenerate", "--no-gui", "-vvv"])

     # Assert
-    assert actual_args.config_file == resolve_absolute_path(Path('tests/data/config.yml'))
+    assert actual_args.config_file == resolve_absolute_path(Path("tests/data/config.yml"))
     assert actual_args.no_gui == True
     assert actual_args.regenerate == True
     assert actual_args.config is not None
-    assert actual_args.config.content_type.org.input_files == [Path('~/first_from_config.org'), Path('~/second_from_config.org')]
+    assert actual_args.config.content_type.org.input_files == [
+        Path("~/first_from_config.org"),
+        Path("~/second_from_config.org"),
+    ]
     assert actual_args.verbose == 3
@@ -21,6 +21,7 @@ from khoj.search_filter.file_filter import FileFilter
 # ----------------------------------------------------------------------------------------------------
 client = TestClient(app)

+
 # Test
 # ----------------------------------------------------------------------------------------------------
 def test_search_with_invalid_content_type():
@@ -98,9 +99,11 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
     config.content_type = content_config
     config.search_type = search_config
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
-    query_expected_image_pairs = [("kitten", "kitten_park.jpg"),
-                                  ("a horse and dog on a leash", "horse_dog.jpg"),
-                                  ("A guinea pig eating grass", "guineapig_grass.jpg")]
+    query_expected_image_pairs = [
+        ("kitten", "kitten_park.jpg"),
+        ("a horse and dog on a leash", "horse_dog.jpg"),
+        ("A guinea pig eating grass", "guineapig_grass.jpg"),
+    ]

     for query, expected_image_name in query_expected_image_pairs:
         # Act
@@ -135,7 +138,9 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
 def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     filters = [WordFilter(), FileFilter()]
-    model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
+    model.orgmode_search = text_search.setup(
+        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    )
     user_query = quote('+"Emacs" file:"*.org"')

     # Act
@@ -152,7 +157,9 @@ def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig):
 def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     filters = [WordFilter()]
-    model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
+    model.orgmode_search = text_search.setup(
+        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    )
     user_query = quote('How to git install application? +"Emacs"')

     # Act
@@ -169,7 +176,9 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
 def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     filters = [WordFilter()]
-    model.orgmode_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
+    model.orgmode_search = text_search.setup(
+        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters
+    )
     user_query = quote('How to git install application? -"clone"')

     # Act
@@ -10,53 +10,59 @@ from khoj.utils.rawconfig import Entry

 def test_date_filter():
     entries = [
-        Entry(compiled='', raw='Entry with no date'),
-        Entry(compiled='', raw='April Fools entry: 1984-04-01'),
-        Entry(compiled='', raw='Entry with date:1984-04-02')
+        Entry(compiled="", raw="Entry with no date"),
+        Entry(compiled="", raw="April Fools entry: 1984-04-01"),
+        Entry(compiled="", raw="Entry with date:1984-04-02"),
     ]

-    q_with_no_date_filter = 'head tail'
+    q_with_no_date_filter = "head tail"
     ret_query, entry_indices = DateFilter().apply(q_with_no_date_filter, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 1, 2}

     q_with_dtrange_non_overlapping_at_boundary = 'head dt>"1984-04-01" dt<"1984-04-02" tail'
     ret_query, entry_indices = DateFilter().apply(q_with_dtrange_non_overlapping_at_boundary, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == set()

     query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<"1984-04-03" tail'
     ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {2}

     query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<"1984-04-02" tail'
     ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {1}

     query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<="1984-04-02" tail'
     ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {2}

     query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<="1984-04-02" tail'
     ret_query, entry_indices = DateFilter().apply(query_with_overlapping_dtrange, entries)
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {1, 2}


 def test_extract_date_range():
-    assert DateFilter().extract_date_range('head dt>"1984-01-04" dt<"1984-01-07" tail') == [datetime(1984, 1, 5, 0, 0, 0).timestamp(), datetime(1984, 1, 7, 0, 0, 0).timestamp()]
+    assert DateFilter().extract_date_range('head dt>"1984-01-04" dt<"1984-01-07" tail') == [
+        datetime(1984, 1, 5, 0, 0, 0).timestamp(),
+        datetime(1984, 1, 7, 0, 0, 0).timestamp(),
+    ]
     assert DateFilter().extract_date_range('head dt<="1984-01-01"') == [0, datetime(1984, 1, 2, 0, 0, 0).timestamp()]
     assert DateFilter().extract_date_range('head dt>="1984-01-01"') == [datetime(1984, 1, 1, 0, 0, 0).timestamp(), inf]
-    assert DateFilter().extract_date_range('head dt:"1984-01-01"') == [datetime(1984, 1, 1, 0, 0, 0).timestamp(), datetime(1984, 1, 2, 0, 0, 0).timestamp()]
+    assert DateFilter().extract_date_range('head dt:"1984-01-01"') == [
+        datetime(1984, 1, 1, 0, 0, 0).timestamp(),
+        datetime(1984, 1, 2, 0, 0, 0).timestamp(),
+    ]

     # Unparseable date filter specified in query
     assert DateFilter().extract_date_range('head dt:"Summer of 69" tail') == None

     # No date filter specified in query
-    assert DateFilter().extract_date_range('head tail') == None
+    assert DateFilter().extract_date_range("head tail") == None

     # Non intersecting date ranges
     assert DateFilter().extract_date_range('head dt>"1984-01-01" dt<"1984-01-01" tail') == None
@@ -66,43 +72,79 @@ def test_parse():
     test_now = datetime(1984, 4, 1, 21, 21, 21)

     # day variations
-    assert DateFilter().parse('today', relative_base=test_now) == (datetime(1984, 4, 1, 0, 0, 0), datetime(1984, 4, 2, 0, 0, 0))
-    assert DateFilter().parse('tomorrow', relative_base=test_now) == (datetime(1984, 4, 2, 0, 0, 0), datetime(1984, 4, 3, 0, 0, 0))
-    assert DateFilter().parse('yesterday', relative_base=test_now) == (datetime(1984, 3, 31, 0, 0, 0), datetime(1984, 4, 1, 0, 0, 0))
-    assert DateFilter().parse('5 days ago', relative_base=test_now) == (datetime(1984, 3, 27, 0, 0, 0), datetime(1984, 3, 28, 0, 0, 0))
+    assert DateFilter().parse("today", relative_base=test_now) == (
+        datetime(1984, 4, 1, 0, 0, 0),
+        datetime(1984, 4, 2, 0, 0, 0),
+    )
+    assert DateFilter().parse("tomorrow", relative_base=test_now) == (
+        datetime(1984, 4, 2, 0, 0, 0),
+        datetime(1984, 4, 3, 0, 0, 0),
+    )
+    assert DateFilter().parse("yesterday", relative_base=test_now) == (
+        datetime(1984, 3, 31, 0, 0, 0),
+        datetime(1984, 4, 1, 0, 0, 0),
+    )
+    assert DateFilter().parse("5 days ago", relative_base=test_now) == (
+        datetime(1984, 3, 27, 0, 0, 0),
+        datetime(1984, 3, 28, 0, 0, 0),
+    )

     # week variations
-    assert DateFilter().parse('last week', relative_base=test_now) == (datetime(1984, 3, 18, 0, 0, 0), datetime(1984, 3, 25, 0, 0, 0))
-    assert DateFilter().parse('2 weeks ago', relative_base=test_now) == (datetime(1984, 3, 11, 0, 0, 0), datetime(1984, 3, 18, 0, 0, 0))
+    assert DateFilter().parse("last week", relative_base=test_now) == (
+        datetime(1984, 3, 18, 0, 0, 0),
+        datetime(1984, 3, 25, 0, 0, 0),
+    )
+    assert DateFilter().parse("2 weeks ago", relative_base=test_now) == (
+        datetime(1984, 3, 11, 0, 0, 0),
+        datetime(1984, 3, 18, 0, 0, 0),
+    )

     # month variations
-    assert DateFilter().parse('next month', relative_base=test_now) == (datetime(1984, 5, 1, 0, 0, 0), datetime(1984, 6, 1, 0, 0, 0))
-    assert DateFilter().parse('2 months ago', relative_base=test_now) == (datetime(1984, 2, 1, 0, 0, 0), datetime(1984, 3, 1, 0, 0, 0))
+    assert DateFilter().parse("next month", relative_base=test_now) == (
+        datetime(1984, 5, 1, 0, 0, 0),
+        datetime(1984, 6, 1, 0, 0, 0),
+    )
+    assert DateFilter().parse("2 months ago", relative_base=test_now) == (
+        datetime(1984, 2, 1, 0, 0, 0),
+        datetime(1984, 3, 1, 0, 0, 0),
+    )

     # year variations
-    assert DateFilter().parse('this year', relative_base=test_now) == (datetime(1984, 1, 1, 0, 0, 0), datetime(1985, 1, 1, 0, 0, 0))
-    assert DateFilter().parse('20 years later', relative_base=test_now) == (datetime(2004, 1, 1, 0, 0, 0), datetime(2005, 1, 1, 0, 0, 0))
+    assert DateFilter().parse("this year", relative_base=test_now) == (
+        datetime(1984, 1, 1, 0, 0, 0),
+        datetime(1985, 1, 1, 0, 0, 0),
+    )
+    assert DateFilter().parse("20 years later", relative_base=test_now) == (
+        datetime(2004, 1, 1, 0, 0, 0),
+        datetime(2005, 1, 1, 0, 0, 0),
+    )

     # specific month/date variation
-    assert DateFilter().parse('in august', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0))
-    assert DateFilter().parse('on 1983-08-01', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0))
+    assert DateFilter().parse("in august", relative_base=test_now) == (
+        datetime(1983, 8, 1, 0, 0, 0),
+        datetime(1983, 8, 2, 0, 0, 0),
+    )
+    assert DateFilter().parse("on 1983-08-01", relative_base=test_now) == (
+        datetime(1983, 8, 1, 0, 0, 0),
+        datetime(1983, 8, 2, 0, 0, 0),
+    )


 def test_date_filter_regex():
     dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>"today" dt:"1984-01-01"')
-    assert dtrange_match == [('>', 'today'), (':', '1984-01-01')]
+    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

     dtrange_match = re.findall(DateFilter().date_regex, 'head dt>"today" dt:"1984-01-01" multi word tail')
-    assert dtrange_match == [('>', 'today'), (':', '1984-01-01')]
+    assert dtrange_match == [(">", "today"), (":", "1984-01-01")]

     dtrange_match = re.findall(DateFilter().date_regex, 'multi word head dt>="today" dt="1984-01-01"')
-    assert dtrange_match == [('>=', 'today'), ('=', '1984-01-01')]
+    assert dtrange_match == [(">=", "today"), ("=", "1984-01-01")]

     dtrange_match = re.findall(DateFilter().date_regex, 'dt<"multi word date" multi word tail')
-    assert dtrange_match == [('<', 'multi word date')]
+    assert dtrange_match == [("<", "multi word date")]

     dtrange_match = re.findall(DateFilter().date_regex, 'head dt<="multi word date"')
-    assert dtrange_match == [('<=', 'multi word date')]
+    assert dtrange_match == [("<=", "multi word date")]

-    dtrange_match = re.findall(DateFilter().date_regex, 'head tail')
-    assert dtrange_match == []
+    dtrange_match = re.findall(DateFilter().date_regex, "head tail")
+    assert dtrange_match == []
@@ -7,7 +7,7 @@ def test_no_file_filter():
     # Arrange
     file_filter = FileFilter()
     entries = arrange_content()
-    q_with_no_filter = 'head tail'
+    q_with_no_filter = "head tail"

     # Act
     can_filter = file_filter.can_filter(q_with_no_filter)
@@ -15,7 +15,7 @@ def test_no_file_filter():

     # Assert
     assert can_filter == False
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 1, 2, 3}


@@ -31,7 +31,7 @@ def test_file_filter_with_non_existent_file():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {}


@@ -47,7 +47,7 @@ def test_single_file_filter():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 2}


@@ -63,7 +63,7 @@ def test_file_filter_with_partial_match():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 2}


@@ -79,7 +79,7 @@ def test_file_filter_with_regex_match():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 1, 2, 3}


@@ -95,16 +95,16 @@ def test_multiple_file_filter():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 1, 2, 3}


 def arrange_content():
     entries = [
-        Entry(compiled='', raw='First Entry', file= 'file 1.org'),
-        Entry(compiled='', raw='Second Entry', file= 'file2.org'),
-        Entry(compiled='', raw='Third Entry', file= 'file 1.org'),
-        Entry(compiled='', raw='Fourth Entry', file= 'file2.org')
+        Entry(compiled="", raw="First Entry", file="file 1.org"),
+        Entry(compiled="", raw="Second Entry", file="file2.org"),
+        Entry(compiled="", raw="Third Entry", file="file 1.org"),
+        Entry(compiled="", raw="Fourth Entry", file="file2.org"),
     ]

     return entries
@@ -1,5 +1,6 @@
 from khoj.utils import helpers

+
 def test_get_from_null_dict():
     # null handling
     assert helpers.get_from_dict(dict()) == dict()
@@ -7,39 +8,39 @@ def test_get_from_null_dict():

     # key present in nested dictionary
     # 1-level dictionary
-    assert helpers.get_from_dict({'a': 1, 'b': 2}, 'a') == 1
-    assert helpers.get_from_dict({'a': 1, 'b': 2}, 'c') == None
+    assert helpers.get_from_dict({"a": 1, "b": 2}, "a") == 1
+    assert helpers.get_from_dict({"a": 1, "b": 2}, "c") == None

     # 2-level dictionary
-    assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'a') == {'a_a': 1}
-    assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'a', 'a_a') == 1
+    assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "a") == {"a_a": 1}
+    assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "a", "a_a") == 1

     # key not present in nested dictionary
     # 2-level_dictionary
-    assert helpers.get_from_dict({'a': {'a_a': 1}, 'b': 2}, 'b', 'b_a') == None
+    assert helpers.get_from_dict({"a": {"a_a": 1}, "b": 2}, "b", "b_a") == None


 def test_merge_dicts():
     # basic merge of dicts with non-overlapping keys
-    assert helpers.merge_dicts(priority_dict={'a': 1}, default_dict={'b': 2}) == {'a': 1, 'b': 2}
+    assert helpers.merge_dicts(priority_dict={"a": 1}, default_dict={"b": 2}) == {"a": 1, "b": 2}

     # use default dict items when not present in priority dict
-    assert helpers.merge_dicts(priority_dict={}, default_dict={'b': 2}) == {'b': 2}
+    assert helpers.merge_dicts(priority_dict={}, default_dict={"b": 2}) == {"b": 2}

     # do not override existing key in priority_dict with default dict
-    assert helpers.merge_dicts(priority_dict={'a': 1}, default_dict={'a': 2}) == {'a': 1}
+    assert helpers.merge_dicts(priority_dict={"a": 1}, default_dict={"a": 2}) == {"a": 1}


 def test_lru_cache():
     # Test initializing cache
-    cache = helpers.LRU({'a': 1, 'b': 2}, capacity=2)
-    assert cache == {'a': 1, 'b': 2}
+    cache = helpers.LRU({"a": 1, "b": 2}, capacity=2)
+    assert cache == {"a": 1, "b": 2}

     # Test capacity overflow
-    cache['c'] = 3
-    assert cache == {'b': 2, 'c': 3}
+    cache["c"] = 3
+    assert cache == {"b": 2, "c": 3}

     # Test delete least recently used item from LRU cache on capacity overflow
-    cache['b']  # accessing 'b' makes it the most recently used item
-    cache['d'] = 4  # so 'c' is deleted from the cache instead of 'b'
-    assert cache == {'b': 2, 'd': 4}
+    cache["b"]  # accessing 'b' makes it the most recently used item
+    cache["d"] = 4  # so 'c' is deleted from the cache instead of 'b'
+    assert cache == {"b": 2, "d": 4}
@@ -30,7 +30,8 @@ def test_image_metadata(content_config: ContentConfig):
     expected_metadata_image_name_pairs = [
         (["Billi Ka Bacha.", "Cat", "Grass"], "kitten_park.jpg"),
         (["Pasture.", "Horse", "Dog"], "horse_dog.jpg"),
-        (["Guinea Pig Eating Celery.", "Rodent", "Whiskers"], "guineapig_grass.jpg")]
+        (["Guinea Pig Eating Celery.", "Rodent", "Whiskers"], "guineapig_grass.jpg"),
+    ]

     test_image_paths = [
         Path(content_config.image.input_directories[0] / image_name[1])
@@ -51,23 +52,23 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     output_directory = resolve_absolute_path(web_directory)
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
-    query_expected_image_pairs = [("kitten", "kitten_park.jpg"),
-                                  ("horse and dog in a farm", "horse_dog.jpg"),
-                                  ("A guinea pig eating grass", "guineapig_grass.jpg")]
+    query_expected_image_pairs = [
+        ("kitten", "kitten_park.jpg"),
+        ("horse and dog in a farm", "horse_dog.jpg"),
+        ("A guinea pig eating grass", "guineapig_grass.jpg"),
+    ]

     # Act
     for query, expected_image_name in query_expected_image_pairs:
-        hits = image_search.query(
-            query,
-            count = 1,
-            model = model.image_search)
+        hits = image_search.query(query, count=1, model=model.image_search)

         results = image_search.collate_results(
             hits,
             model.image_search.image_names,
             output_directory=output_directory,
-            image_files_url='/static/images',
-            count=1)
+            image_files_url="/static/images",
+            count=1,
+        )

         actual_image_path = output_directory.joinpath(Path(results[0].entry).name)
         actual_image = Image.open(actual_image_path)
@@ -86,16 +87,13 @@ def test_image_search_query_truncated(content_config: ContentConfig, search_config: SearchConfig, caplog):
     # Arrange
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
     max_words_supported = 10
-    query = " ".join(["hello"]*100)
-    truncated_query = " ".join(["hello"]*max_words_supported)
+    query = " ".join(["hello"] * 100)
+    truncated_query = " ".join(["hello"] * max_words_supported)

     # Act
     try:
         with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-            image_search.query(
-                query,
-                count = 1,
-                model = model.image_search)
+            image_search.query(query, count=1, model=model.image_search)
     # Assert
     except RuntimeError as e:
         if "The size of tensor a (102) must match the size of tensor b (77)" in str(e):
@@ -115,17 +113,15 @@ def test_image_search_by_filepath(content_config: ContentConfig, search_config: SearchConfig, caplog):

     # Act
     with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-        hits = image_search.query(
-            query,
-            count = 1,
-            model = model.image_search)
+        hits = image_search.query(query, count=1, model=model.image_search)

     results = image_search.collate_results(
         hits,
         model.image_search.image_names,
         output_directory=output_directory,
-        image_files_url='/static/images',
-        count=1)
+        image_files_url="/static/images",
+        count=1,
+    )

     actual_image_path = output_directory.joinpath(Path(results[0].entry).name)
     actual_image = Image.open(actual_image_path)
@@ -133,7 +129,9 @@ def test_image_search_by_filepath(content_config: ContentConfig, search_config: SearchConfig, caplog):

     # Assert
     # Ensure file search triggered instead of query with file path as string
-    assert f"Find Images by Image: {resolve_absolute_path(expected_image_path)}" in caplog.text, "File search not triggered"
+    assert (
+        f"Find Images by Image: {resolve_absolute_path(expected_image_path)}" in caplog.text
+    ), "File search not triggered"
     # Ensure the correct image is returned
     assert expected_image == actual_image, "Incorrect image returned by file search"
@@ -8,10 +8,10 @@ from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
 def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
     "Convert files with no heading to jsonl."
     # Arrange
-    entry = f'''
+    entry = f"""
 - Bullet point 1
 - Bullet point 2
-'''
+"""
     markdownfile = create_file(tmp_path, entry)

     # Act
@@ -20,7 +20,8 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):

     # Process Each Entry from All Notes Files
     jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
-        MarkdownToJsonl.convert_markdown_entries_to_maps(entry_nodes, file_to_entries))
+        MarkdownToJsonl.convert_markdown_entries_to_maps(entry_nodes, file_to_entries)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -30,10 +31,10 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
 def test_single_markdown_entry_to_jsonl(tmp_path):
     "Convert markdown entry from single file to jsonl."
     # Arrange
-    entry = f'''### Heading
+    entry = f"""### Heading
 \t\r
 Body Line 1
-'''
+"""
     markdownfile = create_file(tmp_path, entry)

     # Act
@@ -42,7 +43,8 @@ def test_single_markdown_entry_to_jsonl(tmp_path):

     # Process Each Entry from All Notes Files
     jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
-        MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map))
+        MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -52,14 +54,14 @@ def test_single_markdown_entry_to_jsonl(tmp_path):
 def test_multiple_markdown_entries_to_jsonl(tmp_path):
     "Convert multiple markdown entries from single file to jsonl."
     # Arrange
-    entry = f'''
+    entry = f"""
 ### Heading 1
 \t\r
 Heading 1 Body Line 1
 ### Heading 2
 \t\r
 Heading 2 Body Line 2
-'''
+"""
     markdownfile = create_file(tmp_path, entry)

     # Act
@@ -68,7 +70,8 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path):

     # Process Each Entry from All Notes Files
     jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
-        MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map))
+        MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -92,8 +95,8 @@ def test_get_markdown_files(tmp_path):
     expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1]))

     # Setup input-files, input-filters
-    input_files = [tmp_path / 'notes.md']
-    input_filter = [tmp_path / 'group1*.md', tmp_path / 'group2*.markdown']
+    input_files = [tmp_path / "notes.md"]
+    input_filter = [tmp_path / "group1*.md", tmp_path / "group2*.markdown"]

     # Act
     extracted_org_files = MarkdownToJsonl.get_markdown_files(input_files, input_filter)
@@ -106,10 +109,10 @@ def test_get_markdown_files(tmp_path):
 def test_extract_entries_with_different_level_headings(tmp_path):
     "Extract markdown entries with different level headings."
     # Arrange
-    entry = f'''
+    entry = f"""
 # Heading 1
 ## Heading 2
-'''
+"""
     markdownfile = create_file(tmp_path, entry)

     # Act
@@ -9,23 +9,25 @@ from khoj.utils.rawconfig import Entry


 def test_configure_heading_entry_to_jsonl(tmp_path):
-    '''Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
-    Property drawers not considered Body. Ignore control characters for evaluating if Body empty.'''
+    """Ensure entries with empty body are ignored, unless explicitly configured to index heading entries.
+    Property drawers not considered Body. Ignore control characters for evaluating if Body empty."""
     # Arrange
-    entry = f'''*** Heading
+    entry = f"""*** Heading
 :PROPERTIES:
 :ID: 42-42-42
 :END:
 \t \r
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     for index_heading_entries in [True, False]:
         # Act
         # Extract entries into jsonl from specified Org files
-        jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(OrgToJsonl.convert_org_nodes_to_entries(
-            *OrgToJsonl.extract_org_entries(org_files=[orgfile]),
-            index_heading_entries=index_heading_entries))
+        jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
+            OrgToJsonl.convert_org_nodes_to_entries(
+                *OrgToJsonl.extract_org_entries(org_files=[orgfile]), index_heading_entries=index_heading_entries
+            )
+        )
         jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

         # Assert
@@ -40,10 +42,10 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
 def test_entry_split_when_exceeds_max_words(tmp_path):
     "Ensure entries with compiled words exceeding max_words are split."
     # Arrange
-    entry = f'''*** Heading
+    entry = f"""*** Heading
 \t\r
 Body Line 1
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -53,9 +55,9 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
     # Split each entry from specified Org files by max words
     jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
         TextToJsonl.split_entries_by_max_tokens(
-            OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map),
-            max_tokens = 2)
+            OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=2
+        )
     )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -65,15 +67,15 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
 def test_entry_split_drops_large_words(tmp_path):
     "Ensure entries drops words larger than specified max word length from compiled version."
     # Arrange
-    entry_text = f'''*** Heading
+    entry_text = f"""*** Heading
 \t\r
 Body Line 1
-'''
+"""
     entry = Entry(raw=entry_text, compiled=entry_text)

     # Act
     # Split entry by max words and drop words larger than max word length
-    processed_entry = TextToJsonl.split_entries_by_max_tokens([entry], max_word_length = 5)[0]
+    processed_entry = TextToJsonl.split_entries_by_max_tokens([entry], max_word_length=5)[0]

     # Assert
     # "Heading" dropped from compiled version because its over the set max word limit
@@ -83,13 +85,13 @@ def test_entry_split_drops_large_words(tmp_path):
 def test_entry_with_body_to_jsonl(tmp_path):
     "Ensure entries with valid body text are loaded."
     # Arrange
-    entry = f'''*** Heading
+    entry = f"""*** Heading
 :PROPERTIES:
 :ID: 42-42-42
 :END:
 \t\r
 Body Line 1
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -97,7 +99,9 @@ def test_entry_with_body_to_jsonl(tmp_path):
     entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=[orgfile])

     # Process Each Entry from All Notes Files
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map))
+    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
+        OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map)
+    )
     jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
@@ -107,10 +111,10 @@ def test_entry_with_body_to_jsonl(tmp_path):
 def test_file_with_no_headings_to_jsonl(tmp_path):
     "Ensure files with no heading, only body text are loaded."
     # Arrange
-    entry = f'''
+    entry = f"""
 - Bullet point 1
 - Bullet point 2
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -120,7 +124,7 @@ def test_file_with_no_headings_to_jsonl(tmp_path):
     # Process Each Entry from All Notes Files
     entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
     jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
-    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

     # Assert
     assert len(jsonl_data) == 1
@@ -143,8 +147,8 @@ def test_get_org_files(tmp_path):
     expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, orgfile1]))

     # Setup input-files, input-filters
-    input_files = [tmp_path / 'orgfile1.org']
-    input_filter = [tmp_path / 'group1*.org', tmp_path / 'group2*.org']
+    input_files = [tmp_path / "orgfile1.org"]
+    input_filter = [tmp_path / "group1*.org", tmp_path / "group2*.org"]

     # Act
     extracted_org_files = OrgToJsonl.get_org_files(input_files, input_filter)
@@ -157,10 +161,10 @@ def test_get_org_files(tmp_path):
 def test_extract_entries_with_different_level_headings(tmp_path):
     "Extract org entries with different level headings."
     # Arrange
-    entry = f'''
+    entry = f"""
 * Heading 1
 ** Heading 2
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -169,8 +173,8 @@ def test_extract_entries_with_different_level_headings(tmp_path):

     # Assert
     assert len(entries) == 2
-    assert f'{entries[0]}'.startswith("* Heading 1")
-    assert f'{entries[1]}'.startswith("** Heading 2")
+    assert f"{entries[0]}".startswith("* Heading 1")
+    assert f"{entries[1]}".startswith("** Heading 2")


 # Helper Functions
@@ -10,7 +10,7 @@ from khoj.processor.org_mode import orgnode
|
||||
def test_parse_entry_with_no_headings(tmp_path):
|
||||
"Test parsing of entry with minimal fields"
|
||||
# Arrange
|
||||
entry = f'''Body Line 1'''
|
||||
entry = f"""Body Line 1"""
|
||||
orgfile = create_file(tmp_path, entry)
|
||||
|
||||
# Act
|
||||
@@ -18,7 +18,7 @@ def test_parse_entry_with_no_headings(tmp_path):
|
||||
|
||||
# Assert
|
||||
assert len(entries) == 1
|
||||
assert entries[0].heading == f'{orgfile}'
|
||||
assert entries[0].heading == f"{orgfile}"
|
||||
assert entries[0].tags == list()
|
||||
assert entries[0].body == "Body Line 1"
|
||||
assert entries[0].priority == ""
|
||||
@@ -32,9 +32,9 @@ def test_parse_entry_with_no_headings(tmp_path):
|
||||
def test_parse_minimal_entry(tmp_path):
|
||||
"Test parsing of entry with minimal fields"
|
||||
# Arrange
|
||||
entry = f'''
|
||||
entry = f"""
|
||||
* Heading
|
||||
Body Line 1'''
|
||||
Body Line 1"""
|
||||
orgfile = create_file(tmp_path, entry)
|
||||
|
||||
# Act
|
||||
@@ -56,7 +56,7 @@ Body Line 1'''
|
||||
def test_parse_complete_entry(tmp_path):
|
||||
"Test parsing of entry with all important fields"
|
||||
# Arrange
|
||||
entry = f'''
|
||||
entry = f"""
|
||||
*** DONE [#A] Heading :Tag1:TAG2:tag3:
|
||||
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
|
||||
:PROPERTIES:
|
||||
@@ -67,7 +67,7 @@ CLOCK: [1984-04-01 Sun 09:00]--[1984-04-01 Sun 12:00] => 3:00
 - Clocked Log 1
 :END:
 Body Line 1
-Body Line 2'''
+Body Line 2"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -81,45 +81,45 @@ Body Line 2'''
     assert entries[0].body == "- Clocked Log 1\nBody Line 1\nBody Line 2"
     assert entries[0].priority == "A"
     assert entries[0].Property("ID") == "id:123-456-789-4234-1231"
-    assert entries[0].closed == datetime.date(1984,4,1)
-    assert entries[0].scheduled == datetime.date(1984,4,1)
-    assert entries[0].deadline == datetime.date(1984,4,1)
-    assert entries[0].logbook == [(datetime.datetime(1984,4,1,9,0,0), datetime.datetime(1984,4,1,12,0,0))]
+    assert entries[0].closed == datetime.date(1984, 4, 1)
+    assert entries[0].scheduled == datetime.date(1984, 4, 1)
+    assert entries[0].deadline == datetime.date(1984, 4, 1)
+    assert entries[0].logbook == [(datetime.datetime(1984, 4, 1, 9, 0, 0), datetime.datetime(1984, 4, 1, 12, 0, 0))]


 # ----------------------------------------------------------------------------------------------------
 def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
     "Render heading entry with property drawer"
     # Arrange
-    entry_to_render = f'''
+    entry_to_render = f"""
 *** [#A] Heading1 :tag1:
 :PROPERTIES:
 :ID: 111-111-111-1111-1111
 :END:
 \t\r \n
-'''
+"""
     orgfile = create_file(tmp_path, entry_to_render)

-    expected_entry = f'''*** [#A] Heading1 :tag1:
+    expected_entry = f"""*** [#A] Heading1 :tag1:
 :PROPERTIES:
 :LINE: file:{orgfile}::2
 :ID: id:111-111-111-1111-1111
 :SOURCE: [[file:{orgfile}::*Heading1]]
 :END:
-'''
+"""

     # Act
     parsed_entries = orgnode.makelist(orgfile)

     # Assert
-    assert f'{parsed_entries[0]}' == expected_entry
+    assert f"{parsed_entries[0]}" == expected_entry


 # ----------------------------------------------------------------------------------------------------
 def test_all_links_to_entry_rendered(tmp_path):
     "Ensure all links to entry rendered in property drawer from entry"
     # Arrange
-    entry = f'''
+    entry = f"""
 *** [#A] Heading :tag1:
 :PROPERTIES:
 :ID: 123-456-789-4234-1231
@@ -127,7 +127,7 @@ def test_all_links_to_entry_rendered(tmp_path):
 Body Line 1
 *** Heading2
 Body Line 2
-'''
+"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -135,23 +135,23 @@ Body Line 2

     # Assert
     # SOURCE link rendered with Heading
-    assert f':SOURCE: [[file:{orgfile}::*{entries[0].heading}]]' in f'{entries[0]}'
+    assert f":SOURCE: [[file:{orgfile}::*{entries[0].heading}]]" in f"{entries[0]}"
     # ID link rendered with ID
-    assert f':ID: id:123-456-789-4234-1231' in f'{entries[0]}'
+    assert f":ID: id:123-456-789-4234-1231" in f"{entries[0]}"
     # LINE link rendered with line number
-    assert f':LINE: file:{orgfile}::2' in f'{entries[0]}'
+    assert f":LINE: file:{orgfile}::2" in f"{entries[0]}"


 # ----------------------------------------------------------------------------------------------------
 def test_source_link_to_entry_escaped_for_rendering(tmp_path):
     "Test SOURCE link renders with square brackets in filename, heading escaped for org-mode rendering"
     # Arrange
-    entry = f'''
+    entry = f"""
 *** [#A] Heading[1] :tag1:
 :PROPERTIES:
 :ID: 123-456-789-4234-1231
 :END:
-Body Line 1'''
+Body Line 1"""
     orgfile = create_file(tmp_path, entry, filename="test[1].org")

     # Act
@@ -162,15 +162,15 @@ Body Line 1'''
     # parsed heading from entry
     assert entries[0].heading == "Heading[1]"
     # ensure SOURCE link has square brackets in filename, heading escaped in rendered entries
-    escaped_orgfile = f'{orgfile}'.replace("[1]", "\\[1\\]")
-    assert f':SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]' in f'{entries[0]}'
+    escaped_orgfile = f"{orgfile}".replace("[1]", "\\[1\\]")
+    assert f":SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]" in f"{entries[0]}"


 # ----------------------------------------------------------------------------------------------------
 def test_parse_multiple_entries(tmp_path):
     "Test parsing of multiple entries"
     # Arrange
-    content = f'''
+    content = f"""
 *** FAILED [#A] Heading1 :tag1:
 CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
 :PROPERTIES:
@@ -193,7 +193,7 @@ CLOCK: [1984-04-02 Mon 09:00]--[1984-04-02 Mon 12:00] => 3:00
 :END:
 Body 2

-'''
+"""
     orgfile = create_file(tmp_path, content)

     # Act
@@ -208,18 +208,20 @@ Body 2
         assert entry.body == f"- Clocked Log {index+1}\nBody {index+1}\n\n"
         assert entry.priority == "A"
         assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}"
-        assert entry.closed == datetime.date(1984,4,index+1)
-        assert entry.scheduled == datetime.date(1984,4,index+1)
-        assert entry.deadline == datetime.date(1984,4,index+1)
-        assert entry.logbook == [(datetime.datetime(1984,4,index+1,9,0,0), datetime.datetime(1984,4,index+1,12,0,0))]
+        assert entry.closed == datetime.date(1984, 4, index + 1)
+        assert entry.scheduled == datetime.date(1984, 4, index + 1)
+        assert entry.deadline == datetime.date(1984, 4, index + 1)
+        assert entry.logbook == [
+            (datetime.datetime(1984, 4, index + 1, 9, 0, 0), datetime.datetime(1984, 4, index + 1, 12, 0, 0))
+        ]


 # ----------------------------------------------------------------------------------------------------
 def test_parse_entry_with_empty_title(tmp_path):
     "Test parsing of entry with minimal fields"
     # Arrange
-    entry = f'''#+TITLE:
-Body Line 1'''
+    entry = f"""#+TITLE:
+Body Line 1"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -227,7 +229,7 @@ Body Line 1'''

     # Assert
     assert len(entries) == 1
-    assert entries[0].heading == f'{orgfile}'
+    assert entries[0].heading == f"{orgfile}"
     assert entries[0].tags == list()
     assert entries[0].body == "Body Line 1"
     assert entries[0].priority == ""
@@ -241,8 +243,8 @@ Body Line 1'''
 def test_parse_entry_with_title_and_no_headings(tmp_path):
     "Test parsing of entry with minimal fields"
     # Arrange
-    entry = f'''#+TITLE: test
-Body Line 1'''
+    entry = f"""#+TITLE: test
+Body Line 1"""
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -250,7 +252,7 @@ Body Line 1'''

     # Assert
     assert len(entries) == 1
-    assert entries[0].heading == 'test'
+    assert entries[0].heading == "test"
     assert entries[0].tags == list()
     assert entries[0].body == "Body Line 1"
     assert entries[0].priority == ""
@@ -264,9 +266,9 @@ Body Line 1'''
 def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
     "Test parsing of entry with minimal fields"
     # Arrange
-    entry = f'''#+TITLE: title1
+    entry = f"""#+TITLE: title1
 Body Line 1
-#+TITLE: title2 '''
+#+TITLE: title2 """
     orgfile = create_file(tmp_path, entry)

     # Act
@@ -274,7 +276,7 @@ Body Line 1

     # Assert
     assert len(entries) == 1
-    assert entries[0].heading == 'title1 title2'
+    assert entries[0].heading == "title1 title2"
     assert entries[0].tags == list()
     assert entries[0].body == "Body Line 1\n"
     assert entries[0].priority == ""
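The assertions above document the orgnode.makelist contract: it parses an org file into entry objects exposing heading, tags, body, priority, Property, and the closed, scheduled, deadline, and logbook fields. A minimal usage sketch built only from the behavior asserted in these tests; the file path and org content below are illustrative:

# Illustrative use of orgnode.makelist, mirroring the fields asserted in the tests above.
from pathlib import Path

from khoj.processor.org_mode import orgnode

orgfile = Path("/tmp/demo.org")
orgfile.write_text("* [#A] Ship release :work:\nBody Line 1")

entries = orgnode.makelist(orgfile)
print(entries[0].heading)   # e.g. "Ship release": the priority cookie and tags are parsed out of the heading
print(entries[0].tags)      # e.g. ["work"]
print(entries[0].priority)  # "A"
print(entries[0].body)      # "Body Line 1"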
@@ -14,7 +14,9 @@ from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl

 # Test
 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig):
+def test_asymmetric_setup_with_missing_file_raises_error(
+    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
+):
     # Arrange
     # Ensure file mentioned in org.input-files is missing
     single_new_file = Path(org_config_with_only_new_file.input_files[0])
@@ -27,10 +29,12 @@ def test_asymmetric_setup_with_missing_file_raises_error(org_config_with_only_ne


 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_setup_with_empty_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig):
+def test_asymmetric_setup_with_empty_file_raises_error(
+    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
+):
     # Act
     # Generate notes embeddings during asymmetric setup
-    with pytest.raises(ValueError, match=r'^No valid entries found*'):
+    with pytest.raises(ValueError, match=r"^No valid entries found*"):
         text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=True)


@@ -52,15 +56,9 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC
     query = "How to git install application?"

     # Act
-    hits, entries = text_search.query(
-        query,
-        model = model.notes_search,
-        rank_results=True)
+    hits, entries = text_search.query(query, model=model.notes_search, rank_results=True)

-    results = text_search.collate_results(
-        hits,
-        entries,
-        count=1)
+    results = text_search.collate_results(hits, entries, count=1)

     # Assert
     # Actual_data should contain "Khoj via Emacs" entry
@@ -76,12 +74,14 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent
     new_file_to_index = Path(org_config_with_only_new_file.input_files[0])
     with open(new_file_to_index, "w") as f:
         f.write(f"* Entry more than {max_tokens} words\n")
-        for index in range(max_tokens+1):
+        for index in range(max_tokens + 1):
             f.write(f"{index} ")

     # Act
     # reload embeddings, entries, notes model after adding new org-mode file
-    initial_notes_model = text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False)
+    initial_notes_model = text_search.setup(
+        OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False
+    )

     # Assert
     # verify newly added org-mode entry is split by max tokens
@@ -92,18 +92,20 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContent
 # ----------------------------------------------------------------------------------------------------
 def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path):
     # Arrange
-    initial_notes_model= text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)

     assert len(initial_notes_model.entries) == 10
     assert len(initial_notes_model.corpus_embeddings) == 10

     # append org-mode entry to first org input file in config
-    content_config.org.input_files = [f'{new_org_file}']
+    content_config.org.input_files = [f"{new_org_file}"]
     with open(new_org_file, "w") as f:
         f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")

     # regenerate notes jsonl, model embeddings and model to include entry from new file
-    regenerated_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    regenerated_notes_model = text_search.setup(
+        OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True
+    )

     # Act
     # reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
@@ -137,7 +139,7 @@ def test_incremental_update(content_config: ContentConfig, search_config: Search

     # Act
     # update embeddings, entries with the newly added note
-    content_config.org.input_files = [f'{new_org_file}']
+    content_config.org.input_files = [f"{new_org_file}"]
     initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)

     # Assert
@@ -7,7 +7,7 @@ def test_no_word_filter():
     # Arrange
     word_filter = WordFilter()
     entries = arrange_content()
-    q_with_no_filter = 'head tail'
+    q_with_no_filter = "head tail"

     # Act
     can_filter = word_filter.can_filter(q_with_no_filter)
@@ -15,7 +15,7 @@ def test_no_word_filter():

     # Assert
     assert can_filter == False
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 1, 2, 3}


@@ -31,7 +31,7 @@ def test_word_exclude_filter():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {0, 2}


@@ -47,7 +47,7 @@ def test_word_include_filter():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {2, 3}


@@ -63,16 +63,16 @@ def test_word_include_and_exclude_filter():

     # Assert
     assert can_filter == True
-    assert ret_query == 'head tail'
+    assert ret_query == "head tail"
     assert entry_indices == {2}


 def arrange_content():
     entries = [
-        Entry(compiled='', raw='Minimal Entry'),
-        Entry(compiled='', raw='Entry with exclude_word'),
-        Entry(compiled='', raw='Entry with include_word'),
-        Entry(compiled='', raw='Entry with include_word and exclude_word')
+        Entry(compiled="", raw="Minimal Entry"),
+        Entry(compiled="", raw="Entry with exclude_word"),
+        Entry(compiled="", raw="Entry with include_word"),
+        Entry(compiled="", raw="Entry with include_word and exclude_word"),
     ]

     return entries
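Every hunk in this change is mechanical: single-quoted strings become double-quoted, operators and argument lists gain consistent spacing, trailing commas appear, and long call sites and signatures are split or collapsed to fit the line length. This is consistent with Black's default style, and the same rewrite can be reproduced through Black's format_str API. A small illustration, not part of the commit, with the expected output assuming Black's default mode:

# Sketch: reproduce the kind of rewrite seen in the hunks above via Black's API.
import black

source = "q_with_no_filter = 'head tail'\n"
formatted = black.format_str(source, mode=black.FileMode())
print(formatted)  # prints: q_with_no_filter = "head tail"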