diff --git a/docs/index.html b/docs/index.html
index 763d65dd..5c1d3466 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -5,6 +5,15 @@
Document
+
+
+
+
+
+
+
+
+
diff --git a/src/database/migrations/0019_alter_googleuser_family_name_and_more.py b/src/database/migrations/0019_alter_googleuser_family_name_and_more.py
new file mode 100644
index 00000000..bb31adcb
--- /dev/null
+++ b/src/database/migrations/0019_alter_googleuser_family_name_and_more.py
@@ -0,0 +1,27 @@
+# Generated by Django 4.2.7 on 2023-11-19 22:20
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Relax the name, given_name and family_name fields of GoogleUser to be optional."""
+
+    # Applies on top of migration 0018 of the database app
+    dependencies = [("database", "0018_searchmodelconfig_delete_searchmodel")]
+
+    # All three columns receive the same nullable, blank-able definition,
+    # so one AlterField is built per column. The tuple order below keeps
+    # the operations in the order they were originally generated.
+    operations = [
+        migrations.AlterField(
+            model_name="googleuser",
+            name=column,
+            field=models.CharField(
+                blank=True,
+                default=None,
+                max_length=200,
+                null=True,
+            ),
+        )
+        for column in ("family_name", "given_name", "name")
+    ]
diff --git a/src/database/models/__init__.py b/src/database/models/__init__.py
index 92848e5c..92da3e6e 100644
--- a/src/database/models/__init__.py
+++ b/src/database/models/__init__.py
@@ -27,9 +27,9 @@ class GoogleUser(models.Model):
sub = models.CharField(max_length=200)
azp = models.CharField(max_length=200)
email = models.CharField(max_length=200)
- name = models.CharField(max_length=200)
- given_name = models.CharField(max_length=200)
- family_name = models.CharField(max_length=200)
+ name = models.CharField(max_length=200, null=True, default=None, blank=True)
+ given_name = models.CharField(max_length=200, null=True, default=None, blank=True)
+ family_name = models.CharField(max_length=200, null=True, default=None, blank=True)
picture = models.CharField(max_length=200, null=True, default=None)
locale = models.CharField(max_length=200)
diff --git a/src/khoj/processor/data_sources/org_mode/org_to_entries.py b/src/khoj/processor/data_sources/org_mode/org_to_entries.py
index 0aef9b67..b8d118a7 100644
--- a/src/khoj/processor/data_sources/org_mode/org_to_entries.py
+++ b/src/khoj/processor/data_sources/org_mode/org_to_entries.py
@@ -100,16 +100,17 @@ class OrgToEntries(TextToEntries):
continue
todo_str = f"{parsed_entry.todo} " if parsed_entry.todo else ""
- # Prepend filename as top heading to entry
- filename = Path(entry_to_file_map[parsed_entry]).stem
+
+ # Prepend ancestor headings, filename as top heading to entry for context
+ ancestors_trail = " / ".join(parsed_entry.ancestors) or Path(entry_to_file_map[parsed_entry])
if parsed_entry.heading:
- heading = f"* {filename}\n** {todo_str}{parsed_entry.heading}."
+ heading = f"* Path: {ancestors_trail}\n** {todo_str}{parsed_entry.heading}."
else:
- heading = f"* {filename}."
+ heading = f"* Path: {ancestors_trail}."
compiled = heading
if state.verbose > 2:
- logger.debug(f"Title: {parsed_entry.heading}")
+ logger.debug(f"Title: {heading}")
if parsed_entry.tags:
tags_str = " ".join(parsed_entry.tags)
diff --git a/src/khoj/processor/data_sources/org_mode/orgnode.py b/src/khoj/processor/data_sources/org_mode/orgnode.py
index db660ee7..28f55c17 100644
--- a/src/khoj/processor/data_sources/org_mode/orgnode.py
+++ b/src/khoj/processor/data_sources/org_mode/orgnode.py
@@ -80,6 +80,7 @@ def makelist(file, filename):
} # populated from #+SEQ_TODO line
level = ""
heading = ""
+ ancestor_headings = []
bodytext = ""
introtext = ""
tags = list() # set of all tags in headline
@@ -98,7 +99,7 @@ def makelist(file, filename):
heading_search = re.search(r"^(\*+)\s(.*?)\s*$", line)
if heading_search: # we are processing a heading line
if heading: # if we have are on second heading, append first heading to headings list
- thisNode = Orgnode(level, heading, bodytext, tags)
+ thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings)
if closed_date:
thisNode.closed = closed_date
closed_date = ""
@@ -114,6 +115,8 @@ def makelist(file, filename):
thisNode.properties = property_map
nodelist.append(thisNode)
property_map = {"LINE": f"file:{normalize_filename(filename)}::{ctr}"}
+ previous_level = level
+ previous_heading = heading
level = heading_search.group(1)
heading = heading_search.group(2)
bodytext = ""
@@ -126,6 +129,17 @@ def makelist(file, filename):
for parsedtag in parsedtags.split(":"):
if parsedtag != "":
tags.append(parsedtag)
+
+ # Add previous heading to ancestors if current heading is deeper than previous level
+ if len(level) > len(previous_level) and previous_heading:
+ ancestor_headings.append(previous_heading)
+ # Remove last ancestor(s) if current heading is shallower than previous level
+ elif len(level) < len(previous_level):
+ for _ in range(len(level), len(previous_level)):
+ if not ancestor_headings or len(ancestor_headings) == 0:
+ break
+ ancestor_headings.pop()
+
else: # we are processing a non-heading line
if line[:10] == "#+SEQ_TODO":
kwlist = re.findall(r"([A-Z]+)\(", line)
@@ -216,7 +230,7 @@ def makelist(file, filename):
nodelist = [thisNode] + nodelist
# write out last heading node
if heading:
- thisNode = Orgnode(level, heading, bodytext, tags)
+ thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings)
thisNode.properties = property_map
if sched_date:
thisNode.scheduled = sched_date
@@ -243,6 +257,9 @@ def makelist(file, filename):
n.priority = priority_search.group(1)
n.heading = priority_search.group(2)
+ # Prefix filepath/title to ancestors
+ n.ancestors = [file_title] + n.ancestors
+
# Set SOURCE property to a file+heading based org-mode link to the entry
if n.level == 0:
n.properties["LINE"] = f"file:{normalize_filename(filename)}::0"
@@ -261,7 +278,7 @@ class Orgnode(object):
with the headline.
"""
- def __init__(self, level, headline, body, tags):
+    def __init__(self, level, headline, body, tags, ancestor_headings=None):
"""
Create an Orgnode object given the parameters of level (as the
raw asterisks), headline text (including the TODO tag), and
@@ -279,8 +296,21 @@ class Orgnode(object):
self._closed = "" # Closed date
self._properties = dict()
self._logbook = list() # List of clock-in, clock-out tuples representing logbook entries
+        self._ancestor_headings = list(ancestor_headings or [])  # own copy; None or empty means no ancestors
- # Look for priority in headline and transfer to prty field
+ @property
+ def ancestors(self) -> List[str]:
+ """
+ Return the trail of headings this node is nested under, outermost first (the stored list, not a copy)
+ """
+ return self._ancestor_headings
+
+ @ancestors.setter
+ def ancestors(self, new_ancestors):
+ """
+ Replace the node's ancestor trail with new_ancestors (the given list is stored as is, not copied)
+ """
+ self._ancestor_headings = new_ancestors
@property
def heading(self):
diff --git a/tests/conftest.py b/tests/conftest.py
index d3a27748..669759e5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -384,6 +384,45 @@ def sample_org_data():
def get_sample_data(type):
sample_data = {
"org": {
+ "elisp.org": """
+* Emacs Khoj
+ /An Emacs interface for [[https://github.com/khoj-ai/khoj][khoj]]/
+
+** Requirements
+ - Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
+
+** Installation
+*** Direct
+ - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp
+ - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
+ #+begin_src elisp
+ ;; Khoj Package
+ (use-package khoj
+ :load-path "~/.emacs.d/lisp/khoj.el"
+ :bind ("C-c s" . 'khoj))
+ #+end_src
+
+*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]]
+ - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
+ - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
+ #+begin_src elisp
+ ;; Khoj Package
+ (use-package khoj
+ :quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el")
+ :bind ("C-c s" . 'khoj))
+ #+end_src
+
+** Usage
+ 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~
+ 2. Enter Query in Natural Language
+ e.g "What is the meaning of life?" "What are my life goals?"
+ 3. Wait for results
+ *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files*
+ 4. (Optional) Narrow down results further
+ Include/Exclude specific words from results by adding to query
+ e.g "What is the meaning of life? -god +none"
+
+""",
"readme.org": """
* Khoj
/Allow natural language search on user content like notes, images using transformer based models/
@@ -399,7 +438,7 @@ def get_sample_data(type):
git clone https://github.com/khoj-ai/khoj && cd khoj
conda env create -f environment.yml
conda activate khoj
- #+end_src"""
+ #+end_src""",
},
"markdown": {
"readme.markdown": """
diff --git a/tests/data/org/interface_emacs_readme.org b/tests/data/org/interface_emacs_readme.org
index 300f1013..ef43b3cc 100644
--- a/tests/data/org/interface_emacs_readme.org
+++ b/tests/data/org/interface_emacs_readme.org
@@ -4,10 +4,9 @@
** Requirements
- Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
-** Installation
- - Direct Install
+** Install
+*** Direct
- Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp
-
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
#+begin_src elisp
;; Khoj Package
@@ -16,7 +15,7 @@
:bind ("C-c s" . 'khoj))
#+end_src
- - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]]
+*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]]
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
#+begin_src elisp
@@ -28,17 +27,10 @@
** Usage
1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~
-
2. Enter Query in Natural Language
-
e.g "What is the meaning of life?" "What are my life goals?"
-
3. Wait for results
-
*Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files*
-
4. (Optional) Narrow down results further
-
Include/Exclude specific words from results by adding to query
-
e.g "What is the meaning of life? -god +none"
diff --git a/tests/data/org/main_readme.org b/tests/data/org/main_readme.org
index 6495d6ba..d88a2b2b 100644
--- a/tests/data/org/main_readme.org
+++ b/tests/data/org/main_readme.org
@@ -22,16 +22,16 @@
#+end_src
** Use
- - *Khoj via Emacs*
+*** *Khoj via Emacs*
- [[https://github.com/khoj-ai/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]]
- Run ~M-x khoj ~ or Call ~C-c C-s~
- - *Khoj via API*
+*** *Khoj via API*
- Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]]
- Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]]
- [[http://localhost:42110/docs][Khoj API Docs]]
- - *Call Khoj via Python Script Directly*
+*** *Call Khoj via Python Script Directly*
#+begin_src shell
python3 search_types/asymmetric.py \
--compressed-jsonl .notes.jsonl.gz \
diff --git a/tests/test_client.py b/tests/test_client.py
index 9c02a05a..52de0c95 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -321,7 +321,7 @@ def test_notes_search_with_include_filter(client, sample_org_data, default_user:
assert response.status_code == 200
# assert actual_data contains word "Emacs"
search_result = response.json()[0]["entry"]
- assert "Emacs" in search_result
+ assert "emacs" in search_result
# ----------------------------------------------------------------------------------------------------
@@ -347,6 +347,27 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user:
assert "clone" not in search_result
+# ----------------------------------------------------------------------------------------------------
+@pytest.mark.django_db(transaction=True)
+def test_notes_search_requires_parent_context(
+    client, search_config: SearchConfig, sample_org_data, default_user: KhojUser
+):
+    "Search should surface a nested entry when the query leans on its parent headings"
+    # Arrange: index the sample org notes for the default user
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
+    auth_headers = {"Authorization": "Bearer kk-secret"}
+    encoded_query = quote("Install Khoj on Emacs")
+    search_url = f"/api/search?q={encoded_query}&n=1&t=org&r=true&max_distance=0.18"
+
+    # Act
+    response = client.get(search_url, headers=auth_headers)
+
+    # Assert
+    assert response.status_code == 200
+    assert len(response.json()) == 1, "Expected only 1 result"
+    assert "Emacs load path" in response.json()[0]["entry"], "Expected 'Emacs load path' in search result"
+
+
# ----------------------------------------------------------------------------------------------------
@pytest.mark.django_db(transaction=True)
def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser):
diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py
index 2b1eb2f4..94d0560c 100644
--- a/tests/test_multiple_users.py
+++ b/tests/test_multiple_users.py
@@ -69,7 +69,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs
# Assert
assert update_response.status_code == 200
- assert len(results) == 4
+ assert len(results) == 5
for result in results:
assert result["additional"]["file"] not in source_file_symbol
diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py
index 3b80873a..fb37426a 100644
--- a/tests/test_org_to_entries.py
+++ b/tests/test_org_to_entries.py
@@ -45,9 +45,10 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
assert is_none_or_empty(jsonl_data)
-def test_entry_split_when_exceeds_max_words(tmp_path):
+def test_entry_split_when_exceeds_max_words():
"Ensure entries with compiled words exceeding max_words are split."
# Arrange
+ tmp_path = "/tmp/test.org"
entry = f"""*** Heading
\t\r
Body Line
@@ -55,7 +56,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
data = {
f"{tmp_path}": entry,
}
- expected_heading = f"* {tmp_path.stem}\n** Heading"
+ expected_heading = f"* Path: {tmp_path}\n** Heading"
# Act
# Extract Entries from specified Org files
diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py
index 7ee948e2..aa6a3cb9 100644
--- a/tests/test_orgnode.py
+++ b/tests/test_orgnode.py
@@ -161,6 +161,8 @@ Body Line 1"""
assert len(entries) == 1
# parsed heading from entry
assert entries[0].heading == "Heading[1]"
+ # track ancestors of entry
+ assert entries[0].ancestors == [f"{orgfile}"]
# ensure SOURCE link has square brackets in filename, heading escaped in rendered entries
escaped_orgfile = f"{orgfile}".replace("[1]", "\\[1\\]")
assert f":SOURCE: [[file:{escaped_orgfile}::*Heading\\[1\\]" in f"{entries[0]}"
@@ -260,6 +262,7 @@ Body Line 1"""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
+ assert entries[0].ancestors == ["test"]
# ----------------------------------------------------------------------------------------------------
@@ -284,6 +287,7 @@ Body Line 1
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
+ assert entries[0].ancestors == ["title1 title2"]
# ----------------------------------------------------------------------------------------------------
@@ -304,8 +308,10 @@ entry body
assert len(entries) == 2
assert entries[0].heading == "Title"
assert entries[0].body == "intro body\n"
+ assert entries[0].ancestors == ["Title"]
assert entries[1].heading == "Entry Heading"
assert entries[1].body == "entry body\n\n"
+ assert entries[1].ancestors == ["Title"]
# ----------------------------------------------------------------------------------------------------
@@ -326,8 +332,93 @@ entry body
assert len(entries) == 2
assert entries[0].heading == "Title1 Title2"
assert entries[0].body == "intro body\n"
+ assert entries[0].ancestors == ["Title1 Title2"]
assert entries[1].heading == "Entry Heading"
assert entries[1].body == "entry body\n\n"
+    assert entries[1].ancestors == ["Title1 Title2"]  # second entry's trail is just the file title
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_parse_org_with_single_ancestor_heading(tmp_path):
+    "A sub heading should list the file path and its parent heading as ancestors"
+    # Arrange: a top level heading followed by one child heading
+    org_text = f"""
+* Heading 1
+body 1
+** Sub Heading 1
+"""
+    org_path = create_file(tmp_path, org_text)
+    root = f"{org_path}"
+
+    # Act
+    nodes = orgnode.makelist_with_filepath(org_path)
+
+    # Assert: headings and ancestor trails, in document order
+    assert len(nodes) == 2
+    first, second = nodes
+    assert (first.heading, first.ancestors) == ("Heading 1", [root])
+    assert (second.heading, second.ancestors) == ("Sub Heading 1", [root, "Heading 1"])
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_parse_org_with_multiple_ancestor_headings(tmp_path):
+    "A deeply nested heading should list every enclosing heading as an ancestor"
+    # Arrange: three headings, each nested one level below the previous one
+    org_text = f"""
+* Heading 1
+body 1
+** Sub Heading 1
+*** Sub Sub Heading 1
+sub sub body 1
+"""
+    org_path = create_file(tmp_path, org_text)
+    root = f"{org_path}"
+
+    # Act
+    nodes = orgnode.makelist_with_filepath(org_path)
+
+    # Assert: exactly three entries, each with the file path plus its enclosing headings
+    expected = [
+        ("Heading 1", [root]),
+        ("Sub Heading 1", [root, "Heading 1"]),
+        ("Sub Sub Heading 1", [root, "Heading 1", "Sub Heading 1"]),
+    ]
+    assert [(node.heading, node.ancestors) for node in nodes] == expected
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_parse_org_with_multiple_ancestor_headings_of_siblings(tmp_path):
+    "Sibling headings share a trail; a new parent heading replaces the old one in the trail"
+    # Arrange: two sub headings under one top level heading, the first with
+    # two children and the second with one child
+    org_text = f"""
+* Heading 1
+body 1
+** Sub Heading 1
+*** Sub Sub Heading 1
+sub sub body 1
+*** Sub Sub Heading 2
+** Sub Heading 2
+*** Sub Sub Heading 3
+"""
+    org_path = create_file(tmp_path, org_text)
+    # The expected trail of every entry starts with the path of the org file
+    root = f"{org_path}"
+
+    # Act
+    nodes = orgnode.makelist_with_filepath(org_path)
+
+    # Assert: entries come back in document order with their ancestor trails
+    expected = [
+        ("Heading 1", [root]),
+        ("Sub Heading 1", [root, "Heading 1"]),
+        ("Sub Sub Heading 1", [root, "Heading 1", "Sub Heading 1"]),
+        ("Sub Sub Heading 2", [root, "Heading 1", "Sub Heading 1"]),
+        ("Sub Heading 2", [root, "Heading 1"]),
+        ("Sub Sub Heading 3", [root, "Heading 1", "Sub Heading 2"]),
+    ]
+    assert len(nodes) == 6
+    assert [(node.heading, node.ancestors) for node in nodes] == expected
# Helper Functions
diff --git a/tests/test_text_search.py b/tests/test_text_search.py
index ac24c9a0..4d08d5e8 100644
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -70,7 +70,7 @@ def test_text_search_setup_with_empty_file_creates_no_entries(
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
# Assert
- assert "Deleted 3 entries. Created 0 new entries for user " in caplog.records[-1].message
+ assert "Deleted 8 entries. Created 0 new entries for user " in caplog.records[-1].message
verify_embeddings(0, default_user)
@@ -90,7 +90,7 @@ def test_text_indexer_deletes_embedding_before_regenerate(
# Assert
assert "Deleting all entries for file type org" in caplog.text
- assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message
+ assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message
# ----------------------------------------------------------------------------------------------------
@@ -106,7 +106,7 @@ def test_text_search_setup_batch_processes(content_config: ContentConfig, defaul
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
# Assert
- assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message
+ assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message
# ----------------------------------------------------------------------------------------------------
@@ -161,7 +161,7 @@ async def test_text_search(search_config: SearchConfig):
default_user,
)
- query = "How to git install application?"
+ query = "Load Khoj on Emacs?"
# Act
hits = await text_search.query(default_user, query)
@@ -170,7 +170,7 @@ async def test_text_search(search_config: SearchConfig):
# Assert
search_result = results[0].entry
- assert "git clone" in search_result, 'search result did not contain "git clone" entry'
+ assert "Emacs load path" in search_result, 'Expected "Emacs load path" in entry'
# ----------------------------------------------------------------------------------------------------
@@ -284,9 +284,9 @@ def test_regenerate_index_with_new_entry(
final_logs = caplog.text
# Assert
- assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs
- assert "Deleted 10 entries. Created 11 new entries for user " in final_logs
- verify_embeddings(11, default_user)
+ assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs
+ assert "Deleted 13 entries. Created 14 new entries for user " in final_logs
+ verify_embeddings(14, default_user)
# ----------------------------------------------------------------------------------------------------
@@ -320,7 +320,7 @@ def test_update_index_with_duplicate_entries_in_stable_order(
# Assert
# verify only 1 entry added even if there are multiple duplicate entries
- assert "Deleted 3 entries. Created 1 new entries for user " in initial_logs
+ assert "Deleted 8 entries. Created 1 new entries for user " in initial_logs
assert "Deleted 0 entries. Created 0 new entries for user " in final_logs
verify_embeddings(1, default_user)
@@ -357,7 +357,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg
# Assert
# verify only 1 entry added even if there are multiple duplicate entries
- assert "Deleted 3 entries. Created 2 new entries for user " in initial_logs
+ assert "Deleted 8 entries. Created 2 new entries for user " in initial_logs
assert "Deleted 1 entries. Created 0 new entries for user " in final_logs
verify_embeddings(1, default_user)
@@ -388,9 +388,9 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file
final_logs = caplog.text
# Assert
- assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs
+ assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs
assert "Deleted 0 entries. Created 1 new entries for user " in final_logs
- verify_embeddings(11, default_user)
+ verify_embeddings(14, default_user)
# ----------------------------------------------------------------------------------------------------
diff --git a/tests/test_word_filter.py b/tests/test_word_filter.py
index 2ede35e7..ebd6cccf 100644
--- a/tests/test_word_filter.py
+++ b/tests/test_word_filter.py
@@ -3,6 +3,25 @@ from khoj.search_filter.word_filter import WordFilter
from khoj.utils.rawconfig import Entry
+# Test
+# ----------------------------------------------------------------------------------------------------
+def test_no_word_filter():
+    "A plain query should not be filterable and should yield no filter terms"
+    # Arrange
+    plain_query = "head tail"
+    filterer = WordFilter()
+
+    # Act
+    filterable = filterer.can_filter(plain_query)
+    terms = filterer.get_filter_terms(plain_query)
+
+    # Assert
+    assert (filterable, terms) == (False, [])
+
+
+# ----------------------------------------------------------------------------------------------------
+
+
def test_word_exclude_filter():
# Arrange
word_filter = WordFilter()
@@ -15,6 +34,7 @@ def test_word_exclude_filter():
assert can_filter == True
+# ----------------------------------------------------------------------------------------------------
def test_word_include_filter():
# Arrange
word_filter = WordFilter()
@@ -27,6 +47,7 @@ def test_word_include_filter():
assert can_filter == True
+# ----------------------------------------------------------------------------------------------------
def test_word_include_and_exclude_filter():
# Arrange
word_filter = WordFilter()
@@ -39,6 +60,7 @@ def test_word_include_and_exclude_filter():
assert can_filter == True
+# ----------------------------------------------------------------------------------------------------
def test_get_word_filter_terms():
# Arrange
word_filter = WordFilter()