mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Only index files returned by input-filter globs in fs_syncer
Ignore .org, .pdf etc. suffixed directories under `input-filter' from being evaluated as files. Explicitly filter results by input-filter globs to only index files, not directory for each text type Add test to prevent regression Closes #448
This commit is contained in:
@@ -31,6 +31,22 @@ def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_n
|
||||
get_org_files(org_config_with_only_new_file)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_get_org_files_with_org_suffixed_dir_doesnt_raise_error(tmp_path: Path):
|
||||
# Arrange
|
||||
orgfile = tmp_path / "directory.org" / "file.org"
|
||||
orgfile.parent.mkdir()
|
||||
with open(orgfile, "w") as f:
|
||||
f.write("* Heading\n- List item\n")
|
||||
org_content_config = TextContentConfig(
|
||||
input_filter=[f"{tmp_path}/**/*"], compressed_jsonl="test.jsonl", embeddings_file="test.pt"
|
||||
)
|
||||
|
||||
# Act
|
||||
# should not raise IsADirectoryError and return orgfile
|
||||
assert get_org_files(org_content_config) == {f"{orgfile}": "* Heading\n- List item\n"}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_text_search_setup_with_empty_file_raises_error(
|
||||
org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
|
||||
|
||||
Reference in New Issue
Block a user