Process text content files in sorted order for stable indexing

- Image search already uses a sorted list of images to process
- Prevents the index of entries from desyncing when the entries and
  embeddings are generated by a separate server/app instance
This commit is contained in:
Debanjum Singh Solanky
2022-09-12 11:02:05 +03:00
parent a701ad08b9
commit 536f03af8f
6 changed files with 12 additions and 10 deletions

View File

@@ -77,12 +77,14 @@ def get_beancount_files(beancount_files=None, beancount_file_filters=None):
for filtered_file in glob.glob(get_absolute_path(beancount_file_filter))
}
all_beancount_files = absolute_beancount_files | filtered_beancount_files
all_beancount_files = sorted(absolute_beancount_files | filtered_beancount_files)
files_with_non_beancount_extensions = {beancount_file
for beancount_file
in all_beancount_files
if not beancount_file.endswith(".bean") and not beancount_file.endswith(".beancount")}
files_with_non_beancount_extensions = {
beancount_file
for beancount_file
in all_beancount_files
if not beancount_file.endswith(".bean") and not beancount_file.endswith(".beancount")
}
if any(files_with_non_beancount_extensions):
print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}")

View File

@@ -75,7 +75,7 @@ def get_markdown_files(markdown_files=None, markdown_file_filters=None):
for filtered_file in glob.glob(get_absolute_path(markdown_file_filter))
}
all_markdown_files = absolute_markdown_files | filtered_markdown_files
all_markdown_files = sorted(absolute_markdown_files | filtered_markdown_files)
files_with_non_markdown_extensions = {
md_file

View File

@@ -82,7 +82,7 @@ def get_org_files(org_files=None, org_file_filters=None):
for filtered_file in glob.glob(get_absolute_path(org_file_filter))
}
all_org_files = absolute_org_files | filtered_org_files
all_org_files = sorted(absolute_org_files | filtered_org_files)
files_with_non_org_extensions = {org_file for org_file in all_org_files if not org_file.endswith(".org")}
if any(files_with_non_org_extensions):