mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-05 05:39:11 +00:00
Support incremental update of org-mode entries and embeddings
- What
- Hash the entries and compare to find new/updated entries
- Reuse embeddings encoded for existing entries
- Only encode embeddings for updated or new entries
- Merge the existing and new entries and embeddings to get the updated
entries, embeddings
- Why
- Given most note text entries are expected to be unchanged
across time. Reusing their earlier encoded embeddings should
significantly speed up embeddings updates
- Previously we were regenerating embeddings for all entries,
even if they had existed in previous runs
This commit is contained in:
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Define Functions
|
||||
def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file):
|
||||
def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, previous_entries=None):
|
||||
# Input Validation
|
||||
if is_none_or_empty(beancount_files) and is_none_or_empty(beancount_file_filter):
|
||||
print("At least one of beancount-files or beancount-file-filter is required to be specified")
|
||||
@@ -39,7 +39,7 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file):
|
||||
elif output_file.suffix == ".jsonl":
|
||||
dump_jsonl(jsonl_data, output_file)
|
||||
|
||||
return entries
|
||||
return list(enumerate(entries))
|
||||
|
||||
|
||||
def get_beancount_files(beancount_files=None, beancount_file_filter=None):
|
||||
|
||||
Reference in New Issue
Block a user