[Multi-User Part 1]: Enable storage of settings for plaintext files based on user account (#498)

- Partition configuration for indexing local data based on user accounts
- Store indexed data in an underlying Postgres database using the `pgvector` extension (a minimal sketch of this layout follows the list below)
- Add migrations for all relevant user data and embedding generation. Lookup-time performance has seen very little optimization so far
- Apply filters using SQL queries
- Start removing many server-level configuration settings
- Configure the GitHub test action to run on every PR, in a containerized environment with a database
- Update the Docker image and docker-compose.yml to work with the new application design
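
As a rough illustration of how the per-user partitioning, `pgvector` storage, and SQL-based filtering described above can fit together, here is a minimal sketch. It assumes a Django ORM setup with the `pgvector` Django integration; `IndexedEntry`, its fields, and `search_entries` are hypothetical names used for illustration, not the actual schema introduced by this commit.

```python
# Minimal sketch of per-user entry storage backed by Postgres + pgvector.
# Assumes Django ORM with the pgvector Django integration; the model and
# field names below are illustrative, not the actual Khoj schema.
from django.contrib.auth.models import User
from django.db import models
from pgvector.django import CosineDistance, VectorField


class IndexedEntry(models.Model):
    # Each indexed entry is partitioned by the owning user account
    user = models.ForeignKey(User, on_delete=models.CASCADE)
    file_path = models.CharField(max_length=1024)
    raw_text = models.TextField()
    # Embedding dimension depends on the sentence-transformer model in use
    embedding = VectorField(dimensions=384)


def search_entries(user: User, query_embedding: list[float], limit: int = 10):
    """Filter by user account in SQL, then rank by cosine distance via pgvector."""
    return (
        IndexedEntry.objects.filter(user=user)
        .annotate(distance=CosineDistance("embedding", query_embedding))
        .order_by("distance")[:limit]
    )
```

In a setup like this, the migrations mentioned above would be the generated schema changes that add the entry table and its vector column, and filters translate into plain SQL `WHERE` clauses on the user-scoped queryset.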
Authored by sabaimran on 2023-10-26 09:42:29 -07:00, committed by GitHub
commit 216acf545f (parent 963cd165eb)
60 changed files with 1827 additions and 1792 deletions

```diff
@@ -4,7 +4,7 @@ import os
 # Internal Packages
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
-from khoj.processor.text_to_jsonl import TextToJsonl
+from khoj.processor.text_to_jsonl import TextEmbeddings
 from khoj.utils.helpers import is_none_or_empty
 from khoj.utils.rawconfig import Entry
 from khoj.utils.fs_syncer import get_org_files
@@ -63,7 +63,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
     # Split each entry from specified Org files by max words
     jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-        TextToJsonl.split_entries_by_max_tokens(
+        TextEmbeddings.split_entries_by_max_tokens(
             OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
         )
     )
@@ -86,7 +86,7 @@ def test_entry_split_drops_large_words():
     # Act
     # Split entry by max words and drop words larger than max word length
-    processed_entry = TextToJsonl.split_entries_by_max_tokens([entry], max_word_length=5)[0]
+    processed_entry = TextEmbeddings.split_entries_by_max_tokens([entry], max_word_length=5)[0]
     # Assert
     # "Heading" dropped from compiled version because it's over the set max word limit
```