Rename DbModels Embeddings, EmbeddingsAdapter to Entry, EntryAdapter

Improves readability, as the new names more closely match the
underlying constructs

- Entry is any atomic item indexed by Khoj. This can be an org-mode
  entry, a markdown section, a PDF or Notion page etc.

- Embeddings are semantic vectors generated by the search ML model
  that encode the meaning contained in an entry's text.

- An "Entry" contains "Embeddings" vectors as well as other metadata
  about the entry, such as its filename.
This commit is contained in:
Debanjum Singh Solanky
2023-10-31 18:50:54 -07:00
parent 54a387326c
commit bcbee05a9e
15 changed files with 115 additions and 87 deletions

View File

@@ -1,6 +1,5 @@
# System Packages
import logging
import locale
from pathlib import Path
import os
import asyncio
@@ -14,7 +13,7 @@ from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
from khoj.processor.github.github_to_jsonl import GithubToJsonl
from khoj.utils.fs_syncer import collect_files, get_org_files
from database.models import LocalOrgConfig, KhojUser, Embeddings, GithubConfig
from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig
logger = logging.getLogger(__name__)
@@ -402,10 +401,10 @@ def test_text_search_setup_github(content_config: ContentConfig, default_user: K
)
# Assert
embeddings = Embeddings.objects.filter(user=default_user, file_type="github").count()
embeddings = Entry.objects.filter(user=default_user, file_type="github").count()
assert embeddings > 1
def verify_embeddings(expected_count, user):
embeddings = Embeddings.objects.filter(user=user, file_type="org").count()
embeddings = Entry.objects.filter(user=user, file_type="org").count()
assert embeddings == expected_count