Rename DbModels Embeddings, EmbeddingsAdapter to Entry, EntryAdapter

Improves readability, as the new names more closely match the
underlying constructs

- Entry is any atomic item indexed by Khoj. This can be an org-mode
  entry, a markdown section, a PDF or Notion page etc.

- Embeddings are semantic vectors generated by the search ML model
  that encode the meaning contained in an entry's text.

- An "Entry" contains "Embeddings" vectors as well as other metadata
  about the entry, such as its filename.
This commit is contained in:
Debanjum Singh Solanky
2023-10-31 18:50:54 -07:00
parent 54a387326c
commit bcbee05a9e
15 changed files with 115 additions and 87 deletions

View File

@@ -114,8 +114,8 @@ class Conversation(BaseModel):
conversation_log = models.JSONField(default=dict)
class Embeddings(BaseModel):
class EmbeddingsType(models.TextChoices):
class Entry(BaseModel):
class EntryType(models.TextChoices):
IMAGE = "image"
PDF = "pdf"
PLAINTEXT = "plaintext"
@@ -130,7 +130,7 @@ class Embeddings(BaseModel):
raw = models.TextField()
compiled = models.TextField()
heading = models.CharField(max_length=1000, default=None, null=True, blank=True)
file_type = models.CharField(max_length=30, choices=EmbeddingsType.choices, default=EmbeddingsType.PLAINTEXT)
file_type = models.CharField(max_length=30, choices=EntryType.choices, default=EntryType.PLAINTEXT)
file_path = models.CharField(max_length=400, default=None, null=True, blank=True)
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
url = models.URLField(max_length=400, default=None, null=True, blank=True)
@@ -138,9 +138,9 @@ class Embeddings(BaseModel):
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
class EmbeddingsDates(BaseModel):
class EntryDates(BaseModel):
date = models.DateField()
embeddings = models.ForeignKey(Embeddings, on_delete=models.CASCADE, related_name="embeddings_dates")
entry = models.ForeignKey(Entry, on_delete=models.CASCADE, related_name="embeddings_dates")
class Meta:
indexes = [