Files
khoj/src/database/models/__init__.py
Debanjum Singh Solanky 9ab327a2b6 Store the data source of each entry in database
This will be useful for updating, deleting entries by their data
source. Data source can be one of Computer, Github or Notion for now

Store each file/entries source in database
2023-11-07 02:18:48 -08:00

161 lines
5.6 KiB
Python

import uuid
from django.db import models
from django.contrib.auth.models import AbstractUser
from pgvector.django import VectorField
class BaseModel(models.Model):
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
class Meta:
abstract = True
class KhojUser(AbstractUser):
uuid = models.UUIDField(models.UUIDField(default=uuid.uuid4, editable=False))
def save(self, *args, **kwargs):
if not self.uuid:
self.uuid = uuid.uuid4()
super().save(*args, **kwargs)
class GoogleUser(models.Model):
user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
sub = models.CharField(max_length=200)
azp = models.CharField(max_length=200)
email = models.CharField(max_length=200)
name = models.CharField(max_length=200)
given_name = models.CharField(max_length=200)
family_name = models.CharField(max_length=200)
picture = models.CharField(max_length=200)
locale = models.CharField(max_length=200)
def __str__(self):
return self.name
class KhojApiUser(models.Model):
"""User issued API tokens to authenticate Khoj clients"""
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
token = models.CharField(max_length=50, unique=True)
name = models.CharField(max_length=50)
accessed_at = models.DateTimeField(null=True, default=None)
class NotionConfig(BaseModel):
token = models.CharField(max_length=200)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class GithubConfig(BaseModel):
pat_token = models.CharField(max_length=200)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class GithubRepoConfig(BaseModel):
name = models.CharField(max_length=200)
owner = models.CharField(max_length=200)
branch = models.CharField(max_length=200)
github_config = models.ForeignKey(GithubConfig, on_delete=models.CASCADE, related_name="githubrepoconfig")
class LocalOrgConfig(BaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class LocalMarkdownConfig(BaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class LocalPdfConfig(BaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class LocalPlaintextConfig(BaseModel):
input_files = models.JSONField(default=list, null=True)
input_filter = models.JSONField(default=list, null=True)
index_heading_entries = models.BooleanField(default=False)
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
class OpenAIProcessorConversationConfig(BaseModel):
api_key = models.CharField(max_length=200)
class OfflineChatProcessorConversationConfig(BaseModel):
enabled = models.BooleanField(default=False)
class ChatModelOptions(BaseModel):
class ModelType(models.TextChoices):
OPENAI = "openai"
OFFLINE = "offline"
max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
tokenizer = models.CharField(max_length=200, default=None, null=True, blank=True)
chat_model = models.CharField(max_length=200, default=None, null=True, blank=True)
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OPENAI)
class UserConversationConfig(BaseModel):
user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
setting = models.ForeignKey(ChatModelOptions, on_delete=models.CASCADE, default=None, null=True, blank=True)
class Conversation(BaseModel):
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
conversation_log = models.JSONField(default=dict)
class Entry(BaseModel):
class EntryType(models.TextChoices):
IMAGE = "image"
PDF = "pdf"
PLAINTEXT = "plaintext"
MARKDOWN = "markdown"
ORG = "org"
NOTION = "notion"
GITHUB = "github"
CONVERSATION = "conversation"
class EntrySource(models.TextChoices):
COMPUTER = "computer"
NOTION = "notion"
GITHUB = "github"
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
embeddings = VectorField(dimensions=384)
raw = models.TextField()
compiled = models.TextField()
heading = models.CharField(max_length=1000, default=None, null=True, blank=True)
file_source = models.CharField(max_length=30, choices=EntrySource.choices, default=EntrySource.COMPUTER)
file_type = models.CharField(max_length=30, choices=EntryType.choices, default=EntryType.PLAINTEXT)
file_path = models.CharField(max_length=400, default=None, null=True, blank=True)
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
url = models.URLField(max_length=400, default=None, null=True, blank=True)
hashed_value = models.CharField(max_length=100)
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
class EntryDates(BaseModel):
date = models.DateField()
entry = models.ForeignKey(Entry, on_delete=models.CASCADE, related_name="embeddings_dates")
class Meta:
indexes = [
models.Index(fields=["date"]),
]