import uuid

from django.contrib.auth.models import AbstractUser
from django.db import models
from pgvector.django import VectorField


class BaseModel(models.Model):
    """Abstract base that adds creation and modification timestamps."""

    # Set once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed on every save.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        abstract = True


class KhojUser(AbstractUser):
    """User account extending Django's ``AbstractUser`` with a UUID."""

    # The options are given to the field itself. Previously a second UUIDField
    # instance was passed as the first positional argument, which Django
    # interprets as ``verbose_name``; the ``default`` and ``editable=False``
    # options therefore never applied to the real field.
    # NOTE(review): this alters field options, so a new migration must be
    # generated; the column type itself should be unaffected — confirm.
    uuid = models.UUIDField(default=uuid.uuid4, editable=False)

    def save(self, *args, **kwargs) -> None:
        """Save the user, assigning a random UUID first if none is set.

        All positional and keyword arguments are forwarded unchanged to the
        parent ``save``.
        """
        # Safety net for instances whose uuid was explicitly cleared before
        # saving; the field default normally makes this a no-op.
        if not self.uuid:
            self.uuid = uuid.uuid4()
        super().save(*args, **kwargs)


class GoogleUser(models.Model):
    """Google profile data linked one-to-one with a ``KhojUser``."""

    user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
    # NOTE(review): the fields below look like Google ID token / profile
    # claims — confirm against the code that populates them.
    sub = models.CharField(max_length=200)
    azp = models.CharField(max_length=200)
    email = models.CharField(max_length=200)
    name = models.CharField(max_length=200)
    given_name = models.CharField(max_length=200)
    family_name = models.CharField(max_length=200)
    picture = models.CharField(max_length=200)
    locale = models.CharField(max_length=200)

    def __str__(self) -> str:
        """Return the stored ``name`` as the display string."""
        return self.name


class KhojApiUser(models.Model):
    """User issued API tokens to authenticate Khoj clients"""

    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
    # Tokens are unique across all users.
    token = models.CharField(max_length=50, unique=True)
    name = models.CharField(max_length=50)
    # Null until a value is recorded.
    accessed_at = models.DateTimeField(null=True, default=None)


class NotionConfig(BaseModel):
    """Notion token stored for a user."""

    token = models.CharField(max_length=200)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class GithubConfig(BaseModel):
    """GitHub token (``pat_token``) stored for a user."""

    pat_token = models.CharField(max_length=200)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class GithubRepoConfig(BaseModel):
    """A repository (name, owner, branch) attached to a ``GithubConfig``."""

    name = models.CharField(max_length=200)
    owner = models.CharField(max_length=200)
    branch = models.CharField(max_length=200)
    # Reverse accessor on GithubConfig is ``githubrepoconfig``.
    github_config = models.ForeignKey(
        GithubConfig,
        on_delete=models.CASCADE,
        related_name="githubrepoconfig",
    )


class LocalOrgConfig(BaseModel):
    """Per-user input file settings for Org content."""

    input_files = models.JSONField(default=list, null=True)
    input_filter = models.JSONField(default=list, null=True)
    index_heading_entries = models.BooleanField(default=False)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalMarkdownConfig(BaseModel):
    """Per-user input file settings for Markdown content."""

    input_files = models.JSONField(default=list, null=True)
    input_filter = models.JSONField(default=list, null=True)
    index_heading_entries = models.BooleanField(default=False)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalPdfConfig(BaseModel):
    """Per-user input file settings for PDF content."""

    input_files = models.JSONField(default=list, null=True)
    input_filter = models.JSONField(default=list, null=True)
    index_heading_entries = models.BooleanField(default=False)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class LocalPlaintextConfig(BaseModel):
    """Per-user input file settings for plaintext content."""

    input_files = models.JSONField(default=list, null=True)
    input_filter = models.JSONField(default=list, null=True)
    index_heading_entries = models.BooleanField(default=False)
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)


class OpenAIProcessorConversationConfig(BaseModel):
    """Holds an API key for the OpenAI conversation processor."""

    api_key = models.CharField(max_length=200)


class OfflineChatProcessorConversationConfig(BaseModel):
    """Holds the enabled flag for the offline chat processor."""

    enabled = models.BooleanField(default=False)


class ChatModelOptions(BaseModel):
    """Settings describing a chat model: type, name, tokenizer, prompt size."""

    class ModelType(models.TextChoices):
        """Allowed values for ``model_type``."""

        OPENAI = "openai"
        OFFLINE = "offline"

    max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
    tokenizer = models.CharField(max_length=200, default=None, null=True, blank=True)
    chat_model = models.CharField(max_length=200, default=None, null=True, blank=True)
    model_type = models.CharField(
        max_length=200,
        choices=ModelType.choices,
        default=ModelType.OPENAI,
    )


class UserConversationConfig(BaseModel):
    """Links a user (one-to-one) to an optional ``ChatModelOptions`` row."""

    user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
    setting = models.ForeignKey(
        ChatModelOptions,
        on_delete=models.CASCADE,
        default=None,
        null=True,
        blank=True,
    )


class Conversation(BaseModel):
    """A user's conversation log, stored as JSON."""

    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
    conversation_log = models.JSONField(default=dict)


class Entry(BaseModel):
    """An entry with raw and compiled text, an embedding vector and file metadata."""

    class EntryType(models.TextChoices):
        """Allowed values for ``file_type``."""

        IMAGE = "image"
        PDF = "pdf"
        PLAINTEXT = "plaintext"
        MARKDOWN = "markdown"
        ORG = "org"
        NOTION = "notion"
        GITHUB = "github"
        CONVERSATION = "conversation"

    class EntrySource(models.TextChoices):
        """Allowed values for ``file_source``."""

        COMPUTER = "computer"
        NOTION = "notion"
        GITHUB = "github"

    # Optional: an entry may exist without an owning user.
    user = models.ForeignKey(
        KhojUser,
        on_delete=models.CASCADE,
        default=None,
        null=True,
        blank=True,
    )
    # Fixed-size vector of 384 dimensions.
    embeddings = VectorField(dimensions=384)
    raw = models.TextField()
    compiled = models.TextField()
    heading = models.CharField(max_length=1000, default=None, null=True, blank=True)
    file_source = models.CharField(
        max_length=30,
        choices=EntrySource.choices,
        default=EntrySource.COMPUTER,
    )
    file_type = models.CharField(
        max_length=30,
        choices=EntryType.choices,
        default=EntryType.PLAINTEXT,
    )
    file_path = models.CharField(max_length=400, default=None, null=True, blank=True)
    file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
    url = models.URLField(max_length=400, default=None, null=True, blank=True)
    hashed_value = models.CharField(max_length=100)
    corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)


class EntryDates(BaseModel):
    """A date associated with an ``Entry``; the ``date`` column is indexed."""

    date = models.DateField()
    # Reverse accessor on Entry is ``embeddings_dates``.
    entry = models.ForeignKey(
        Entry,
        on_delete=models.CASCADE,
        related_name="embeddings_dates",
    )

    class Meta:
        indexes = [
            models.Index(fields=["date"]),
        ]