mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
This will be useful for updating and deleting entries by their data source. The data source can be one of Computer, Github or Notion for now. Store each file's/entry's source in the database.
161 lines
5.6 KiB
Python
161 lines
5.6 KiB
Python
import uuid
|
|
|
|
from django.db import models
|
|
from django.contrib.auth.models import AbstractUser
|
|
from pgvector.django import VectorField
|
|
|
|
|
|
class BaseModel(models.Model):
    """Abstract base model that adds creation and modification timestamps."""

    # Filled in once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed each time the instance is saved through save().
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Abstract: no table is created for this model itself; subclasses
        # inherit the two timestamp columns.
        abstract = True
|
|
|
|
|
|
class KhojUser(AbstractUser):
    """Application user: Django's ``AbstractUser`` plus a UUID identifier."""

    # This field used to be declared as UUIDField(UUIDField(...)). That passed
    # the inner field as the outer field's first positional argument
    # (verbose_name), so `default` and `editable=False` never applied to the
    # real field. Declaring a single field makes both options take effect.
    # NOTE(review): this changes Django's migration state (default, editable,
    # verbose_name); generate a migration alongside this change.
    uuid = models.UUIDField(default=uuid.uuid4, editable=False)

    def save(self, *args, **kwargs):
        """Persist the user, assigning a random UUID first if none is set.

        All positional and keyword arguments are forwarded unchanged to
        ``AbstractUser.save``.
        """
        # Kept as a safety net for instances whose uuid was explicitly
        # cleared; the field default covers ordinary construction.
        if not self.uuid:
            self.uuid = uuid.uuid4()
        super().save(*args, **kwargs)
|
|
|
|
|
|
class GoogleUser(models.Model):
    """Google account profile attached one-to-one to a ``KhojUser``."""

    # Removing the KhojUser also removes this profile row.
    user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)

    # NOTE(review): the names below look like Google ID-token claims
    # (sub, azp, ...) - confirm against the code that populates them.
    sub = models.CharField(max_length=200)
    azp = models.CharField(max_length=200)
    email = models.CharField(max_length=200)
    name = models.CharField(max_length=200)
    given_name = models.CharField(max_length=200)
    family_name = models.CharField(max_length=200)
    picture = models.CharField(max_length=200)
    locale = models.CharField(max_length=200)

    def __str__(self):
        """Return the stored ``name`` as the display string."""
        return self.name
|
|
|
|
|
|
class KhojApiUser(models.Model):
    """User issued API tokens to authenticate Khoj clients"""

    # A user may own several tokens; they are removed together with the user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
    # Token value; uniqueness is enforced at the database level.
    token = models.CharField(unique=True, max_length=50)
    # Label for the token.
    name = models.CharField(max_length=50)
    # Nullable timestamp that defaults to None.
    # NOTE(review): presumably the last time the token was used - confirm.
    accessed_at = models.DateTimeField(default=None, null=True)
|
|
|
|
|
|
class NotionConfig(BaseModel):
    """Per-user Notion configuration holding a token string."""

    token = models.CharField(max_length=200)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class GithubConfig(BaseModel):
    """Per-user Github configuration holding a token string."""

    # NOTE(review): presumably a Github personal access token - confirm.
    pat_token = models.CharField(max_length=200)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class GithubRepoConfig(BaseModel):
    """A repository (name, owner, branch) attached to a ``GithubConfig``."""

    name = models.CharField(max_length=200)
    owner = models.CharField(max_length=200)
    branch = models.CharField(max_length=200)
    # Reverse accessor on GithubConfig is `githubrepoconfig`; repositories
    # are deleted together with their parent configuration.
    github_config = models.ForeignKey(
        GithubConfig,
        related_name="githubrepoconfig",
        on_delete=models.CASCADE,
    )
|
|
|
|
|
|
class LocalOrgConfig(BaseModel):
    """Per-user settings for local Org content."""

    # Both JSON fields default to a fresh empty list and may be NULL.
    input_files = models.JSONField(null=True, default=list)
    input_filter = models.JSONField(null=True, default=list)
    # Off by default.
    index_heading_entries = models.BooleanField(default=False)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class LocalMarkdownConfig(BaseModel):
    """Per-user settings for local Markdown content."""

    # Both JSON fields default to a fresh empty list and may be NULL.
    input_files = models.JSONField(null=True, default=list)
    input_filter = models.JSONField(null=True, default=list)
    # Off by default.
    index_heading_entries = models.BooleanField(default=False)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class LocalPdfConfig(BaseModel):
    """Per-user settings for local PDF content."""

    # Both JSON fields default to a fresh empty list and may be NULL.
    input_files = models.JSONField(null=True, default=list)
    input_filter = models.JSONField(null=True, default=list)
    # Off by default.
    index_heading_entries = models.BooleanField(default=False)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class LocalPlaintextConfig(BaseModel):
    """Per-user settings for local plaintext content."""

    # Both JSON fields default to a fresh empty list and may be NULL.
    input_files = models.JSONField(null=True, default=list)
    input_filter = models.JSONField(null=True, default=list)
    # Off by default.
    index_heading_entries = models.BooleanField(default=False)
    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
|
|
|
|
|
class OpenAIProcessorConversationConfig(BaseModel):
    """Stores the API key string for the OpenAI conversation processor."""

    api_key = models.CharField(max_length=200)
|
|
|
|
|
|
class OfflineChatProcessorConversationConfig(BaseModel):
    """Holds the on/off flag for the offline chat processor."""

    # Disabled unless explicitly switched on.
    enabled = models.BooleanField(default=False)
|
|
|
|
|
|
class ChatModelOptions(BaseModel):
    """Options describing a chat model: its type, name, tokenizer and prompt size."""

    class ModelType(models.TextChoices):
        """Allowed values for ``model_type``."""

        OPENAI = "openai"
        OFFLINE = "offline"

    # The three settings below are optional: nullable, blank-able, default None.
    max_prompt_size = models.IntegerField(null=True, blank=True, default=None)
    tokenizer = models.CharField(
        max_length=200,
        null=True,
        blank=True,
        default=None,
    )
    chat_model = models.CharField(
        max_length=200,
        null=True,
        blank=True,
        default=None,
    )
    # Restricted to the ModelType choices; OPENAI unless stated otherwise.
    model_type = models.CharField(
        max_length=200,
        choices=ModelType.choices,
        default=ModelType.OPENAI,
    )
|
|
|
|
|
|
class UserConversationConfig(BaseModel):
    """Links a user to at most one ``ChatModelOptions`` row."""

    # One row per user; removed together with the user.
    user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
    # Optional reference; deleting the referenced ChatModelOptions cascades
    # to this row.
    setting = models.ForeignKey(
        ChatModelOptions,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        default=None,
    )
|
|
|
|
|
|
class Conversation(BaseModel):
    """A user's conversation log stored as JSON."""

    # Deleted together with the owning user.
    user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
    # Defaults to a fresh empty dict.
    conversation_log = models.JSONField(default=dict)
|
|
|
|
|
|
class Entry(BaseModel):
    """An indexed piece of content with its embedding vector and file metadata."""

    class EntryType(models.TextChoices):
        """Allowed values for ``file_type``."""

        IMAGE = "image"
        PDF = "pdf"
        PLAINTEXT = "plaintext"
        MARKDOWN = "markdown"
        ORG = "org"
        NOTION = "notion"
        GITHUB = "github"
        CONVERSATION = "conversation"

    class EntrySource(models.TextChoices):
        """Allowed values for ``file_source``."""

        COMPUTER = "computer"
        NOTION = "notion"
        GITHUB = "github"

    # Optional owner; entries are deleted together with their user.
    user = models.ForeignKey(
        KhojUser,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        default=None,
    )
    # Fixed-size vector with 384 dimensions.
    embeddings = VectorField(dimensions=384)
    raw = models.TextField()
    compiled = models.TextField()
    heading = models.CharField(
        max_length=1000,
        null=True,
        blank=True,
        default=None,
    )
    # Restricted to EntrySource choices; COMPUTER unless stated otherwise.
    file_source = models.CharField(
        max_length=30,
        choices=EntrySource.choices,
        default=EntrySource.COMPUTER,
    )
    # Restricted to EntryType choices; PLAINTEXT unless stated otherwise.
    file_type = models.CharField(
        max_length=30,
        choices=EntryType.choices,
        default=EntryType.PLAINTEXT,
    )
    file_path = models.CharField(
        max_length=400,
        null=True,
        blank=True,
        default=None,
    )
    file_name = models.CharField(
        max_length=400,
        null=True,
        blank=True,
        default=None,
    )
    url = models.URLField(
        max_length=400,
        null=True,
        blank=True,
        default=None,
    )
    hashed_value = models.CharField(max_length=100)
    # Random UUID generated per row by default; not editable through forms.
    corpus_id = models.UUIDField(editable=False, default=uuid.uuid4)
|
|
|
|
|
|
class EntryDates(BaseModel):
    """A date associated with an ``Entry``."""

    date = models.DateField()
    # Reverse accessor on Entry is `embeddings_dates`; rows are removed
    # together with their entry.
    entry = models.ForeignKey(
        Entry,
        related_name="embeddings_dates",
        on_delete=models.CASCADE,
    )

    class Meta:
        # Database index on the date column.
        indexes = [models.Index(fields=["date"])]
|