mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 21:29:08 +00:00
[Multi-User Part 1]: Enable storage of settings for plaintext files based on user account (#498)
- Partition configuration for indexing local data based on user accounts - Store indexed data in an underlying postgres db using the `pgvector` extension - Add migrations for all relevant user data and embeddings generation. Very little performance optimization has been done for the lookup time - Apply filters using SQL queries - Start removing many server-level configuration settings - Configure GitHub test actions to run during any PR. Update the test action to run in a containerized environment with a DB. - Update the Docker image and docker-compose.yml to work with the new application design
This commit is contained in:
@@ -2,11 +2,25 @@ import uuid
|
||||
|
||||
from django.db import models
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
from pgvector.django import VectorField
|
||||
|
||||
|
||||
class BaseModel(models.Model):
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
updated_at = models.DateTimeField(auto_now=True)
|
||||
|
||||
class Meta:
|
||||
abstract = True
|
||||
|
||||
|
||||
class KhojUser(AbstractUser):
|
||||
uuid = models.UUIDField(models.UUIDField(default=uuid.uuid4, editable=False))
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
if not self.uuid:
|
||||
self.uuid = uuid.uuid4()
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
|
||||
class GoogleUser(models.Model):
|
||||
user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
|
||||
@@ -23,31 +37,85 @@ class GoogleUser(models.Model):
|
||||
return self.name
|
||||
|
||||
|
||||
class Configuration(models.Model):
|
||||
user = models.OneToOneField(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class NotionConfig(models.Model):
|
||||
class NotionConfig(BaseModel):
|
||||
token = models.CharField(max_length=200)
|
||||
compressed_jsonl = models.CharField(max_length=300)
|
||||
embeddings_file = models.CharField(max_length=300)
|
||||
config = models.OneToOneField(Configuration, on_delete=models.CASCADE)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class GithubConfig(models.Model):
|
||||
class GithubConfig(BaseModel):
|
||||
pat_token = models.CharField(max_length=200)
|
||||
compressed_jsonl = models.CharField(max_length=300)
|
||||
embeddings_file = models.CharField(max_length=300)
|
||||
config = models.OneToOneField(Configuration, on_delete=models.CASCADE)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class GithubRepoConfig(models.Model):
|
||||
class GithubRepoConfig(BaseModel):
|
||||
name = models.CharField(max_length=200)
|
||||
owner = models.CharField(max_length=200)
|
||||
branch = models.CharField(max_length=200)
|
||||
github_config = models.ForeignKey(GithubConfig, on_delete=models.CASCADE)
|
||||
github_config = models.ForeignKey(GithubConfig, on_delete=models.CASCADE, related_name="githubrepoconfig")
|
||||
|
||||
|
||||
class ConversationProcessorConfig(models.Model):
|
||||
class LocalOrgConfig(BaseModel):
|
||||
input_files = models.JSONField(default=list, null=True)
|
||||
input_filter = models.JSONField(default=list, null=True)
|
||||
index_heading_entries = models.BooleanField(default=False)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class LocalMarkdownConfig(BaseModel):
|
||||
input_files = models.JSONField(default=list, null=True)
|
||||
input_filter = models.JSONField(default=list, null=True)
|
||||
index_heading_entries = models.BooleanField(default=False)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class LocalPdfConfig(BaseModel):
|
||||
input_files = models.JSONField(default=list, null=True)
|
||||
input_filter = models.JSONField(default=list, null=True)
|
||||
index_heading_entries = models.BooleanField(default=False)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class LocalPlaintextConfig(BaseModel):
|
||||
input_files = models.JSONField(default=list, null=True)
|
||||
input_filter = models.JSONField(default=list, null=True)
|
||||
index_heading_entries = models.BooleanField(default=False)
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE)
|
||||
|
||||
|
||||
class ConversationProcessorConfig(BaseModel):
|
||||
conversation = models.JSONField()
|
||||
enable_offline_chat = models.BooleanField(default=False)
|
||||
|
||||
|
||||
class Embeddings(BaseModel):
|
||||
class EmbeddingsType(models.TextChoices):
|
||||
IMAGE = "image"
|
||||
PDF = "pdf"
|
||||
PLAINTEXT = "plaintext"
|
||||
MARKDOWN = "markdown"
|
||||
ORG = "org"
|
||||
NOTION = "notion"
|
||||
GITHUB = "github"
|
||||
CONVERSATION = "conversation"
|
||||
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
embeddings = VectorField(dimensions=384)
|
||||
raw = models.TextField()
|
||||
compiled = models.TextField()
|
||||
heading = models.CharField(max_length=1000, default=None, null=True, blank=True)
|
||||
file_type = models.CharField(max_length=30, choices=EmbeddingsType.choices, default=EmbeddingsType.PLAINTEXT)
|
||||
file_path = models.CharField(max_length=400, default=None, null=True, blank=True)
|
||||
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
|
||||
url = models.URLField(max_length=400, default=None, null=True, blank=True)
|
||||
hashed_value = models.CharField(max_length=100)
|
||||
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
|
||||
|
||||
|
||||
class EmbeddingsDates(BaseModel):
|
||||
date = models.DateField()
|
||||
embeddings = models.ForeignKey(Embeddings, on_delete=models.CASCADE, related_name="embeddings_dates")
|
||||
|
||||
class Meta:
|
||||
indexes = [
|
||||
models.Index(fields=["date"]),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user