From bbae7dd83c33f71c0457fe6f6cdd3b614783e91f Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 15 Nov 2023 12:50:39 -0800 Subject: [PATCH 1/8] Update logic for creating a new user to use aupdate_or_create --- src/database/adapters/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index a61b3d64..552e9124 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -80,10 +80,10 @@ async def get_or_create_user(token: dict) -> KhojUser: async def create_user_by_token(token: dict) -> KhojUser: - user = await KhojUser.objects.filter(email=token.get("email")).aget_or_none() - if not user: - user = await KhojUser.objects.acreate(username=token.get("email"), email=token.get("email")) - await user.asave() + user = await KhojUser.objects.filter(email=token.get("email")).aupdate_or_create( + defaults={"username": token.get("email"), "email": token.get("email")} + ) + await user.asave() await GoogleUser.objects.acreate( sub=token.get("sub"), From 70f5d0ed3c2b9cf68829ac07880661206c8ddaad Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 15 Nov 2023 14:07:25 -0800 Subject: [PATCH 2/8] Add a dev workflow for GitHub actions, change the production workflow to only kick off when pushed to master --- .github/workflows/dockerize_dev.yml | 43 ++++++++++++++++++++++ .github/workflows/dockerize_production.yml | 1 - 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/dockerize_dev.yml diff --git a/.github/workflows/dockerize_dev.yml b/.github/workflows/dockerize_dev.yml new file mode 100644 index 00000000..3257d457 --- /dev/null +++ b/.github/workflows/dockerize_dev.yml @@ -0,0 +1,43 @@ +name: dockerize-prod + +on: + pull_request: + paths: + - src/khoj/** + - config/** + - pyproject.toml + - prod.Dockerfile + - .github/workflows/dockerize_dev.yml + workflow_dispatch: + +env: + DOCKER_IMAGE_TAG: 'dev' + +jobs: + 
build: + name: Build Production Docker Image, Push to Container Registry + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.PAT }} + + - name: 📦 Build and Push Docker Image + uses: docker/build-push-action@v2 + with: + context: . + file: prod.Dockerfile + platforms: linux/amd64 + push: true + tags: ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }} + build-args: | + PORT=42110 diff --git a/.github/workflows/dockerize_production.yml b/.github/workflows/dockerize_production.yml index 97fc876d..2be5e5cb 100644 --- a/.github/workflows/dockerize_production.yml +++ b/.github/workflows/dockerize_production.yml @@ -1,7 +1,6 @@ name: dockerize-prod on: - pull_request: push: tags: - "*" From 10be8dfad9ff407024bf56e842b135926a20de59 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 15 Nov 2023 14:09:28 -0800 Subject: [PATCH 3/8] Rename dockerize dev action to be more accurate --- .github/workflows/dockerize_dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dockerize_dev.yml b/.github/workflows/dockerize_dev.yml index 3257d457..1d037ce7 100644 --- a/.github/workflows/dockerize_dev.yml +++ b/.github/workflows/dockerize_dev.yml @@ -1,4 +1,4 @@ -name: dockerize-prod +name: dockerize-dev on: pull_request: From 245a9cbf632ca7ef0f47f84edd1e7f904187644d Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 15 Nov 2023 14:35:42 -0800 Subject: [PATCH 4/8] Fix return type of the update_or_create method --- src/database/adapters/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index 552e9124..7d857c7f 100644 --- a/src/database/adapters/__init__.py +++ 
b/src/database/adapters/__init__.py @@ -80,7 +80,7 @@ async def get_or_create_user(token: dict) -> KhojUser: async def create_user_by_token(token: dict) -> KhojUser: - user = await KhojUser.objects.filter(email=token.get("email")).aupdate_or_create( + user, _ = await KhojUser.objects.filter(email=token.get("email")).aupdate_or_create( defaults={"username": token.get("email"), "email": token.get("email")} ) await user.asave() From f88a5867b485623bee69e9c4efbd38e2919319f2 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 15 Nov 2023 17:04:48 -0800 Subject: [PATCH 5/8] Allow dockerize step to run for prod from PR temporarily --- .github/workflows/dockerize_production.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dockerize_production.yml b/.github/workflows/dockerize_production.yml index 2be5e5cb..97fc876d 100644 --- a/.github/workflows/dockerize_production.yml +++ b/.github/workflows/dockerize_production.yml @@ -1,6 +1,7 @@ name: dockerize-prod on: + pull_request: push: tags: - "*" From 0679b2a7bd3e9baf49be484ddf413b6fa24215fc Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 15 Nov 2023 17:05:49 -0800 Subject: [PATCH 6/8] Use embeddings model store from state in text to entries Do not need to instantiate it separately. 
In all other places we're using the embeddings model store in global state anyway --- src/khoj/processor/text_to_entries.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index 66a489eb..ac42105a 100644 --- a/src/khoj/processor/text_to_entries.py +++ b/src/khoj/processor/text_to_entries.py @@ -6,15 +6,15 @@ import logging import uuid from tqdm import tqdm from typing import Callable, List, Tuple, Set, Any +from khoj.utils import state from khoj.utils.helpers import is_none_or_empty, timer, batcher # Internal Packages from khoj.utils.rawconfig import Entry -from khoj.processor.embeddings import EmbeddingsModel from khoj.search_filter.date_filter import DateFilter from database.models import KhojUser, Entry as DbEntry, EntryDates -from database.adapters import EntryAdapters, get_or_create_search_model +from database.adapters import EntryAdapters logger = logging.getLogger(__name__) @@ -22,8 +22,7 @@ logger = logging.getLogger(__name__) class TextToEntries(ABC): def __init__(self, config: Any = None): - bi_encoder_name = get_or_create_search_model().bi_encoder - self.embeddings_model = EmbeddingsModel(bi_encoder_name) + self.embeddings_model = state.embeddings_model self.config = config self.date_filter = DateFilter() From 08a057bdd5b4c425f64cd478f04fe194b2f478c2 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 15 Nov 2023 17:12:54 -0800 Subject: [PATCH 7/8] Rename SearchModel to SearchModelConfig DB model, Require Cross-Encoder --- src/database/adapters/__init__.py | 6 ++-- src/database/admin.py | 4 +-- ...18_searchmodelconfig_delete_searchmodel.py | 30 +++++++++++++++++++ src/database/models/__init__.py | 6 ++-- src/khoj/migrations/migrate_server_pg.py | 8 ++--- tests/helpers.py | 4 +-- 6 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py diff --git 
a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index 7d857c7f..8aa71577 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -31,7 +31,7 @@ from database.models import ( GithubRepoConfig, Conversation, ChatModelOptions, - SearchModel, + SearchModelConfig, Subscription, UserConversationConfig, OpenAIProcessorConversationConfig, @@ -216,9 +216,9 @@ async def set_user_github_config(user: KhojUser, pat_token: str, repos: list): def get_or_create_search_model(): - search_model = SearchModel.objects.filter().first() + search_model = SearchModelConfig.objects.filter().first() if not search_model: - search_model = SearchModel.objects.create() + search_model = SearchModelConfig.objects.create() return search_model diff --git a/src/database/admin.py b/src/database/admin.py index a2aa85e2..8d2130ba 100644 --- a/src/database/admin.py +++ b/src/database/admin.py @@ -8,7 +8,7 @@ from database.models import ( ChatModelOptions, OpenAIProcessorConversationConfig, OfflineChatProcessorConversationConfig, - SearchModel, + SearchModelConfig, Subscription, ) @@ -17,5 +17,5 @@ admin.site.register(KhojUser, UserAdmin) admin.site.register(ChatModelOptions) admin.site.register(OpenAIProcessorConversationConfig) admin.site.register(OfflineChatProcessorConversationConfig) -admin.site.register(SearchModel) +admin.site.register(SearchModelConfig) admin.site.register(Subscription) diff --git a/src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py b/src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py new file mode 100644 index 00000000..a8100370 --- /dev/null +++ b/src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py @@ -0,0 +1,30 @@ +# Generated by Django 4.2.5 on 2023-11-16 01:13 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0017_searchmodel"), + ] + + operations = [ + migrations.CreateModel( + 
name="SearchModelConfig", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ("name", models.CharField(default="default", max_length=200)), + ("model_type", models.CharField(choices=[("text", "Text")], default="text", max_length=200)), + ("bi_encoder", models.CharField(default="thenlper/gte-small", max_length=200)), + ("cross_encoder", models.CharField(default="cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=200)), + ], + options={ + "abstract": False, + }, + ), + migrations.DeleteModel( + name="SearchModel", + ), + ] diff --git a/src/database/models/__init__.py b/src/database/models/__init__.py index 5571c5a7..92848e5c 100644 --- a/src/database/models/__init__.py +++ b/src/database/models/__init__.py @@ -102,16 +102,14 @@ class LocalPlaintextConfig(BaseModel): user = models.ForeignKey(KhojUser, on_delete=models.CASCADE) -class SearchModel(BaseModel): +class SearchModelConfig(BaseModel): class ModelType(models.TextChoices): TEXT = "text" name = models.CharField(max_length=200, default="default") model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.TEXT) bi_encoder = models.CharField(max_length=200, default="thenlper/gte-small") - cross_encoder = models.CharField( - max_length=200, default="cross-encoder/ms-marco-MiniLM-L-6-v2", null=True, blank=True - ) + cross_encoder = models.CharField(max_length=200, default="cross-encoder/ms-marco-MiniLM-L-6-v2") class OpenAIProcessorConversationConfig(BaseModel): diff --git a/src/khoj/migrations/migrate_server_pg.py b/src/khoj/migrations/migrate_server_pg.py index 27226d9f..434e27d7 100644 --- a/src/khoj/migrations/migrate_server_pg.py +++ b/src/khoj/migrations/migrate_server_pg.py @@ -64,7 +64,7 @@ from database.models import ( OpenAIProcessorConversationConfig, OfflineChatProcessorConversationConfig, 
ChatModelOptions, - SearchModel, + SearchModelConfig, ) logger = logging.getLogger(__name__) @@ -87,12 +87,12 @@ def migrate_server_pg(args): if "search-type" in raw_config and raw_config["search-type"]: if "asymmetric" in raw_config["search-type"]: # Delete all existing search models - SearchModel.objects.filter(model_type=SearchModel.ModelType.TEXT).delete() + SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete() # Create new search model from existing Khoj YAML config asymmetric_search = raw_config["search-type"]["asymmetric"] - SearchModel.objects.create( + SearchModelConfig.objects.create( name="default", - model_type=SearchModel.ModelType.TEXT, + model_type=SearchModelConfig.ModelType.TEXT, bi_encoder=asymmetric_search.get("encoder"), cross_encoder=asymmetric_search.get("cross-encoder"), ) diff --git a/tests/helpers.py b/tests/helpers.py index bf30a80d..079eb475 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -7,7 +7,7 @@ from database.models import ( ChatModelOptions, OfflineChatProcessorConversationConfig, OpenAIProcessorConversationConfig, - SearchModel, + SearchModelConfig, UserConversationConfig, Conversation, Subscription, @@ -74,7 +74,7 @@ class ConversationFactory(factory.django.DjangoModelFactory): class SearchModelFactory(factory.django.DjangoModelFactory): class Meta: - model = SearchModel + model = SearchModelConfig name = "default" model_type = "text" From 348cc0cf0e116fb27961dd546ad733128c46b4a2 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 15 Nov 2023 17:17:09 -0800 Subject: [PATCH 8/8] Use better name for DB adapter func to create user by Google token --- src/database/adapters/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index 8aa71577..50f850a1 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -75,11 +75,11 @@ async def 
delete_khoj_token(user: KhojUser, token: str): async def get_or_create_user(token: dict) -> KhojUser: user = await get_user_by_token(token) if not user: - user = await create_user_by_token(token) + user = await create_user_by_google_token(token) return user -async def create_user_by_token(token: dict) -> KhojUser: +async def create_user_by_google_token(token: dict) -> KhojUser: user, _ = await KhojUser.objects.filter(email=token.get("email")).aupdate_or_create( defaults={"username": token.get("email"), "email": token.get("email")} )