From 4e98acbca754dc485eb257a567361da967871542 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 20 Nov 2023 14:52:37 -0800 Subject: [PATCH 01/30] Update minimum pydantic version to one with model_validate function --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a457aec6..17c11e77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "tiktoken >= 0.3.2", "tenacity >= 8.2.2", "pillow == 9.3.0", - "pydantic >= 1.10.10", + "pydantic >= 2.0.0", "pyyaml == 6.0", "rich >= 13.3.1", "schedule == 1.1.0", From d61b0dd55c0b78db6ea5cf080a03dc7f74ec9471 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 20 Nov 2023 14:55:00 -0800 Subject: [PATCH 02/30] Add Khoj Django app package to sys path to load Django module via pip install --- src/khoj/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/khoj/main.py b/src/khoj/main.py index 9fa65fc3..022efcc1 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -25,6 +25,7 @@ from django.core.asgi import get_asgi_application from django.core.management import call_command # Initialize Django +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") django.setup() From c07401cf76ae56f298b0c312ef2347ff43245b64 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 20 Nov 2023 16:43:48 -0800 Subject: [PATCH 03/30] Fix, Improve chat config via CLI on first run by using defaults - Fix setting prompt size for online chat - generally improve chat config via cli by using default chat model, prompt size for online and offline chat --- src/khoj/utils/constants.py | 1 + src/khoj/utils/initialization.py | 44 +++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index 8a106153..ec23cddd 100644 --- 
a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -7,6 +7,7 @@ app_env_filepath = "~/.khoj/env" telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry" content_directory = "~/.khoj/content/" default_offline_chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf" +default_online_chat_model = "gpt-4" empty_config = { "search-type": { diff --git a/src/khoj/utils/initialization.py b/src/khoj/utils/initialization.py index c797f848..b2d7aad5 100644 --- a/src/khoj/utils/initialization.py +++ b/src/khoj/utils/initialization.py @@ -8,7 +8,8 @@ from database.models import ( ChatModelOptions, ) -from khoj.utils.constants import default_offline_chat_model +from khoj.utils.constants import default_offline_chat_model, default_online_chat_model +from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer from database.adapters import ConversationAdapters @@ -30,11 +31,6 @@ def initialization(): logger.info( "🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account" ) - try: - # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page. - input() - except EOFError: - return try: # Note: gpt4all package is not available on all devices. 
@@ -47,15 +43,27 @@ def initialization(): OfflineChatProcessorConversationConfig.objects.create(enabled=True) offline_chat_model = input( - f"Enter the name of the offline chat model you want to use, based on the models in HuggingFace (press enter to use the default: {default_offline_chat_model}): " + f"Enter the offline chat model you want to use, See GPT4All for supported models (default: {default_offline_chat_model}): " ) if offline_chat_model == "": ChatModelOptions.objects.create( chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE ) else: - max_tokens = input("Enter the maximum number of tokens to use for the offline chat model:") - tokenizer = input("Enter the tokenizer to use for the offline chat model:") + default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000) + max_tokens = input( + f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):" + ) + max_tokens = max_tokens or default_max_tokens + + default_tokenizer = model_to_tokenizer.get( + offline_chat_model, "hf-internal-testing/llama-tokenizer" + ) + tokenizer = input( + f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):" + ) + tokenizer = tokenizer or default_tokenizer + ChatModelOptions.objects.create( chat_model=offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE, @@ -71,10 +79,19 @@ def initialization(): logger.info("🗣️ Setting up OpenAI chat model") api_key = input("Enter your OpenAI API key: ") OpenAIProcessorConversationConfig.objects.create(api_key=api_key) - openai_chat_model = input("Enter the name of the OpenAI chat model you want to use: ") - max_tokens = input("Enter the maximum number of tokens to use for the OpenAI chat model:") + + openai_chat_model = input( + f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): " + ) + openai_chat_model = openai_chat_model or default_online_chat_model + + default_max_tokens = 
model_to_prompt_size.get(openai_chat_model, 2000) + max_tokens = input( + f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): " + ) + max_tokens = max_tokens or default_max_tokens ChatModelOptions.objects.create( - chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_tokens=max_tokens + chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens ) logger.info("🗣️ Chat model configuration complete") @@ -94,5 +111,8 @@ def initialization(): try: _create_chat_configuration() break + # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page. + except EOFError: + return except Exception as e: logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True) From f142999bce155ea9c2fac72788a364de69dce9f7 Mon Sep 17 00:00:00 2001 From: Daniel Grossmann-Kavanagh Date: Mon, 20 Nov 2023 17:07:30 -0800 Subject: [PATCH 04/30] fix khoj local server usage --- src/interface/obsidian/src/main.ts | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/interface/obsidian/src/main.ts b/src/interface/obsidian/src/main.ts index 26b0a5a1..b45a7fb5 100644 --- a/src/interface/obsidian/src/main.ts +++ b/src/interface/obsidian/src/main.ts @@ -73,21 +73,19 @@ export default class Khoj extends Plugin { // Check if khoj backend is configured, note if cannot connect to backend let headers = { "Authorization": `Bearer ${this.settings.khojApiKey}` }; - if (this.settings.khojUrl === "https://app.khoj.dev") { - if (this.settings.khojApiKey === "") { - new Notice(`❗️Khoj API key is not configured. 
Please visit https://app.khoj.dev/config#clients to get an API key.`); - return; - } - - await request({ url: this.settings.khojUrl ,method: "GET", headers: headers }) - .then(response => { - this.settings.connectedToBackend = true; - }) - .catch(error => { - this.settings.connectedToBackend = false; - new Notice(`❗️Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`); - }); + if (this.settings.khojApiKey === "" && this.settings.khojUrl === "https://app.khoj.dev") { + new Notice(`❗️Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`); + return; } + + await request({ url: this.settings.khojUrl ,method: "GET", headers: headers }) + .then(response => { + this.settings.connectedToBackend = true; + }) + .catch(error => { + this.settings.connectedToBackend = false; + new Notice(`❗️Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`); + }); } async saveSettings() { From c89bd49973c3c3afe73306339b83112d8b0e811a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 01:21:59 -0800 Subject: [PATCH 05/30] Fix ranking search results on Obsidian It's reversed since score of entries is now a distance metric on Khoj server. So lesser distance is better. 
Previously higher score was better --- src/interface/obsidian/src/search_modal.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts index e841360e..51870934 100644 --- a/src/interface/obsidian/src/search_modal.ts +++ b/src/interface/obsidian/src/search_modal.ts @@ -106,7 +106,7 @@ export class KhojSearchModal extends SuggestModal { // Combine markdown and PDF results and sort them by score let results = mdData.concat(pdfData) - .sort((a: any, b: any) => b.score - a.score) + .sort((a: any, b: any) => a.score - b.score) .map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; }) this.query = query; From a474c31e02e0400a8253e7ca2c77d9de11df0a85 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 10:56:04 -0800 Subject: [PATCH 06/30] Move the django app into the src/khoj folder for better organization and functionality - Our pypi package currently does not work because the django app and associated database is not included. To remedy this issue, move the app into the src/khoj folder. 
This has the added benefit of improved organization of the codebase, as all server related code is now in a single folder - Update associated file paths and system references --- Dockerfile | 2 +- prod.Dockerfile | 2 +- pytest.ini | 2 +- src/{ => khoj}/app/README.md | 0 src/{ => khoj}/app/__init__.py | 0 src/{ => khoj}/app/settings.py | 4 ++-- src/{ => khoj}/app/urls.py | 0 src/{ => khoj}/app/wsgi.py | 0 src/khoj/configure.py | 6 +++--- src/{ => khoj}/database/__init__.py | 0 src/{ => khoj}/database/adapters/__init__.py | 2 +- src/{ => khoj}/database/admin.py | 2 +- src/{ => khoj}/database/apps.py | 2 +- src/{ => khoj}/database/migrations/0001_khojuser.py | 0 src/{ => khoj}/database/migrations/0002_googleuser.py | 0 src/{ => khoj}/database/migrations/0003_vector_extension.py | 0 .../database/migrations/0004_content_types_and_more.py | 0 .../database/migrations/0005_embeddings_corpus_id.py | 0 src/{ => khoj}/database/migrations/0006_embeddingsdates.py | 0 src/{ => khoj}/database/migrations/0007_add_conversation.py | 0 .../migrations/0008_alter_conversation_conversation_log.py | 0 src/{ => khoj}/database/migrations/0009_khojapiuser.py | 0 .../database/migrations/0010_chatmodeloptions_and_more.py | 0 .../migrations/0010_rename_embeddings_entry_and_more.py | 0 .../database/migrations/0011_merge_20231102_0138.py | 0 .../database/migrations/0012_entry_file_source.py | 0 src/{ => khoj}/database/migrations/0013_subscription.py | 0 .../database/migrations/0014_alter_googleuser_picture.py | 0 .../database/migrations/0015_alter_subscription_user.py | 0 .../migrations/0016_alter_subscription_renewal_date.py | 0 src/{ => khoj}/database/migrations/0017_searchmodel.py | 0 .../migrations/0018_searchmodelconfig_delete_searchmodel.py | 0 .../0019_alter_googleuser_family_name_and_more.py | 0 src/{ => khoj}/database/migrations/__init__.py | 0 src/{ => khoj}/database/models/__init__.py | 0 src/{ => khoj}/database/tests.py | 0 src/khoj/main.py | 2 +- src/{ => khoj}/manage.py | 2 +- 
src/khoj/migrations/migrate_server_pg.py | 2 +- src/khoj/processor/github/github_to_entries.py | 2 +- src/khoj/processor/markdown/markdown_to_entries.py | 2 +- src/khoj/processor/notion/notion_to_entries.py | 2 +- src/khoj/processor/org_mode/org_to_entries.py | 2 +- src/khoj/processor/pdf/pdf_to_entries.py | 2 +- src/khoj/processor/plaintext/plaintext_to_entries.py | 2 +- src/khoj/processor/text_to_entries.py | 4 ++-- src/khoj/routers/api.py | 6 +++--- src/khoj/routers/auth.py | 4 ++-- src/khoj/routers/helpers.py | 4 ++-- src/khoj/routers/indexer.py | 2 +- src/khoj/routers/subscription.py | 2 +- src/khoj/routers/web_client.py | 6 +++--- src/khoj/search_type/text_search.py | 4 ++-- src/khoj/utils/fs_syncer.py | 2 +- src/khoj/utils/initialization.py | 4 ++-- tests/conftest.py | 2 +- tests/helpers.py | 2 +- tests/test_client.py | 4 ++-- tests/test_multiple_users.py | 4 ++-- tests/test_openai_chat_director.py | 2 +- tests/test_plaintext_to_entries.py | 2 +- tests/test_text_search.py | 2 +- 62 files changed, 48 insertions(+), 48 deletions(-) rename src/{ => khoj}/app/README.md (100%) rename src/{ => khoj}/app/__init__.py (100%) rename src/{ => khoj}/app/settings.py (97%) rename src/{ => khoj}/app/urls.py (100%) rename src/{ => khoj}/app/wsgi.py (100%) rename src/{ => khoj}/database/__init__.py (100%) rename src/{ => khoj}/database/adapters/__init__.py (99%) rename src/{ => khoj}/database/admin.py (93%) rename src/{ => khoj}/database/apps.py (82%) rename src/{ => khoj}/database/migrations/0001_khojuser.py (100%) rename src/{ => khoj}/database/migrations/0002_googleuser.py (100%) rename src/{ => khoj}/database/migrations/0003_vector_extension.py (100%) rename src/{ => khoj}/database/migrations/0004_content_types_and_more.py (100%) rename src/{ => khoj}/database/migrations/0005_embeddings_corpus_id.py (100%) rename src/{ => khoj}/database/migrations/0006_embeddingsdates.py (100%) rename src/{ => khoj}/database/migrations/0007_add_conversation.py (100%) rename src/{ => 
khoj}/database/migrations/0008_alter_conversation_conversation_log.py (100%) rename src/{ => khoj}/database/migrations/0009_khojapiuser.py (100%) rename src/{ => khoj}/database/migrations/0010_chatmodeloptions_and_more.py (100%) rename src/{ => khoj}/database/migrations/0010_rename_embeddings_entry_and_more.py (100%) rename src/{ => khoj}/database/migrations/0011_merge_20231102_0138.py (100%) rename src/{ => khoj}/database/migrations/0012_entry_file_source.py (100%) rename src/{ => khoj}/database/migrations/0013_subscription.py (100%) rename src/{ => khoj}/database/migrations/0014_alter_googleuser_picture.py (100%) rename src/{ => khoj}/database/migrations/0015_alter_subscription_user.py (100%) rename src/{ => khoj}/database/migrations/0016_alter_subscription_renewal_date.py (100%) rename src/{ => khoj}/database/migrations/0017_searchmodel.py (100%) rename src/{ => khoj}/database/migrations/0018_searchmodelconfig_delete_searchmodel.py (100%) rename src/{ => khoj}/database/migrations/0019_alter_googleuser_family_name_and_more.py (100%) rename src/{ => khoj}/database/migrations/__init__.py (100%) rename src/{ => khoj}/database/models/__init__.py (100%) rename src/{ => khoj}/database/tests.py (100%) rename src/{ => khoj}/manage.py (89%) diff --git a/Dockerfile b/Dockerfile index 9882a236..4512e884 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \ COPY . . # Set the PYTHONPATH environment variable in order for it to find the Django app. -ENV PYTHONPATH=/app/src:$PYTHONPATH +ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH # Run the Application # There are more arguments required for the application to run, diff --git a/prod.Dockerfile b/prod.Dockerfile index 693a3a8b..a935f3c6 100644 --- a/prod.Dockerfile +++ b/prod.Dockerfile @@ -20,7 +20,7 @@ COPY . . RUN apt install vim -y # Set the PYTHONPATH environment variable in order for it to find the Django app. 
-ENV PYTHONPATH=/app/src:$PYTHONPATH +ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH # Run the Application # There are more arguments required for the application to run, diff --git a/pytest.ini b/pytest.ini index b3e418d0..36d842ae 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -DJANGO_SETTINGS_MODULE = app.settings +DJANGO_SETTINGS_MODULE = khoj.app.settings pythonpath = . src testpaths = tests markers = diff --git a/src/app/README.md b/src/khoj/app/README.md similarity index 100% rename from src/app/README.md rename to src/khoj/app/README.md diff --git a/src/app/__init__.py b/src/khoj/app/__init__.py similarity index 100% rename from src/app/__init__.py rename to src/khoj/app/__init__.py diff --git a/src/app/settings.py b/src/khoj/app/settings.py similarity index 97% rename from src/app/settings.py rename to src/khoj/app/settings.py index 0803081d..721bcc87 100644 --- a/src/app/settings.py +++ b/src/khoj/app/settings.py @@ -14,7 +14,7 @@ from pathlib import Path import os # Build paths inside the project like this: BASE_DIR / 'subdir'. 
-BASE_DIR = Path(__file__).resolve().parent.parent.parent +BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent # Quick-start development settings - unsuitable for production @@ -53,7 +53,7 @@ SESSION_COOKIE_SAMESITE = "None" INSTALLED_APPS = [ "django.contrib.auth", "django.contrib.contenttypes", - "database.apps.DatabaseConfig", + "khoj.database.apps.DatabaseConfig", "django.contrib.admin", "django.contrib.sessions", "django.contrib.messages", diff --git a/src/app/urls.py b/src/khoj/app/urls.py similarity index 100% rename from src/app/urls.py rename to src/khoj/app/urls.py diff --git a/src/app/wsgi.py b/src/khoj/app/wsgi.py similarity index 100% rename from src/app/wsgi.py rename to src/khoj/app/wsgi.py diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 5ed92727..d34205d9 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -20,8 +20,8 @@ from starlette.authentication import ( ) # Internal Packages -from database.models import KhojUser, Subscription -from database.adapters import get_all_users, get_or_create_search_model +from khoj.database.models import KhojUser, Subscription +from khoj.database.adapters import get_all_users, get_or_create_search_model from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel from khoj.routers.indexer import configure_content, load_content, configure_search from khoj.utils import constants, state @@ -45,7 +45,7 @@ class UserAuthenticationBackend(AuthenticationBackend): def __init__( self, ): - from database.models import KhojUser, KhojApiUser + from khoj.database.models import KhojUser, KhojApiUser self.khojuser_manager = KhojUser.objects self.khojapiuser_manager = KhojApiUser.objects diff --git a/src/database/__init__.py b/src/khoj/database/__init__.py similarity index 100% rename from src/database/__init__.py rename to src/khoj/database/__init__.py diff --git a/src/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py similarity index 99% rename from 
src/database/adapters/__init__.py rename to src/khoj/database/adapters/__init__.py index 4141d3bb..ea0c0a85 100644 --- a/src/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -17,7 +17,7 @@ from asgiref.sync import sync_to_async from fastapi import HTTPException -from database.models import ( +from khoj.database.models import ( KhojUser, GoogleUser, KhojApiUser, diff --git a/src/database/admin.py b/src/khoj/database/admin.py similarity index 93% rename from src/database/admin.py rename to src/khoj/database/admin.py index 8d2130ba..69f15b2f 100644 --- a/src/database/admin.py +++ b/src/khoj/database/admin.py @@ -3,7 +3,7 @@ from django.contrib.auth.admin import UserAdmin # Register your models here. -from database.models import ( +from khoj.database.models import ( KhojUser, ChatModelOptions, OpenAIProcessorConversationConfig, diff --git a/src/database/apps.py b/src/khoj/database/apps.py similarity index 82% rename from src/database/apps.py rename to src/khoj/database/apps.py index a3b71b13..eed64812 100644 --- a/src/database/apps.py +++ b/src/khoj/database/apps.py @@ -3,4 +3,4 @@ from django.apps import AppConfig class DatabaseConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" - name = "database" + name = "khoj.database" diff --git a/src/database/migrations/0001_khojuser.py b/src/khoj/database/migrations/0001_khojuser.py similarity index 100% rename from src/database/migrations/0001_khojuser.py rename to src/khoj/database/migrations/0001_khojuser.py diff --git a/src/database/migrations/0002_googleuser.py b/src/khoj/database/migrations/0002_googleuser.py similarity index 100% rename from src/database/migrations/0002_googleuser.py rename to src/khoj/database/migrations/0002_googleuser.py diff --git a/src/database/migrations/0003_vector_extension.py b/src/khoj/database/migrations/0003_vector_extension.py similarity index 100% rename from src/database/migrations/0003_vector_extension.py rename to 
src/khoj/database/migrations/0003_vector_extension.py diff --git a/src/database/migrations/0004_content_types_and_more.py b/src/khoj/database/migrations/0004_content_types_and_more.py similarity index 100% rename from src/database/migrations/0004_content_types_and_more.py rename to src/khoj/database/migrations/0004_content_types_and_more.py diff --git a/src/database/migrations/0005_embeddings_corpus_id.py b/src/khoj/database/migrations/0005_embeddings_corpus_id.py similarity index 100% rename from src/database/migrations/0005_embeddings_corpus_id.py rename to src/khoj/database/migrations/0005_embeddings_corpus_id.py diff --git a/src/database/migrations/0006_embeddingsdates.py b/src/khoj/database/migrations/0006_embeddingsdates.py similarity index 100% rename from src/database/migrations/0006_embeddingsdates.py rename to src/khoj/database/migrations/0006_embeddingsdates.py diff --git a/src/database/migrations/0007_add_conversation.py b/src/khoj/database/migrations/0007_add_conversation.py similarity index 100% rename from src/database/migrations/0007_add_conversation.py rename to src/khoj/database/migrations/0007_add_conversation.py diff --git a/src/database/migrations/0008_alter_conversation_conversation_log.py b/src/khoj/database/migrations/0008_alter_conversation_conversation_log.py similarity index 100% rename from src/database/migrations/0008_alter_conversation_conversation_log.py rename to src/khoj/database/migrations/0008_alter_conversation_conversation_log.py diff --git a/src/database/migrations/0009_khojapiuser.py b/src/khoj/database/migrations/0009_khojapiuser.py similarity index 100% rename from src/database/migrations/0009_khojapiuser.py rename to src/khoj/database/migrations/0009_khojapiuser.py diff --git a/src/database/migrations/0010_chatmodeloptions_and_more.py b/src/khoj/database/migrations/0010_chatmodeloptions_and_more.py similarity index 100% rename from src/database/migrations/0010_chatmodeloptions_and_more.py rename to 
src/khoj/database/migrations/0010_chatmodeloptions_and_more.py diff --git a/src/database/migrations/0010_rename_embeddings_entry_and_more.py b/src/khoj/database/migrations/0010_rename_embeddings_entry_and_more.py similarity index 100% rename from src/database/migrations/0010_rename_embeddings_entry_and_more.py rename to src/khoj/database/migrations/0010_rename_embeddings_entry_and_more.py diff --git a/src/database/migrations/0011_merge_20231102_0138.py b/src/khoj/database/migrations/0011_merge_20231102_0138.py similarity index 100% rename from src/database/migrations/0011_merge_20231102_0138.py rename to src/khoj/database/migrations/0011_merge_20231102_0138.py diff --git a/src/database/migrations/0012_entry_file_source.py b/src/khoj/database/migrations/0012_entry_file_source.py similarity index 100% rename from src/database/migrations/0012_entry_file_source.py rename to src/khoj/database/migrations/0012_entry_file_source.py diff --git a/src/database/migrations/0013_subscription.py b/src/khoj/database/migrations/0013_subscription.py similarity index 100% rename from src/database/migrations/0013_subscription.py rename to src/khoj/database/migrations/0013_subscription.py diff --git a/src/database/migrations/0014_alter_googleuser_picture.py b/src/khoj/database/migrations/0014_alter_googleuser_picture.py similarity index 100% rename from src/database/migrations/0014_alter_googleuser_picture.py rename to src/khoj/database/migrations/0014_alter_googleuser_picture.py diff --git a/src/database/migrations/0015_alter_subscription_user.py b/src/khoj/database/migrations/0015_alter_subscription_user.py similarity index 100% rename from src/database/migrations/0015_alter_subscription_user.py rename to src/khoj/database/migrations/0015_alter_subscription_user.py diff --git a/src/database/migrations/0016_alter_subscription_renewal_date.py b/src/khoj/database/migrations/0016_alter_subscription_renewal_date.py similarity index 100% rename from 
src/database/migrations/0016_alter_subscription_renewal_date.py rename to src/khoj/database/migrations/0016_alter_subscription_renewal_date.py diff --git a/src/database/migrations/0017_searchmodel.py b/src/khoj/database/migrations/0017_searchmodel.py similarity index 100% rename from src/database/migrations/0017_searchmodel.py rename to src/khoj/database/migrations/0017_searchmodel.py diff --git a/src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py b/src/khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py similarity index 100% rename from src/database/migrations/0018_searchmodelconfig_delete_searchmodel.py rename to src/khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py diff --git a/src/database/migrations/0019_alter_googleuser_family_name_and_more.py b/src/khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py similarity index 100% rename from src/database/migrations/0019_alter_googleuser_family_name_and_more.py rename to src/khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py diff --git a/src/database/migrations/__init__.py b/src/khoj/database/migrations/__init__.py similarity index 100% rename from src/database/migrations/__init__.py rename to src/khoj/database/migrations/__init__.py diff --git a/src/database/models/__init__.py b/src/khoj/database/models/__init__.py similarity index 100% rename from src/database/models/__init__.py rename to src/khoj/database/models/__init__.py diff --git a/src/database/tests.py b/src/khoj/database/tests.py similarity index 100% rename from src/database/tests.py rename to src/khoj/database/tests.py diff --git a/src/khoj/main.py b/src/khoj/main.py index 022efcc1..cded9841 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -26,7 +26,7 @@ from django.core.management import call_command # Initialize Django sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -os.environ.setdefault("DJANGO_SETTINGS_MODULE", 
"app.settings") +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings") django.setup() # Initialize Django Database diff --git a/src/manage.py b/src/khoj/manage.py similarity index 89% rename from src/manage.py rename to src/khoj/manage.py index 1a64b14a..9b8f4b27 100755 --- a/src/manage.py +++ b/src/khoj/manage.py @@ -6,7 +6,7 @@ import sys def main(): """Run administrative tasks.""" - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings") try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/src/khoj/migrations/migrate_server_pg.py b/src/khoj/migrations/migrate_server_pg.py index 434e27d7..097ae0d6 100644 --- a/src/khoj/migrations/migrate_server_pg.py +++ b/src/khoj/migrations/migrate_server_pg.py @@ -60,7 +60,7 @@ import logging from packaging import version from khoj.utils.yaml import load_config_from_file, save_config_to_file -from database.models import ( +from khoj.database.models import ( OpenAIProcessorConversationConfig, OfflineChatProcessorConversationConfig, ChatModelOptions, diff --git a/src/khoj/processor/github/github_to_entries.py b/src/khoj/processor/github/github_to_entries.py index 56279453..21344eca 100644 --- a/src/khoj/processor/github/github_to_entries.py +++ b/src/khoj/processor/github/github_to_entries.py @@ -13,7 +13,7 @@ from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries -from database.models import Entry as DbEntry, GithubConfig, KhojUser +from khoj.database.models import Entry as DbEntry, GithubConfig, KhojUser logger = logging.getLogger(__name__) diff --git a/src/khoj/processor/markdown/markdown_to_entries.py b/src/khoj/processor/markdown/markdown_to_entries.py index 
0dd71740..08ba4a77 100644 --- a/src/khoj/processor/markdown/markdown_to_entries.py +++ b/src/khoj/processor/markdown/markdown_to_entries.py @@ -10,7 +10,7 @@ from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.constants import empty_escape_sequences from khoj.utils.rawconfig import Entry -from database.models import Entry as DbEntry, KhojUser +from khoj.database.models import Entry as DbEntry, KhojUser logger = logging.getLogger(__name__) diff --git a/src/khoj/processor/notion/notion_to_entries.py b/src/khoj/processor/notion/notion_to_entries.py index 7a88e2a1..1e3c0553 100644 --- a/src/khoj/processor/notion/notion_to_entries.py +++ b/src/khoj/processor/notion/notion_to_entries.py @@ -10,7 +10,7 @@ from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry, NotionContentConfig from khoj.processor.text_to_entries import TextToEntries from khoj.utils.rawconfig import Entry -from database.models import Entry as DbEntry, KhojUser, NotionConfig +from khoj.database.models import Entry as DbEntry, KhojUser, NotionConfig from enum import Enum diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/org_mode/org_to_entries.py index e42b7498..80ccdb08 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/org_mode/org_to_entries.py @@ -9,7 +9,7 @@ from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry from khoj.utils import state -from database.models import Entry as DbEntry, KhojUser +from khoj.database.models import Entry as DbEntry, KhojUser logger = logging.getLogger(__name__) diff --git a/src/khoj/processor/pdf/pdf_to_entries.py b/src/khoj/processor/pdf/pdf_to_entries.py index 3a47096a..62f94d32 100644 --- a/src/khoj/processor/pdf/pdf_to_entries.py +++ b/src/khoj/processor/pdf/pdf_to_entries.py @@ -11,7 +11,7 @@ from langchain.document_loaders import PyMuPDFLoader from 
khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry -from database.models import Entry as DbEntry, KhojUser +from khoj.database.models import Entry as DbEntry, KhojUser logger = logging.getLogger(__name__) diff --git a/src/khoj/processor/plaintext/plaintext_to_entries.py b/src/khoj/processor/plaintext/plaintext_to_entries.py index d42dae30..081cc327 100644 --- a/src/khoj/processor/plaintext/plaintext_to_entries.py +++ b/src/khoj/processor/plaintext/plaintext_to_entries.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry -from database.models import Entry as DbEntry, KhojUser +from khoj.database.models import Entry as DbEntry, KhojUser logger = logging.getLogger(__name__) diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index ac42105a..109c58e6 100644 --- a/src/khoj/processor/text_to_entries.py +++ b/src/khoj/processor/text_to_entries.py @@ -13,8 +13,8 @@ from khoj.utils.helpers import is_none_or_empty, timer, batcher # Internal Packages from khoj.utils.rawconfig import Entry from khoj.search_filter.date_filter import DateFilter -from database.models import KhojUser, Entry as DbEntry, EntryDates -from database.adapters import EntryAdapters +from khoj.database.models import KhojUser, Entry as DbEntry, EntryDates +from khoj.database.adapters import EntryAdapters logger = logging.getLogger(__name__) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 6d67fcbe..f2e5c966 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -44,9 +44,9 @@ from khoj.processor.conversation.openai.gpt import extract_questions from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline from fastapi.requests import Request -from database import adapters -from database.adapters import 
EntryAdapters, ConversationAdapters -from database.models import ( +from khoj.database import adapters +from khoj.database.adapters import EntryAdapters, ConversationAdapters +from khoj.database.models import ( LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, diff --git a/src/khoj/routers/auth.py b/src/khoj/routers/auth.py index a9a88325..df119548 100644 --- a/src/khoj/routers/auth.py +++ b/src/khoj/routers/auth.py @@ -15,8 +15,8 @@ from google.oauth2 import id_token from google.auth.transport import requests as google_requests # Internal Packages -from database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token -from database.models import KhojApiUser +from khoj.database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token +from khoj.database.models import KhojApiUser from khoj.routers.helpers import update_telemetry_state from khoj.utils import state diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 272f962d..63991263 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -18,8 +18,8 @@ from khoj.utils.helpers import ConversationCommand, log_telemetry from khoj.processor.conversation.openai.gpt import converse from khoj.processor.conversation.gpt4all.chat_model import converse_offline from khoj.processor.conversation.utils import message_to_log, ThreadedGenerator -from database.models import KhojUser, Subscription -from database.adapters import ConversationAdapters +from khoj.database.models import KhojUser, Subscription +from khoj.database.adapters import ConversationAdapters logger = logging.getLogger(__name__) diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index ccb65063..7a7be03e 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -30,7 +30,7 @@ from khoj.utils.config import ( ContentIndex, SearchModels, ) -from database.models import ( +from khoj.database.models import ( KhojUser, 
GithubConfig, NotionConfig, diff --git a/src/khoj/routers/subscription.py b/src/khoj/routers/subscription.py index 62e50d72..edcbc135 100644 --- a/src/khoj/routers/subscription.py +++ b/src/khoj/routers/subscription.py @@ -10,7 +10,7 @@ from starlette.authentication import requires import stripe # Internal Packages -from database import adapters +from khoj.database import adapters # Stripe integration for Khoj Cloud Subscription diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index f30499d8..dab16fa8 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -8,8 +8,8 @@ from fastapi import Request from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse from fastapi.templating import Jinja2Templates from starlette.authentication import requires -from database import adapters -from database.models import KhojUser +from khoj.database import adapters +from khoj.database.models import KhojUser from khoj.utils.rawconfig import ( GithubContentConfig, GithubRepoConfig, @@ -18,7 +18,7 @@ from khoj.utils.rawconfig import ( # Internal Packages from khoj.utils import constants, state -from database.adapters import ( +from khoj.database.adapters import ( EntryAdapters, get_user_github_config, get_user_notion_config, diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py index 7e295903..a78ce522 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -19,8 +19,8 @@ from khoj.utils.state import SearchType from khoj.utils.rawconfig import SearchResponse, Entry from khoj.utils.jsonl import load_jsonl from khoj.processor.text_to_entries import TextToEntries -from database.adapters import EntryAdapters -from database.models import KhojUser, Entry as DbEntry +from khoj.database.adapters import EntryAdapters +from khoj.database.models import KhojUser, Entry as DbEntry logger = logging.getLogger(__name__) diff --git a/src/khoj/utils/fs_syncer.py 
b/src/khoj/utils/fs_syncer.py index fc7e4a2d..57c79d9d 100644 --- a/src/khoj/utils/fs_syncer.py +++ b/src/khoj/utils/fs_syncer.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup from khoj.utils.helpers import get_absolute_path, is_none_or_empty from khoj.utils.rawconfig import TextContentConfig from khoj.utils.config import SearchType -from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig +from khoj.database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig logger = logging.getLogger(__name__) diff --git a/src/khoj/utils/initialization.py b/src/khoj/utils/initialization.py index b2d7aad5..ffc4d47e 100644 --- a/src/khoj/utils/initialization.py +++ b/src/khoj/utils/initialization.py @@ -1,7 +1,7 @@ import logging import os -from database.models import ( +from khoj.database.models import ( KhojUser, OfflineChatProcessorConversationConfig, OpenAIProcessorConversationConfig, @@ -11,7 +11,7 @@ from database.models import ( from khoj.utils.constants import default_offline_chat_model, default_online_chat_model from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer -from database.adapters import ConversationAdapters +from khoj.database.adapters import ConversationAdapters logger = logging.getLogger(__name__) diff --git a/tests/conftest.py b/tests/conftest.py index 902994a0..d1df341a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ from khoj.utils.rawconfig import ( from khoj.utils import state, fs_syncer from khoj.routers.indexer import configure_content from khoj.processor.org_mode.org_to_entries import OrgToEntries -from database.models import ( +from khoj.database.models import ( KhojApiUser, LocalOrgConfig, LocalMarkdownConfig, diff --git a/tests/helpers.py b/tests/helpers.py index 079eb475..d0f9babc 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -1,7 +1,7 @@ import factory import os -from database.models import ( +from 
khoj.database.models import ( KhojUser, KhojApiUser, ChatModelOptions, diff --git a/tests/test_client.py b/tests/test_client.py index 5324e3c1..4e707e5f 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -16,8 +16,8 @@ from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig from khoj.processor.org_mode.org_to_entries import OrgToEntries -from database.models import KhojUser, KhojApiUser -from database.adapters import EntryAdapters +from khoj.database.models import KhojUser, KhojApiUser +from khoj.database.adapters import EntryAdapters # Test diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index a94c173e..b5785350 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -17,8 +17,8 @@ from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig from khoj.processor.org_mode.org_to_entries import OrgToEntries -from database.models import KhojUser, KhojApiUser -from database.adapters import EntryAdapters +from khoj.database.models import KhojUser, KhojApiUser +from khoj.database.adapters import EntryAdapters # ---------------------------------------------------------------------------------------------------- diff --git a/tests/test_openai_chat_director.py b/tests/test_openai_chat_director.py index 07c4e0d8..b4e63364 100644 --- a/tests/test_openai_chat_director.py +++ b/tests/test_openai_chat_director.py @@ -10,7 +10,7 @@ from khoj.processor.conversation import prompts # Internal Packages from khoj.processor.conversation.utils import message_to_log from tests.helpers import ConversationFactory -from database.models import KhojUser +from khoj.database.models import KhojUser # Initialize variables for tests api_key = os.getenv("OPENAI_API_KEY") diff --git 
a/tests/test_plaintext_to_entries.py b/tests/test_plaintext_to_entries.py index 23b0d652..0bbba6de 100644 --- a/tests/test_plaintext_to_entries.py +++ b/tests/test_plaintext_to_entries.py @@ -7,7 +7,7 @@ from pathlib import Path from khoj.utils.fs_syncer import get_plaintext_files from khoj.utils.rawconfig import TextContentConfig from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries -from database.models import LocalPlaintextConfig, KhojUser +from khoj.database.models import LocalPlaintextConfig, KhojUser def test_plaintext_file(tmp_path): diff --git a/tests/test_text_search.py b/tests/test_text_search.py index b4507feb..025c1241 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -13,7 +13,7 @@ from khoj.utils.rawconfig import ContentConfig, SearchConfig from khoj.processor.org_mode.org_to_entries import OrgToEntries from khoj.processor.github.github_to_entries import GithubToEntries from khoj.utils.fs_syncer import collect_files, get_org_files -from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig +from khoj.database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig logger = logging.getLogger(__name__) From 71e794c26f1bef2d527474e70c76dfc58295911c Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 10:57:21 -0800 Subject: [PATCH 07/30] Remove the sys.append line in the main.py file, as it's not required --- src/khoj/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/khoj/main.py b/src/khoj/main.py index cded9841..23d36b3d 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -25,7 +25,6 @@ from django.core.asgi import get_asgi_application from django.core.management import call_command # Initialize Django -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings") django.setup() From 8932fc0c367abea6440616aaf0dee1dbfe8ff906 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 
Nov 2023 11:12:50 -0800 Subject: [PATCH 08/30] Ignore w004 check to bypass pypi warnings for check-wheel-contents - PyPi doesn't like to have files that start with numbers, however all of the generated django migration files start with numbers. To accommodate, skip this check. - Refer to https://pypi.org/project/check-wheel-contents/ for documentation and recommendation --- .github/workflows/pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 21792f51..0f31102d 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -48,7 +48,7 @@ jobs: - name: 🌡️ Validate Python Package run: | # Validate PyPi Package - pipx run check-wheel-contents dist/*.whl + pipx run check-wheel-contents dist/*.whl --ignore w004 pipx run twine check dist/* - name: ⏫ Upload Python Package Artifacts From a1460a5bf9da6a5955a34cd31fbcdf6a865167c2 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 11:14:40 -0800 Subject: [PATCH 09/30] Set operations to typed empty list in migration file --- src/khoj/database/migrations/0011_merge_20231102_0138.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/khoj/database/migrations/0011_merge_20231102_0138.py b/src/khoj/database/migrations/0011_merge_20231102_0138.py index 112c76a2..89a581f0 100644 --- a/src/khoj/database/migrations/0011_merge_20231102_0138.py +++ b/src/khoj/database/migrations/0011_merge_20231102_0138.py @@ -2,6 +2,8 @@ from django.db import migrations +from typing import List, Any + class Migration(migrations.Migration): dependencies = [ @@ -9,4 +11,4 @@ class Migration(migrations.Migration): ("database", "0010_rename_embeddings_entry_and_more"), ] - operations = [] + operations = List[Any] = [] From 645fd966343e6f90e4858846b3ed33dc4fead7ab Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 11:19:33 -0800 Subject: [PATCH 10/30] Search across all content types from Khoj Obsidian 
client Previously it was only searching for PDF and Markdown files. This was meant to show only content from current vault as results. But it has not scaled well as other clients also allow syncing PDF and markdown files now. So remove this content type filter for now. A proper solution would limit by using file/dir filters on server or client side. --- src/interface/obsidian/src/search_modal.ts | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts index 51870934..7e97d1ea 100644 --- a/src/interface/obsidian/src/search_modal.ts +++ b/src/interface/obsidian/src/search_modal.ts @@ -87,27 +87,18 @@ export class KhojSearchModal extends SuggestModal { } async getSuggestions(query: string): Promise { - // Query Khoj backend for search results + // Setup Query Khoj backend for search results let encodedQuery = encodeURIComponent(query); let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&client=obsidian`; let headers = { 'Authorization': `Bearer ${this.setting.khojApiKey}` } - // Get search results for markdown and pdf files - let mdResponse = await request({ url: `${searchUrl}&t=markdown`, headers: headers }); - let pdfResponse = await request({ url: `${searchUrl}&t=pdf`, headers: headers }); + // Get search results from Khoj backend + let response = await request({ url: `${searchUrl}`, headers: headers }); // Parse search results - let mdData = JSON.parse(mdResponse) + let results = JSON.parse(response) .filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path)) - .map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; }); - let pdfData = JSON.parse(pdfResponse) - .filter((result: any) => !this.find_similar_notes || 
!result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path)) - .map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; }) - - // Combine markdown and PDF results and sort them by score - let results = mdData.concat(pdfData) - .sort((a: any, b: any) => a.score - b.score) - .map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; }) + .map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; }); this.query = query; return results; From 333cb3445c1cf051ad2cdf9087c6bb65c8a7c92c Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 11:28:51 -0800 Subject: [PATCH 11/30] Use colon rather than equals to indicate typing --- src/khoj/database/migrations/0011_merge_20231102_0138.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/database/migrations/0011_merge_20231102_0138.py b/src/khoj/database/migrations/0011_merge_20231102_0138.py index 89a581f0..0206036e 100644 --- a/src/khoj/database/migrations/0011_merge_20231102_0138.py +++ b/src/khoj/database/migrations/0011_merge_20231102_0138.py @@ -11,4 +11,4 @@ class Migration(migrations.Migration): ("database", "0010_rename_embeddings_entry_and_more"), ] - operations = List[Any] = [] + operations: List[Any] = [] From 38144a7a69134821b8888d98aceb92f0b19fc32e Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 11:33:07 -0800 Subject: [PATCH 12/30] pull_request path should be src/khoj rather than src/ --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 697579da..51301491 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,7 @@ name: test on: pull_request: paths: - - src/** + - src/khoj/** - tests/** - config/** - pyproject.toml From 
61f6b8c0d4334f74cb5d6b1db926044e2c7154bc Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 11:33:43 -0800 Subject: [PATCH 13/30] Ignore-check step failed due to unrecognized code. Try using capital letters for indicator --- .github/workflows/pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 0f31102d..1ac735aa 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -48,7 +48,7 @@ jobs: - name: 🌡️ Validate Python Package run: | # Validate PyPi Package - pipx run check-wheel-contents dist/*.whl --ignore w004 + pipx run check-wheel-contents dist/*.whl --ignore W004 pipx run twine check dist/* - name: ⏫ Upload Python Package Artifacts From 244b76ffedddb4dfbf172910727de4264a9d4e98 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 11:27:12 -0800 Subject: [PATCH 14/30] =?UTF-8?q?Add=20isort=20for=20automatic=20import=20?= =?UTF-8?q?sorting=20and=20skip=20main.py=20because=20it's=20a=20drama=20q?= =?UTF-8?q?ueen=20=F0=9F=91=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 ++++++ src/khoj/main.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 77d4ec6c..b7253585 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,12 @@ repos: hooks: - id: black +- repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort (python) + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/src/khoj/main.py b/src/khoj/main.py index 23d36b3d..c5e1f277 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -1,3 +1,7 @@ +""" Main module for Khoj Assistant + isort:skip_file +""" + # Standard Packages import os import sys From 19e042037a0e99f8efd551468a8ac2a447d35759 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 
12:48:12 -0800 Subject: [PATCH 15/30] Run isort with black profile to avoid conflicts between the two --- .pre-commit-config.yaml | 1 + pyproject.toml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b7253585..e5fc54cc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,7 @@ repos: hooks: - id: isort name: isort (python) + args: ["--profile", "black", "--filter-files"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 diff --git a/pyproject.toml b/pyproject.toml index 17c11e77..63a50fac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,6 +128,9 @@ warn_unused_ignores = false [tool.black] line-length = 120 +[tool.isort] +profile = "black" + [tool.pytest.ini_options] addopts = "--strict-markers" markers = [ From 341abf03ff0ba3e311ca7bab55060272f4437eed Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 12:55:09 -0800 Subject: [PATCH 16/30] Handle none for search_type and use equals comparator rather than in for determining Notion type --- src/khoj/routers/indexer.py | 42 +++++++++++++------------------------ 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index 7a7be03e..9362c4b3 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -1,40 +1,25 @@ -# Standard Packages -import logging -from typing import Optional, Union, Dict import asyncio +import logging +from typing import Dict, Optional, Union -# External Packages from fastapi import APIRouter, Header, Request, Response, UploadFile from pydantic import BaseModel from starlette.authentication import requires -# Internal Packages -from khoj.utils import state, constants +from khoj.database.models import GithubConfig, KhojUser, NotionConfig +from khoj.processor.github.github_to_entries import GithubToEntries from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries +from 
khoj.processor.notion.notion_to_entries import NotionToEntries from khoj.processor.org_mode.org_to_entries import OrgToEntries from khoj.processor.pdf.pdf_to_entries import PdfToEntries -from khoj.processor.github.github_to_entries import GithubToEntries -from khoj.processor.notion.notion_to_entries import NotionToEntries from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries -from khoj.search_type import text_search, image_search from khoj.routers.helpers import update_telemetry_state -from khoj.utils.yaml import save_config_to_file_updated_state -from khoj.utils.config import SearchModels +from khoj.search_type import image_search, text_search +from khoj.utils import constants, state +from khoj.utils.config import ContentIndex, SearchModels from khoj.utils.helpers import LRU, get_file_type -from khoj.utils.rawconfig import ( - ContentConfig, - FullConfig, - SearchConfig, -) -from khoj.utils.config import ( - ContentIndex, - SearchModels, -) -from khoj.database.models import ( - KhojUser, - GithubConfig, - NotionConfig, -) +from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig +from khoj.utils.yaml import save_config_to_file_updated_state logger = logging.getLogger(__name__) @@ -189,6 +174,9 @@ def configure_content( content_index = ContentIndex() success = True + if t == None: + t = state.SearchType.All + if t is not None and t in [type.value for type in state.SearchType]: t = state.SearchType(t) @@ -315,7 +303,7 @@ def configure_content( # Initialize Notion Search notion_config = NotionConfig.objects.filter(user=user).first() if ( - search_type == state.SearchType.All.value or search_type in state.SearchType.Notion.value + search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value ) and notion_config: logger.info("🔌 Setting up search for notion") text_search.setup( @@ -328,7 +316,7 @@ def configure_content( ) except Exception as e: - logger.error(f"🚨 Failed to setup GitHub: {e}", 
exc_info=True) + logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True) success = False # Invalidate Query Cache From 6d9091bef5bf260f0601e69319bf8e6bb3d5c9d3 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 13:02:57 -0800 Subject: [PATCH 17/30] Disable isort for now --- .pre-commit-config.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e5fc54cc..77d4ec6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,13 +4,6 @@ repos: hooks: - id: black -- repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - name: isort (python) - args: ["--profile", "black", "--filter-files"] - - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: From b06628ee31f875aed0217d0d6a90fa2e5ddbfa9f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 20 Nov 2023 22:10:24 -0800 Subject: [PATCH 18/30] Format Django initializing outputs using Khoj logger format - Collect STDOUT from the `migrate', `collectstatic' commands and output using the Khoj logger format and verbosity settings - Only show Django `collectstatic' command output in verbose mode - Fix showing the Initializing Khoj log line by moving it after logger level set --- src/khoj/app/settings.py | 2 +- src/khoj/main.py | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index 721bcc87..97a058da 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -24,7 +24,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY") # SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = os.getenv("KHOJ_DEBUG", "False") == "True" +DEBUG = os.getenv("KHOJ_DEBUG") == "True" ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"] diff --git a/src/khoj/main.py b/src/khoj/main.py index c5e1f277..b832151c 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -3,6 +3,8 @@ """ # Standard Packages +from contextlib import redirect_stdout +import io import os import sys import locale @@ -33,10 +35,14 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings") django.setup() # Initialize Django Database -call_command("migrate", "--noinput") +db_migrate_output = io.StringIO() +with redirect_stdout(db_migrate_output): + call_command("migrate", "--noinput") # Initialize Django Static Files -call_command("collectstatic", "--noinput") +collectstatic_output = io.StringIO() +with redirect_stdout(collectstatic_output): + call_command("collectstatic", "--noinput") # Initialize the Application Server app = FastAPI() @@ -79,14 +85,16 @@ def run(should_start_server=True): args = cli(state.cli_args) set_state(args) - logger.info(f"🚒 Initializing Khoj v{state.khoj_version}") - # Set Logging Level if args.verbose == 0: logger.setLevel(logging.INFO) elif args.verbose >= 1: logger.setLevel(logging.DEBUG) + logger.info(f"🚒 Initializing Khoj v{state.khoj_version}") + logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}") + logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}") + initialization() # Create app directory, if it doesn't exist From 4aec58130674959512b8c73dc350d3435cf78177 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 20 Nov 2023 22:40:00 -0800 Subject: [PATCH 19/30] Handle image search setup related warning Ideally should rename model_directory to config_directory or some such but the current image search code will need to be migrated soon. 
So changing the variable name and creating a migration script for old khoj.yml files using model-directory variable isn't worth it Remove the explicit setting of the number of threads to use by pytorch. Use the default used by it. --- src/khoj/search_type/image_search.py | 4 ---- src/khoj/utils/rawconfig.py | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/khoj/search_type/image_search.py b/src/khoj/search_type/image_search.py index 8c0a3cdb..fd5107ad 100644 --- a/src/khoj/search_type/image_search.py +++ b/src/khoj/search_type/image_search.py @@ -12,7 +12,6 @@ from sentence_transformers import SentenceTransformer, util from PIL import Image from tqdm import trange import torch -from khoj.utils import state # Internal Packages from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer @@ -26,9 +25,6 @@ logger = logging.getLogger(__name__) def initialize_model(search_config: ImageSearchConfig): - # Initialize Model - torch.set_num_threads(4) - # Convert model directory to absolute path search_config.model_directory = resolve_absolute_path(search_config.model_directory) diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index 4c97aedd..d36a36ff 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -72,6 +72,9 @@ class ImageSearchConfig(ConfigBase): encoder_type: Optional[str] = None model_directory: Optional[Path] = None + class Config: + protected_namespaces = () + class SearchConfig(ConfigBase): image: Optional[ImageSearchConfig] = None From 3f0de45ec68c189ebaa2215e3198491d99020306 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 02:03:33 -0800 Subject: [PATCH 20/30] Pass file source to clients via text search API response Source of entry stored in DB is now passed to clients for processing --- src/khoj/search_type/text_search.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/khoj/search_type/text_search.py
b/src/khoj/search_type/text_search.py index a78ce522..ca0ac6d3 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -147,6 +147,7 @@ def collate_results(hits, dedupe=True): "score": hit.distance, "corpus_id": str(hit.corpus_id), "additional": { + "source": hit.file_source, "file": hit.file_path, "compiled": hit.compiled, "heading": hit.heading, @@ -169,6 +170,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]): "score": hit.score, "corpus_id": hit.corpus_id, "additional": { + "source": hit.additional["source"], "file": hit.additional["file"], "compiled": hit.additional["compiled"], "heading": hit.additional["heading"], From befcbcdd5d940b60b3e62a1ccce74dc56f234141 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 02:06:00 -0800 Subject: [PATCH 21/30] Use file source to find entries from github, notion on web, desktop client This is a more robust mechanism of identification than via file name including github or notion domain names --- src/interface/desktop/search.html | 6 +++--- src/khoj/interface/web/search.html | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/interface/desktop/search.html b/src/interface/desktop/search.html index aa8aa662..ce368b9b 100644 --- a/src/interface/desktop/search.html +++ b/src/interface/desktop/search.html @@ -112,14 +112,14 @@ } else if ( item.additional.file.endsWith(".md") || item.additional.file.endsWith(".markdown") || - (item.additional.file.includes("issues") && item.additional.file.includes("github.com")) || - (item.additional.file.includes("commit") && item.additional.file.includes("github.com")) + (item.additional.file.includes("issues") && item.additional.source === "github") || + (item.additional.file.includes("commit") && item.additional.source === "github") ) { html += render_markdown(query, [item]); } else if (item.additional.file.endsWith(".pdf")) { html += render_pdf(query, [item]); - } else if 
(item.additional.file.includes("notion.so")) { + } else if (item.additional.source == "notion") { html += `
` + `${item.additional.heading}` + `

${item.entry}

` + `
`; } else if (item.additional.file.endsWith(".html")) { html += render_html(query, [item]); diff --git a/src/khoj/interface/web/search.html b/src/khoj/interface/web/search.html index 5331ea92..d3ddb595 100644 --- a/src/khoj/interface/web/search.html +++ b/src/khoj/interface/web/search.html @@ -112,14 +112,14 @@ } else if ( item.additional.file.endsWith(".md") || item.additional.file.endsWith(".markdown") || - (item.additional.file.includes("issues") && item.additional.file.includes("github.com")) || - (item.additional.file.includes("commit") && item.additional.file.includes("github.com")) + (item.additional.file.includes("issues") && item.additional.source === "github") || + (item.additional.file.includes("commit") && item.additional.source === "github") ) { html += render_markdown(query, [item]); } else if (item.additional.file.endsWith(".pdf")) { html += render_pdf(query, [item]); - } else if (item.additional.file.includes("notion.so")) { + } else if (item.additional.source === "notion") { html += `
` + `${item.additional.heading}` + `

${item.entry}

` + `
`; } else if (item.additional.file.endsWith(".html")) { html += render_html(query, [item]); From 90d463c12a20da16c10e6b2d99c3336cd9ffa5c3 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 02:16:38 -0800 Subject: [PATCH 22/30] Append chat message to chat logs as TextNodes in web, desktop clients --- src/interface/desktop/chat.html | 3 ++- src/khoj/interface/web/chat.html | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index ebf93195..4997ef99 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -74,7 +74,8 @@ // Create a new div for the chat message text and append it to the chat message let chatMessageText = document.createElement('div'); chatMessageText.className = `chat-message-text ${by}`; - chatMessageText.innerHTML = formattedMessage; + let textNode = document.createTextNode(formattedMessage); + chatMessageText.appendChild(textNode); chatMessage.appendChild(chatMessageText); // Append annotations div to the chat message diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 82e3233d..1c661a92 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -83,7 +83,8 @@ To get started, just start typing below. 
You can also type / to see a list of co // Create a new div for the chat message text and append it to the chat message let chatMessageText = document.createElement('div'); chatMessageText.className = `chat-message-text ${by}`; - chatMessageText.innerHTML = formattedMessage; + let textNode = document.createTextNode(formattedMessage); + chatMessageText.appendChild(textNode); chatMessage.appendChild(chatMessageText); // Append annotations div to the chat message From 76d041f633967efdc4777f5621917a77b4af2147 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 13:11:45 -0800 Subject: [PATCH 23/30] Use KHOJ_DOMAIN env var to set allowed/trusted domains to host Khoj Allows hosting Khoj behind other, non "khoj.dev" domains --- src/khoj/app/settings.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index 97a058da..1126c92e 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -26,13 +26,13 @@ SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY") # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = os.getenv("KHOJ_DEBUG") == "True" -ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"] +# All Subdomains of KHOJ_DOMAIN are trusted +KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "khoj.dev") +ALLOWED_HOSTS = [f".{KHOJ_DOMAIN}", "localhost", "127.0.0.1", "[::1]"] CSRF_TRUSTED_ORIGINS = [ - "https://app.khoj.dev", - "https://beta.khoj.dev", - "https://khoj.dev", - "https://*.khoj.dev", + f"https://*.{KHOJ_DOMAIN}", + f"https://{KHOJ_DOMAIN}", ] COOKIE_SAMESITE = "None" @@ -40,8 +40,8 @@ if DEBUG: SESSION_COOKIE_DOMAIN = "localhost" CSRF_COOKIE_DOMAIN = "localhost" else: - SESSION_COOKIE_DOMAIN = "khoj.dev" - CSRF_COOKIE_DOMAIN = "khoj.dev" + SESSION_COOKIE_DOMAIN = KHOJ_DOMAIN + CSRF_COOKIE_DOMAIN = KHOJ_DOMAIN SESSION_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True From 5469e81a87a423332f9e194510de30b4048f56e3 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 13:44:45 -0800 Subject: [PATCH 24/30] Use full path for the static directory in FastAPI and reflect deeper nesting of the django app --- src/khoj/app/settings.py | 4 ++-- src/khoj/main.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index 721bcc87..1b92cc02 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -14,7 +14,7 @@ from pathlib import Path import os # Build paths inside the project like this: BASE_DIR / 'subdir'. 
-BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent +BASE_DIR = Path(__file__).resolve().parent.parent # Quick-start development settings - unsuitable for production @@ -143,7 +143,7 @@ USE_TZ = True # https://docs.djangoproject.com/en/4.2/howto/static-files/ STATIC_ROOT = BASE_DIR / "static" -STATICFILES_DIRS = [BASE_DIR / "src/khoj/interface/web"] +STATICFILES_DIRS = [BASE_DIR / "interface/web"] STATIC_URL = "/static/" # Default primary key field type diff --git a/src/khoj/main.py b/src/khoj/main.py index c5e1f277..67e840b2 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -107,10 +107,10 @@ def run(should_start_server=True): # Mount Django and Static Files app.mount("/server", django_app, name="server") - static_dir = "static" + static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static") if not os.path.exists(static_dir): os.mkdir(static_dir) - app.mount(f"/{static_dir}", StaticFiles(directory=static_dir), name=static_dir) + app.mount(f"/static", StaticFiles(directory=static_dir), name=static_dir) # Configure Middleware configure_middleware(app) From 9e736d4340f6fcc7852d50990d9d788fd224d3b4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 14:02:04 -0800 Subject: [PATCH 25/30] Use KHOJ_DOMAIN for CORS allow_origins list as well - Default to app.khoj.dev - Remove unnecessary any_path regex in allow_origins. 
It only cares about host, paths are not set in origin header --- src/khoj/main.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/khoj/main.py b/src/khoj/main.py index b832151c..89560a3d 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -51,9 +51,16 @@ app = FastAPI() django_app = get_asgi_application() # Add CORS middleware +KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "app.khoj.dev") app.add_middleware( CORSMiddleware, - allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"], + allow_origins=[ + "app://obsidian.md", + "http://localhost:*", + "http://127.0.0.1:*", + f"https://{KHOJ_DOMAIN}", + "app://khoj.dev", + ], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], From 458e794d001b28122933896e119115c34f7df9de Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 14:40:57 -0800 Subject: [PATCH 26/30] Revert PYTHONPATH to what it was before --- prod.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prod.Dockerfile b/prod.Dockerfile index a935f3c6..693a3a8b 100644 --- a/prod.Dockerfile +++ b/prod.Dockerfile @@ -20,7 +20,7 @@ COPY . . RUN apt install vim -y # Set the PYTHONPATH environment variable in order for it to find the Django app. 
-ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH +ENV PYTHONPATH=/app/src:$PYTHONPATH # Run the Application # There are more arguments required for the application to run, From 4cdfe8fc4fa94aa9f0841e2220f400d49f8ea801 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 21 Nov 2023 16:33:48 -0800 Subject: [PATCH 27/30] Re-enable Khoj Obsidian plugin for Mobile, as Khoj cloud is available --- manifest.json | 2 +- src/interface/obsidian/manifest.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/manifest.json b/manifest.json index 4d019834..e0a0a9f5 100644 --- a/manifest.json +++ b/manifest.json @@ -6,5 +6,5 @@ "description": "An AI copilot for your Second Brain", "author": "Khoj Inc.", "authorUrl": "https://github.com/khoj-ai", - "isDesktopOnly": true + "isDesktopOnly": false } diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index 4d019834..e0a0a9f5 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -6,5 +6,5 @@ "description": "An AI copilot for your Second Brain", "author": "Khoj Inc.", "authorUrl": "https://github.com/khoj-ai", - "isDesktopOnly": true + "isDesktopOnly": false } From 1e2af083f0efe88aba29801464b6b0d727ae922f Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 22:11:32 -0800 Subject: [PATCH 28/30] Rename the data_sources module to content --- .../processor/{data_sources => content}/__init__.py | 0 .../{data_sources => content}/github/__init__.py | 0 .../github/github_to_entries.py | 4 ++-- .../{data_sources => content}/markdown/__init__.py | 0 .../markdown/markdown_to_entries.py | 0 .../notion/notion_to_entries.py | 0 .../{data_sources => content}/org_mode/__init__.py | 0 .../org_mode/org_to_entries.py | 2 +- .../{data_sources => content}/org_mode/orgnode.py | 0 .../{data_sources => content}/pdf/__init__.py | 0 .../{data_sources => content}/pdf/pdf_to_entries.py | 0 .../{data_sources => content}/plaintext/__init__.py | 0 
.../plaintext/plaintext_to_entries.py | 0 src/khoj/routers/indexer.py | 12 ++++++------ tests/conftest.py | 4 ++-- tests/test_client.py | 2 +- tests/test_markdown_to_entries.py | 2 +- tests/test_multiple_users.py | 2 +- tests/test_org_to_entries.py | 2 +- tests/test_orgnode.py | 2 +- tests/test_pdf_to_entries.py | 2 +- tests/test_plaintext_to_entries.py | 2 +- tests/test_text_search.py | 4 ++-- 23 files changed, 20 insertions(+), 20 deletions(-) rename src/khoj/processor/{data_sources => content}/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/github/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/github/github_to_entries.py (98%) rename src/khoj/processor/{data_sources => content}/markdown/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/markdown/markdown_to_entries.py (100%) rename src/khoj/processor/{data_sources => content}/notion/notion_to_entries.py (100%) rename src/khoj/processor/{data_sources => content}/org_mode/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/org_mode/org_to_entries.py (99%) rename src/khoj/processor/{data_sources => content}/org_mode/orgnode.py (100%) rename src/khoj/processor/{data_sources => content}/pdf/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/pdf/pdf_to_entries.py (100%) rename src/khoj/processor/{data_sources => content}/plaintext/__init__.py (100%) rename src/khoj/processor/{data_sources => content}/plaintext/plaintext_to_entries.py (100%) diff --git a/src/khoj/processor/data_sources/__init__.py b/src/khoj/processor/content/__init__.py similarity index 100% rename from src/khoj/processor/data_sources/__init__.py rename to src/khoj/processor/content/__init__.py diff --git a/src/khoj/processor/data_sources/github/__init__.py b/src/khoj/processor/content/github/__init__.py similarity index 100% rename from src/khoj/processor/data_sources/github/__init__.py rename to src/khoj/processor/content/github/__init__.py diff 
--git a/src/khoj/processor/data_sources/github/github_to_entries.py b/src/khoj/processor/content/github/github_to_entries.py similarity index 98% rename from src/khoj/processor/data_sources/github/github_to_entries.py rename to src/khoj/processor/content/github/github_to_entries.py index 05d18d8a..b0854850 100644 --- a/src/khoj/processor/data_sources/github/github_to_entries.py +++ b/src/khoj/processor/content/github/github_to_entries.py @@ -9,8 +9,8 @@ import requests from khoj.database.models import Entry as DbEntry from khoj.database.models import GithubConfig, KhojUser -from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries # Internal Packages diff --git a/src/khoj/processor/data_sources/markdown/__init__.py b/src/khoj/processor/content/markdown/__init__.py similarity index 100% rename from src/khoj/processor/data_sources/markdown/__init__.py rename to src/khoj/processor/content/markdown/__init__.py diff --git a/src/khoj/processor/data_sources/markdown/markdown_to_entries.py b/src/khoj/processor/content/markdown/markdown_to_entries.py similarity index 100% rename from src/khoj/processor/data_sources/markdown/markdown_to_entries.py rename to src/khoj/processor/content/markdown/markdown_to_entries.py diff --git a/src/khoj/processor/data_sources/notion/notion_to_entries.py b/src/khoj/processor/content/notion/notion_to_entries.py similarity index 100% rename from src/khoj/processor/data_sources/notion/notion_to_entries.py rename to src/khoj/processor/content/notion/notion_to_entries.py diff --git a/src/khoj/processor/data_sources/org_mode/__init__.py b/src/khoj/processor/content/org_mode/__init__.py similarity index 100% rename from 
src/khoj/processor/data_sources/org_mode/__init__.py rename to src/khoj/processor/content/org_mode/__init__.py diff --git a/src/khoj/processor/data_sources/org_mode/org_to_entries.py b/src/khoj/processor/content/org_mode/org_to_entries.py similarity index 99% rename from src/khoj/processor/data_sources/org_mode/org_to_entries.py rename to src/khoj/processor/content/org_mode/org_to_entries.py index 9e0d8e54..37c569e9 100644 --- a/src/khoj/processor/data_sources/org_mode/org_to_entries.py +++ b/src/khoj/processor/content/org_mode/org_to_entries.py @@ -7,7 +7,7 @@ from khoj.database.models import Entry as DbEntry from khoj.database.models import KhojUser # Internal Packages -from khoj.processor.data_sources.org_mode import orgnode +from khoj.processor.content.org_mode import orgnode from khoj.processor.text_to_entries import TextToEntries from khoj.utils import state from khoj.utils.helpers import timer diff --git a/src/khoj/processor/data_sources/org_mode/orgnode.py b/src/khoj/processor/content/org_mode/orgnode.py similarity index 100% rename from src/khoj/processor/data_sources/org_mode/orgnode.py rename to src/khoj/processor/content/org_mode/orgnode.py diff --git a/src/khoj/processor/data_sources/pdf/__init__.py b/src/khoj/processor/content/pdf/__init__.py similarity index 100% rename from src/khoj/processor/data_sources/pdf/__init__.py rename to src/khoj/processor/content/pdf/__init__.py diff --git a/src/khoj/processor/data_sources/pdf/pdf_to_entries.py b/src/khoj/processor/content/pdf/pdf_to_entries.py similarity index 100% rename from src/khoj/processor/data_sources/pdf/pdf_to_entries.py rename to src/khoj/processor/content/pdf/pdf_to_entries.py diff --git a/src/khoj/processor/data_sources/plaintext/__init__.py b/src/khoj/processor/content/plaintext/__init__.py similarity index 100% rename from src/khoj/processor/data_sources/plaintext/__init__.py rename to src/khoj/processor/content/plaintext/__init__.py diff --git 
a/src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py b/src/khoj/processor/content/plaintext/plaintext_to_entries.py similarity index 100% rename from src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py rename to src/khoj/processor/content/plaintext/plaintext_to_entries.py diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index 6fcf3348..0432eed0 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -7,12 +7,12 @@ from pydantic import BaseModel from starlette.authentication import requires from khoj.database.models import GithubConfig, KhojUser, NotionConfig -from khoj.processor.data_sources.github.github_to_entries import GithubToEntries -from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries -from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries -from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries -from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.content.github.github_to_entries import GithubToEntries +from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.content.notion.notion_to_entries import NotionToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.routers.helpers import update_telemetry_state from khoj.search_type import image_search, text_search from khoj.utils import constants, state diff --git a/tests/conftest.py b/tests/conftest.py index c18c5f80..54c664d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,8 +18,8 @@ from khoj.database.models import ( LocalOrgConfig, LocalPlaintextConfig, ) -from 
khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries -from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel from khoj.routers.indexer import configure_content from khoj.search_type import image_search, text_search diff --git a/tests/test_client.py b/tests/test_client.py index aecd0498..19aba03b 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -13,7 +13,7 @@ from PIL import Image from khoj.configure import configure_routes, configure_search_types from khoj.database.adapters import EntryAdapters from khoj.database.models import KhojApiUser, KhojUser -from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries from khoj.search_type import image_search, text_search from khoj.utils import state from khoj.utils.rawconfig import ContentConfig, SearchConfig diff --git a/tests/test_markdown_to_entries.py b/tests/test_markdown_to_entries.py index 9ec88382..b20040f9 100644 --- a/tests/test_markdown_to_entries.py +++ b/tests/test_markdown_to_entries.py @@ -4,7 +4,7 @@ from pathlib import Path import os # Internal Packages -from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries from khoj.utils.fs_syncer import get_markdown_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index d2d7737b..dae881bf 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -5,7 +5,7 @@ from urllib.parse import quote import pytest from khoj.database.models import KhojApiUser, KhojUser -from 
khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries # Internal Packages from khoj.search_type import text_search diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index fb37426a..742c9f8e 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import is_none_or_empty from khoj.utils.rawconfig import Entry diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index aa6a3cb9..a543ceb5 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -2,7 +2,7 @@ import datetime # Internal Packages -from khoj.processor.data_sources.org_mode import orgnode +from khoj.processor.content.org_mode import orgnode # Test diff --git a/tests/test_pdf_to_entries.py b/tests/test_pdf_to_entries.py index ebf53025..9aeb438f 100644 --- a/tests/test_pdf_to_entries.py +++ b/tests/test_pdf_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries from khoj.utils.fs_syncer import get_pdf_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_plaintext_to_entries.py b/tests/test_plaintext_to_entries.py index 393fac17..d3d50c94 100644 --- a/tests/test_plaintext_to_entries.py +++ b/tests/test_plaintext_to_entries.py @@ -4,7 +4,7 @@ import os from pathlib import Path from khoj.database.models import KhojUser, LocalPlaintextConfig -from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.content.plaintext.plaintext_to_entries import 
PlaintextToEntries # Internal Packages from khoj.utils.fs_syncer import get_plaintext_files diff --git a/tests/test_text_search.py b/tests/test_text_search.py index f19903ae..186ef57c 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -8,8 +8,8 @@ from pathlib import Path import pytest from khoj.database.models import Entry, GithubConfig, KhojUser, LocalOrgConfig -from khoj.processor.data_sources.github.github_to_entries import GithubToEntries -from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.content.github.github_to_entries import GithubToEntries +from khoj.processor.content.org_mode.org_to_entries import OrgToEntries # Internal Packages from khoj.search_type import text_search From c652a7fd2d72120ee319f8dce01a6e3803cd687e Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 22:25:17 -0800 Subject: [PATCH 29/30] Move text_to_entries under the new content folder --- src/khoj/processor/content/github/github_to_entries.py | 2 +- src/khoj/processor/content/markdown/markdown_to_entries.py | 2 +- src/khoj/processor/content/notion/notion_to_entries.py | 2 +- src/khoj/processor/content/org_mode/org_to_entries.py | 2 +- src/khoj/processor/content/pdf/pdf_to_entries.py | 2 +- .../processor/content/plaintext/plaintext_to_entries.py | 2 +- src/khoj/processor/{ => content}/text_to_entries.py | 0 src/khoj/processor/conversation/prompts.py | 7 ++++--- src/khoj/search_type/text_search.py | 4 ++-- tests/test_org_to_entries.py | 2 +- 10 files changed, 13 insertions(+), 12 deletions(-) rename src/khoj/processor/{ => content}/text_to_entries.py (100%) diff --git a/src/khoj/processor/content/github/github_to_entries.py b/src/khoj/processor/content/github/github_to_entries.py index b0854850..1149227e 100644 --- a/src/khoj/processor/content/github/github_to_entries.py +++ b/src/khoj/processor/content/github/github_to_entries.py @@ -11,7 +11,7 @@ from khoj.database.models import Entry as DbEntry from 
khoj.database.models import GithubConfig, KhojUser from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.content.org_mode.org_to_entries import OrgToEntries -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries # Internal Packages from khoj.utils.helpers import timer diff --git a/src/khoj/processor/content/markdown/markdown_to_entries.py b/src/khoj/processor/content/markdown/markdown_to_entries.py index 05531d70..2a4b6348 100644 --- a/src/khoj/processor/content/markdown/markdown_to_entries.py +++ b/src/khoj/processor/content/markdown/markdown_to_entries.py @@ -10,7 +10,7 @@ from khoj.database.models import Entry as DbEntry from khoj.database.models import KhojUser # Internal Packages -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils.constants import empty_escape_sequences from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/content/notion/notion_to_entries.py b/src/khoj/processor/content/notion/notion_to_entries.py index 4a2fd817..21c7232d 100644 --- a/src/khoj/processor/content/notion/notion_to_entries.py +++ b/src/khoj/processor/content/notion/notion_to_entries.py @@ -8,7 +8,7 @@ import requests from khoj.database.models import Entry as DbEntry from khoj.database.models import KhojUser, NotionConfig -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries # Internal Packages from khoj.utils.helpers import timer diff --git a/src/khoj/processor/content/org_mode/org_to_entries.py b/src/khoj/processor/content/org_mode/org_to_entries.py index 37c569e9..c3f345f0 100644 --- a/src/khoj/processor/content/org_mode/org_to_entries.py +++ b/src/khoj/processor/content/org_mode/org_to_entries.py @@ -8,7 +8,7 @@ from khoj.database.models import KhojUser # 
Internal Packages from khoj.processor.content.org_mode import orgnode -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils import state from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/content/pdf/pdf_to_entries.py b/src/khoj/processor/content/pdf/pdf_to_entries.py index f1769a49..caa93636 100644 --- a/src/khoj/processor/content/pdf/pdf_to_entries.py +++ b/src/khoj/processor/content/pdf/pdf_to_entries.py @@ -11,7 +11,7 @@ from khoj.database.models import Entry as DbEntry from khoj.database.models import KhojUser # Internal Packages -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/content/plaintext/plaintext_to_entries.py b/src/khoj/processor/content/plaintext/plaintext_to_entries.py index 39966bef..cae88837 100644 --- a/src/khoj/processor/content/plaintext/plaintext_to_entries.py +++ b/src/khoj/processor/content/plaintext/plaintext_to_entries.py @@ -9,7 +9,7 @@ from khoj.database.models import Entry as DbEntry from khoj.database.models import KhojUser # Internal Packages -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/content/text_to_entries.py similarity index 100% rename from src/khoj/processor/text_to_entries.py rename to src/khoj/processor/content/text_to_entries.py diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index cba7cb59..b0e316da 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -10,7 +10,7 @@ You are 
Khoj, a smart, inquisitive and helpful personal assistant. Use your general knowledge and the past conversation with the user as context to inform your responses. You were created by Khoj Inc. with the following capabilities: -- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using the Khoj desktop application. +- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using any Khoj client, including the native Desktop app, the Obsidian or Emacs plugins, or the web app. - You cannot set reminders. - Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question. - Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations. @@ -128,8 +128,9 @@ The user has a question which you can use the internet to respond to. Can you br Today's date in UTC: {current_date} Here are some examples of questions and subqueries: -Q: What is the weather like in New York? -A: ["weather in new york"] + +Q: Posts about vector databases on Hacker News +A: ["site:"news.ycombinator.com vector database"] Q: What is the weather like in New York and San Francisco? 
A: ["weather in new york", "weather in san francisco"] diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py index ca0ac6d3..d04d4c6a 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -18,7 +18,7 @@ from khoj.utils.models import BaseEncoder from khoj.utils.state import SearchType from khoj.utils.rawconfig import SearchResponse, Entry from khoj.utils.jsonl import load_jsonl -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.database.adapters import EntryAdapters from khoj.database.models import KhojUser, Entry as DbEntry @@ -141,7 +141,7 @@ def collate_results(hits, dedupe=True): else: hit_ids.add(hit.corpus_id) - yield SearchResponse.parse_obj( + yield SearchResponse.model_validate( { "entry": hit.raw, "score": hit.distance, diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index 742c9f8e..6cd5e3ce 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -4,7 +4,7 @@ import os # Internal Packages from khoj.processor.content.org_mode.org_to_entries import OrgToEntries -from khoj.processor.text_to_entries import TextToEntries +from khoj.processor.content.text_to_entries import TextToEntries from khoj.utils.helpers import is_none_or_empty from khoj.utils.rawconfig import Entry from khoj.utils.fs_syncer import get_org_files From 60c23d9e3a73e1381f11b88e0b52045c352a3aea Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 21 Nov 2023 23:08:36 -0800 Subject: [PATCH 30/30] Add online search chat director tests --- tests/test_gpt4all_chat_director.py | 21 +++++++++++++++++++++ tests/test_openai_chat_director.py | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/tests/test_gpt4all_chat_director.py b/tests/test_gpt4all_chat_director.py index d978fc99..d44a3c07 100644 --- a/tests/test_gpt4all_chat_director.py +++ b/tests/test_gpt4all_chat_director.py 
@@ -1,5 +1,6 @@ # Standard Packages import urllib.parse +from urllib.parse import quote # External Packages import pytest @@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_c ) +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.django_db(transaction=True) +def test_chat_with_online_content(chat_client): + # Act + q = "/online give me the link to paul graham's essay how to do great work" + encoded_q = quote(q, safe="") + response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true") + response_message = response.content.decode("utf-8") + + response_message = response_message.split("### compiled references")[0] + + # Assert + expected_responses = ["http://www.paulgraham.com/greatwork.html"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected assistants name, [K|k]hoj, in response but got: " + response_message + ) + + # ---------------------------------------------------------------------------------------------------- @pytest.mark.chatquality @pytest.mark.django_db(transaction=True) diff --git a/tests/test_openai_chat_director.py b/tests/test_openai_chat_director.py index b4e63364..7b98d4da 100644 --- a/tests/test_openai_chat_director.py +++ b/tests/test_openai_chat_director.py @@ -1,6 +1,7 @@ # Standard Packages import os import urllib.parse +from urllib.parse import quote # External Packages import pytest @@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client): ) +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.django_db(transaction=True) +def test_chat_with_online_content(chat_client): + # Act + q = "/online give me the link to paul graham's essay how to do great work" + encoded_q = quote(q, safe="") + 
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true") + response_message = response.content.decode("utf-8") + + response_message = response_message.split("### compiled references")[0] + + # Assert + expected_responses = ["http://www.paulgraham.com/greatwork.html"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected assistants name, [K|k]hoj, in response but got: " + response_message + ) + + # ---------------------------------------------------------------------------------------------------- @pytest.mark.django_db(transaction=True) @pytest.mark.chatquality