From 3f8cc71aca28c3b2ae84b02ab1d87737d64c0ea5 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 3 Jul 2025 15:04:55 -0700 Subject: [PATCH] Drop old pre 1.0 khoj config migration scripts These were used when khoj was configured using khoj.yml file --- src/khoj/migrations/__init__.py | 0 .../migrate_offline_chat_default_model.py | 69 --------- .../migrate_offline_chat_default_model_2.py | 71 ---------- .../migrations/migrate_offline_chat_schema.py | 83 ----------- src/khoj/migrations/migrate_offline_model.py | 29 ---- .../migrate_processor_config_openai.py | 67 --------- src/khoj/migrations/migrate_server_pg.py | 132 ------------------ src/khoj/migrations/migrate_version.py | 17 --- src/khoj/utils/cli.py | 24 ---- 9 files changed, 492 deletions(-) delete mode 100644 src/khoj/migrations/__init__.py delete mode 100644 src/khoj/migrations/migrate_offline_chat_default_model.py delete mode 100644 src/khoj/migrations/migrate_offline_chat_default_model_2.py delete mode 100644 src/khoj/migrations/migrate_offline_chat_schema.py delete mode 100644 src/khoj/migrations/migrate_offline_model.py delete mode 100644 src/khoj/migrations/migrate_processor_config_openai.py delete mode 100644 src/khoj/migrations/migrate_server_pg.py delete mode 100644 src/khoj/migrations/migrate_version.py diff --git a/src/khoj/migrations/__init__.py b/src/khoj/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/khoj/migrations/migrate_offline_chat_default_model.py b/src/khoj/migrations/migrate_offline_chat_default_model.py deleted file mode 100644 index 831f2d9d..00000000 --- a/src/khoj/migrations/migrate_offline_chat_default_model.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Current format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - offline-chat: - enable-offline-chat: false - chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin - ... -search-type: - ... - -New format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - offline-chat: - enable-offline-chat: false - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf - ... -search-type: - ... -""" -import logging - -from packaging import version - -from khoj.utils.yaml import load_config_from_file, save_config_to_file - -logger = logging.getLogger(__name__) - - -def migrate_offline_chat_default_model(args): - schema_version = "0.12.4" - raw_config = load_config_from_file(args.config_file) - previous_version = raw_config.get("version") - - if "processor" not in raw_config: - return args - if raw_config["processor"] is None: - return args - if "conversation" not in raw_config["processor"]: - return args - if "offline-chat" not in raw_config["processor"]["conversation"]: - return args - if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]: - return args - - if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"): - logger.info( - f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF" - ) - raw_config["version"] = schema_version - - # Update offline chat model to mistral in GGUF format to use latest GPT4All - offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] - if offline_chat_model.endswith(".bin"): - raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf" - - save_config_to_file(raw_config, args.config_file) - return args diff --git a/src/khoj/migrations/migrate_offline_chat_default_model_2.py b/src/khoj/migrations/migrate_offline_chat_default_model_2.py deleted file mode 100644 index 107b7130..00000000 --- a/src/khoj/migrations/migrate_offline_chat_default_model_2.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -Current format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - offline-chat: - enable-offline-chat: false - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf - ... -search-type: - ... - -New format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - offline-chat: - enable-offline-chat: false - chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF - ... -search-type: - ... -""" -import logging - -from packaging import version - -from khoj.utils.yaml import load_config_from_file, save_config_to_file - -logger = logging.getLogger(__name__) - - -def migrate_offline_chat_default_model(args): - schema_version = "1.7.0" - raw_config = load_config_from_file(args.config_file) - previous_version = raw_config.get("version") - - if "processor" not in raw_config: - return args - if raw_config["processor"] is None: - return args - if "conversation" not in raw_config["processor"]: - return args - if "offline-chat" not in raw_config["processor"]["conversation"]: - return args - if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]: - return args - - if previous_version is None or version.parse(previous_version) < version.parse(schema_version): - logger.info( - f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF" - ) - raw_config["version"] = schema_version - - # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp - offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] - if offline_chat_model == "mistral-7b-instruct-v0.1.Q4_0.gguf": - raw_config["processor"]["conversation"]["offline-chat"][ - "chat-model" - ] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF" - - save_config_to_file(raw_config, args.config_file) - return args diff --git a/src/khoj/migrations/migrate_offline_chat_schema.py b/src/khoj/migrations/migrate_offline_chat_schema.py deleted file mode 100644 index 0c221652..00000000 --- a/src/khoj/migrations/migrate_offline_chat_schema.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Current format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - enable-offline-chat: false - conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json - openai: - ... -search-type: - ... - -New format of khoj.yml ---- -app: - ... -content-type: - ... -processor: - conversation: - offline-chat: - enable-offline-chat: false - chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin - tokenizer: null - max_prompt_size: null - conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json - openai: - ... -search-type: - ... -""" -import logging - -from packaging import version - -from khoj.utils.yaml import load_config_from_file, save_config_to_file - -logger = logging.getLogger(__name__) - - -def migrate_offline_chat_schema(args): - schema_version = "0.12.3" - raw_config = load_config_from_file(args.config_file) - previous_version = raw_config.get("version") - - if "processor" not in raw_config: - return args - if raw_config["processor"] is None: - return args - if "conversation" not in raw_config["processor"]: - return args - - if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"): - logger.info( - f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration" - ) - raw_config["version"] = schema_version - - # Create max-prompt-size field in conversation processor schema - raw_config["processor"]["conversation"]["max-prompt-size"] = None - raw_config["processor"]["conversation"]["tokenizer"] = None - - # Create offline chat schema based on existing enable_offline_chat field in khoj config schema - offline_chat_model = ( - raw_config["processor"]["conversation"] - .get("offline-chat", {}) - .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin") - ) - raw_config["processor"]["conversation"]["offline-chat"] = { - "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False), - "chat-model": offline_chat_model, - } - - # Delete old enable-offline-chat field from conversation processor schema - if "enable-offline-chat" in raw_config["processor"]["conversation"]: - del raw_config["processor"]["conversation"]["enable-offline-chat"] - - save_config_to_file(raw_config, args.config_file) - return args diff --git a/src/khoj/migrations/migrate_offline_model.py b/src/khoj/migrations/migrate_offline_model.py deleted file mode 100644 index 6294a4e8..00000000 --- a/src/khoj/migrations/migrate_offline_model.py +++ /dev/null @@ -1,29 +0,0 @@ -import logging -import os - -from packaging import version - -from khoj.utils.yaml import load_config_from_file, save_config_to_file - -logger = logging.getLogger(__name__) - - -def migrate_offline_model(args): - schema_version = "0.10.1" - raw_config = load_config_from_file(args.config_file) - previous_version = raw_config.get("version") - - if previous_version is None or version.parse(previous_version) < version.parse("0.10.1"): - logger.info( - f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}" - ) - raw_config["version"] = schema_version - - # If the user has downloaded the offline model, remove it from the cache. - offline_model_path = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin") - if os.path.exists(offline_model_path): - os.remove(offline_model_path) - - save_config_to_file(raw_config, args.config_file) - - return args diff --git a/src/khoj/migrations/migrate_processor_config_openai.py b/src/khoj/migrations/migrate_processor_config_openai.py deleted file mode 100644 index c25e5306..00000000 --- a/src/khoj/migrations/migrate_processor_config_openai.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Current format of khoj.yml ---- -app: - should-log-telemetry: true -content-type: - ... -processor: - conversation: - chat-model: gpt-3.5-turbo - conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json - model: text-davinci-003 - openai-api-key: sk-secret-key -search-type: - ... - -New format of khoj.yml ---- -app: - should-log-telemetry: true -content-type: - ... -processor: - conversation: - openai: - chat-model: gpt-3.5-turbo - openai-api-key: sk-secret-key - conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json - enable-offline-chat: false -search-type: - ... -""" -from khoj.utils.yaml import load_config_from_file, save_config_to_file - - -def migrate_processor_conversation_schema(args): - schema_version = "0.10.0" - raw_config = load_config_from_file(args.config_file) - - if "processor" not in raw_config: - return args - if raw_config["processor"] is None: - return args - if "conversation" not in raw_config["processor"]: - return args - - current_openai_api_key = raw_config["processor"]["conversation"].get("openai-api-key", None) - current_chat_model = raw_config["processor"]["conversation"].get("chat-model", None) - if current_openai_api_key is None and current_chat_model is None: - return args - - raw_config["version"] = schema_version - - # Add enable_offline_chat to khoj config schema - if "enable-offline-chat" not in raw_config["processor"]["conversation"]: - raw_config["processor"]["conversation"]["enable-offline-chat"] = False - - # Update conversation processor schema - conversation_logfile = raw_config["processor"]["conversation"].get("conversation-logfile", None) - raw_config["processor"]["conversation"] = { - "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key}, - "conversation-logfile": conversation_logfile, - "enable-offline-chat": False, - } - - save_config_to_file(raw_config, args.config_file) - return args diff --git a/src/khoj/migrations/migrate_server_pg.py b/src/khoj/migrations/migrate_server_pg.py deleted file mode 100644 index 316704b9..00000000 --- a/src/khoj/migrations/migrate_server_pg.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -The application config currently looks like this: -app: - should-log-telemetry: true -content-type: - ... -processor: - conversation: - conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json - max-prompt-size: null - offline-chat: - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf - enable-offline-chat: false - openai: - api-key: sk-blah - chat-model: gpt-3.5-turbo - tokenizer: null -search-type: - asymmetric: - cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 - encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1 - encoder-type: null - model-directory: /Users/si/.khoj/search/asymmetric - image: - encoder: sentence-transformers/clip-ViT-B-32 - encoder-type: null - model-directory: /Users/si/.khoj/search/image - symmetric: - cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 - encoder: sentence-transformers/all-MiniLM-L6-v2 - encoder-type: null - model-directory: ~/.khoj/search/symmetric -version: 0.14.0 - - -The new version will looks like this: -app: - should-log-telemetry: true -processor: - conversation: - offline-chat: - enabled: false - openai: - api-key: sk-blah - chat-model-options: - - chat-model: gpt-3.5-turbo - tokenizer: null - type: openai - - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf - tokenizer: null - type: offline -search-type: - asymmetric: - cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 - encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1 -version: 0.15.0 -""" - -import logging - -from packaging import version - -from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig -from khoj.utils.yaml import load_config_from_file, save_config_to_file - -logger = logging.getLogger(__name__) - - -def migrate_server_pg(args): - schema_version = "0.15.0" - raw_config = load_config_from_file(args.config_file) - previous_version = raw_config.get("version") - - if previous_version is None or version.parse(previous_version) < version.parse(schema_version): - logger.info( - f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}" - ) - raw_config["version"] = schema_version - - if raw_config is None: - return args - - if "search-type" in raw_config and raw_config["search-type"]: - if "asymmetric" in raw_config["search-type"]: - # Delete all existing search models - SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete() - # Create new search model from existing Khoj YAML config - asymmetric_search = raw_config["search-type"]["asymmetric"] - SearchModelConfig.objects.create( - name="default", - model_type=SearchModelConfig.ModelType.TEXT, - bi_encoder=asymmetric_search.get("encoder"), - cross_encoder=asymmetric_search.get("cross-encoder"), - ) - - if "processor" in raw_config and raw_config["processor"] and "conversation" in raw_config["processor"]: - processor_conversation = raw_config["processor"]["conversation"] - - if "offline-chat" in raw_config["processor"]["conversation"]: - offline_chat = raw_config["processor"]["conversation"]["offline-chat"] - ChatModel.objects.create( - name=offline_chat.get("chat-model"), - tokenizer=processor_conversation.get("tokenizer"), - max_prompt_size=processor_conversation.get("max-prompt-size"), - model_type=ChatModel.ModelType.OFFLINE, - ) - - if ( - "openai" in raw_config["processor"]["conversation"] - and raw_config["processor"]["conversation"]["openai"] - ): - openai = raw_config["processor"]["conversation"]["openai"] - - if openai.get("api-key") is None: - logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.") - else: - if openai.get("chat-model") is None: - openai["chat-model"] = "gpt-3.5-turbo" - - openai_model_api = AiModelApi.objects.create(api_key=openai.get("api-key"), name="default") - - ChatModel.objects.create( - name=openai.get("chat-model"), - tokenizer=processor_conversation.get("tokenizer"), - max_prompt_size=processor_conversation.get("max-prompt-size"), - model_type=ChatModel.ModelType.OPENAI, - ai_model_api=openai_model_api, - ) - - save_config_to_file(raw_config, args.config_file) - - return args diff --git a/src/khoj/migrations/migrate_version.py b/src/khoj/migrations/migrate_version.py deleted file mode 100644 index de8b9571..00000000 --- a/src/khoj/migrations/migrate_version.py +++ /dev/null @@ -1,17 +0,0 @@ -from khoj.utils.yaml import load_config_from_file, save_config_to_file - - -def migrate_config_to_version(args): - schema_version = "0.9.0" - raw_config = load_config_from_file(args.config_file) - - # Add version to khoj config schema - if "version" not in raw_config: - raw_config["version"] = schema_version - save_config_to_file(raw_config, args.config_file) - - # regenerate khoj index on first start of this version - # this should refresh index and apply index corruption fixes from #325 - args.regenerate = True - - return args diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py index 14581f41..786cbb62 100644 --- a/src/khoj/utils/cli.py +++ b/src/khoj/utils/cli.py @@ -6,16 +6,6 @@ from importlib.metadata import version logger = logging.getLogger(__name__) -from khoj.migrations.migrate_offline_chat_default_model import ( - migrate_offline_chat_default_model, -) -from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema -from khoj.migrations.migrate_offline_model import migrate_offline_model -from khoj.migrations.migrate_processor_config_openai import ( - migrate_processor_conversation_schema, -) -from khoj.migrations.migrate_server_pg import migrate_server_pg -from khoj.migrations.migrate_version import migrate_config_to_version from khoj.utils.helpers import is_env_var_true, resolve_absolute_path from khoj.utils.yaml import parse_config_from_file @@ -85,17 +75,3 @@ def cli(args=None): args.config.app.should_log_telemetry = False return args - - -def run_migrations(args): - migrations = [ - migrate_config_to_version, - migrate_processor_conversation_schema, - migrate_offline_model, - migrate_offline_chat_schema, - migrate_offline_chat_default_model, - migrate_server_pg, - ] - for migration in migrations: - args = migration(args) - return args