Drop old pre-1.0 khoj config migration scripts

These were used when khoj was configured via the khoj.yml file
Debanjum
2025-07-03 15:04:55 -07:00
parent 9096f628d0
commit 3f8cc71aca
9 changed files with 0 additions and 492 deletions

View File

@@ -1,69 +0,0 @@
"""
Current format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
offline-chat:
enable-offline-chat: false
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
...
search-type:
...
New format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
offline-chat:
enable-offline-chat: false
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
...
search-type:
...
"""
import logging
from packaging import version
from khoj.utils.yaml import load_config_from_file, save_config_to_file
logger = logging.getLogger(__name__)
def migrate_offline_chat_default_model(args):
schema_version = "0.12.4"
raw_config = load_config_from_file(args.config_file)
previous_version = raw_config.get("version")
if "processor" not in raw_config:
return args
if raw_config["processor"] is None:
return args
if "conversation" not in raw_config["processor"]:
return args
if "offline-chat" not in raw_config["processor"]["conversation"]:
return args
if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
return args
if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"):
logger.info(
f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
)
raw_config["version"] = schema_version
# Update offline chat model to mistral in GGUF format to use latest GPT4All
offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
if offline_chat_model.endswith(".bin"):
raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"
save_config_to_file(raw_config, args.config_file)
return args
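
Most of these migration scripts share the same guard before touching the config: compare the version stamped in khoj.yml against the script's target schema version using packaging.version, and no-op when the config is already current. A minimal standalone sketch of that guard pattern (needs_migration is a hypothetical helper for illustration, not a khoj function):

from typing import Optional

from packaging import version


def needs_migration(previous: Optional[str], target: str) -> bool:
    # Migrate when no version was ever recorded, or when the recorded
    # version predates the target schema version.
    return previous is None or version.parse(previous) < version.parse(target)


assert needs_migration(None, "0.12.4")  # config never stamped with a version
assert needs_migration("0.12.3", "0.12.4")  # older than the target schema
assert not needs_migration("0.12.4", "0.12.4")  # already migrated, no-op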

View File

@@ -1,71 +0,0 @@
"""
Current format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
offline-chat:
enable-offline-chat: false
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
...
search-type:
...
New format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
offline-chat:
enable-offline-chat: false
chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
...
search-type:
...
"""
import logging
from packaging import version
from khoj.utils.yaml import load_config_from_file, save_config_to_file
logger = logging.getLogger(__name__)
def migrate_offline_chat_default_model(args):
schema_version = "1.7.0"
raw_config = load_config_from_file(args.config_file)
previous_version = raw_config.get("version")
if "processor" not in raw_config:
return args
if raw_config["processor"] is None:
return args
if "conversation" not in raw_config["processor"]:
return args
if "offline-chat" not in raw_config["processor"]["conversation"]:
return args
if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
return args
if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
logger.info(
f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
)
raw_config["version"] = schema_version
# Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
if offline_chat_model == "mistral-7b-instruct-v0.1.Q4_0.gguf":
raw_config["processor"]["conversation"]["offline-chat"][
"chat-model"
] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"
save_config_to_file(raw_config, args.config_file)
return args

View File

@@ -1,83 +0,0 @@
"""
Current format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
enable-offline-chat: false
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
openai:
...
search-type:
...
New format of khoj.yml
---
app:
...
content-type:
...
processor:
conversation:
offline-chat:
enable-offline-chat: false
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
tokenizer: null
max_prompt_size: null
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
openai:
...
search-type:
...
"""
import logging
from packaging import version
from khoj.utils.yaml import load_config_from_file, save_config_to_file
logger = logging.getLogger(__name__)
def migrate_offline_chat_schema(args):
schema_version = "0.12.3"
raw_config = load_config_from_file(args.config_file)
previous_version = raw_config.get("version")
if "processor" not in raw_config:
return args
if raw_config["processor"] is None:
return args
if "conversation" not in raw_config["processor"]:
return args
if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"):
logger.info(
f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration"
)
raw_config["version"] = schema_version
# Create max-prompt-size field in conversation processor schema
raw_config["processor"]["conversation"]["max-prompt-size"] = None
raw_config["processor"]["conversation"]["tokenizer"] = None
# Create offline chat schema based on existing enable_offline_chat field in khoj config schema
offline_chat_model = (
raw_config["processor"]["conversation"]
.get("offline-chat", {})
.get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
)
raw_config["processor"]["conversation"]["offline-chat"] = {
"enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False),
"chat-model": offline_chat_model,
}
# Delete old enable-offline-chat field from conversation processor schema
if "enable-offline-chat" in raw_config["processor"]["conversation"]:
del raw_config["processor"]["conversation"]["enable-offline-chat"]
save_config_to_file(raw_config, args.config_file)
return args

View File

@@ -1,29 +0,0 @@
import logging
import os

from packaging import version

from khoj.utils.yaml import load_config_from_file, save_config_to_file

logger = logging.getLogger(__name__)


def migrate_offline_model(args):
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    if previous_version is None or version.parse(previous_version) < version.parse("0.10.1"):
        logger.info(
            f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
        )
        raw_config["version"] = schema_version

        # If the user has downloaded the offline model, remove it from the cache.
        offline_model_path = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
        if os.path.exists(offline_model_path):
            os.remove(offline_model_path)

        save_config_to_file(raw_config, args.config_file)

    return args

View File

@@ -1,67 +0,0 @@
"""
Current format of khoj.yml
---
app:
should-log-telemetry: true
content-type:
...
processor:
conversation:
chat-model: gpt-3.5-turbo
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
model: text-davinci-003
openai-api-key: sk-secret-key
search-type:
...
New format of khoj.yml
---
app:
should-log-telemetry: true
content-type:
...
processor:
conversation:
openai:
chat-model: gpt-3.5-turbo
openai-api-key: sk-secret-key
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
enable-offline-chat: false
search-type:
...
"""
from khoj.utils.yaml import load_config_from_file, save_config_to_file
def migrate_processor_conversation_schema(args):
schema_version = "0.10.0"
raw_config = load_config_from_file(args.config_file)
if "processor" not in raw_config:
return args
if raw_config["processor"] is None:
return args
if "conversation" not in raw_config["processor"]:
return args
current_openai_api_key = raw_config["processor"]["conversation"].get("openai-api-key", None)
current_chat_model = raw_config["processor"]["conversation"].get("chat-model", None)
if current_openai_api_key is None and current_chat_model is None:
return args
raw_config["version"] = schema_version
# Add enable_offline_chat to khoj config schema
if "enable-offline-chat" not in raw_config["processor"]["conversation"]:
raw_config["processor"]["conversation"]["enable-offline-chat"] = False
# Update conversation processor schema
conversation_logfile = raw_config["processor"]["conversation"].get("conversation-logfile", None)
raw_config["processor"]["conversation"] = {
"openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
"conversation-logfile": conversation_logfile,
"enable-offline-chat": False,
}
save_config_to_file(raw_config, args.config_file)
return args
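
Concretely, this script reshapes the conversation block as shown below. The before/after dicts are assembled from the docstring and code above (sk-secret-key is the docstring's placeholder, not a real key):

# Conversation processor block before migrate_processor_conversation_schema
before = {
    "processor": {
        "conversation": {
            "chat-model": "gpt-3.5-turbo",
            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
            "model": "text-davinci-003",
            "openai-api-key": "sk-secret-key",
        }
    }
}

# After: OpenAI settings nested under "openai", the legacy "model" field
# dropped, and an explicit offline chat toggle added
after = {
    "processor": {
        "conversation": {
            "openai": {"chat-model": "gpt-3.5-turbo", "api-key": "sk-secret-key"},
            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
            "enable-offline-chat": False,
        }
    }
}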

View File

@@ -1,132 +0,0 @@
"""
The application config currently looks like this:
app:
should-log-telemetry: true
content-type:
...
processor:
conversation:
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
max-prompt-size: null
offline-chat:
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
enable-offline-chat: false
openai:
api-key: sk-blah
chat-model: gpt-3.5-turbo
tokenizer: null
search-type:
asymmetric:
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
encoder-type: null
model-directory: /Users/si/.khoj/search/asymmetric
image:
encoder: sentence-transformers/clip-ViT-B-32
encoder-type: null
model-directory: /Users/si/.khoj/search/image
symmetric:
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
encoder: sentence-transformers/all-MiniLM-L6-v2
encoder-type: null
model-directory: ~/.khoj/search/symmetric
version: 0.14.0
The new version will looks like this:
app:
should-log-telemetry: true
processor:
conversation:
offline-chat:
enabled: false
openai:
api-key: sk-blah
chat-model-options:
- chat-model: gpt-3.5-turbo
tokenizer: null
type: openai
- chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
tokenizer: null
type: offline
search-type:
asymmetric:
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
version: 0.15.0
"""
import logging
from packaging import version
from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig
from khoj.utils.yaml import load_config_from_file, save_config_to_file
logger = logging.getLogger(__name__)
def migrate_server_pg(args):
schema_version = "0.15.0"
raw_config = load_config_from_file(args.config_file)
previous_version = raw_config.get("version")
if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
logger.info(
f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}"
)
raw_config["version"] = schema_version
if raw_config is None:
return args
if "search-type" in raw_config and raw_config["search-type"]:
if "asymmetric" in raw_config["search-type"]:
# Delete all existing search models
SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete()
# Create new search model from existing Khoj YAML config
asymmetric_search = raw_config["search-type"]["asymmetric"]
SearchModelConfig.objects.create(
name="default",
model_type=SearchModelConfig.ModelType.TEXT,
bi_encoder=asymmetric_search.get("encoder"),
cross_encoder=asymmetric_search.get("cross-encoder"),
)
if "processor" in raw_config and raw_config["processor"] and "conversation" in raw_config["processor"]:
processor_conversation = raw_config["processor"]["conversation"]
if "offline-chat" in raw_config["processor"]["conversation"]:
offline_chat = raw_config["processor"]["conversation"]["offline-chat"]
ChatModel.objects.create(
name=offline_chat.get("chat-model"),
tokenizer=processor_conversation.get("tokenizer"),
max_prompt_size=processor_conversation.get("max-prompt-size"),
model_type=ChatModel.ModelType.OFFLINE,
)
if (
"openai" in raw_config["processor"]["conversation"]
and raw_config["processor"]["conversation"]["openai"]
):
openai = raw_config["processor"]["conversation"]["openai"]
if openai.get("api-key") is None:
logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.")
else:
if openai.get("chat-model") is None:
openai["chat-model"] = "gpt-3.5-turbo"
openai_model_api = AiModelApi.objects.create(api_key=openai.get("api-key"), name="default")
ChatModel.objects.create(
name=openai.get("chat-model"),
tokenizer=processor_conversation.get("tokenizer"),
max_prompt_size=processor_conversation.get("max-prompt-size"),
model_type=ChatModel.ModelType.OPENAI,
ai_model_api=openai_model_api,
)
save_config_to_file(raw_config, args.config_file)
return args
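
This last YAML migration also marks the hand-off away from file-based configuration: search and chat model settings are copied out of khoj.yml into the Django models (SearchModelConfig, ChatModel, AiModelApi), after which configuration presumably lives in the database rather than in files rewritten by scripts like these.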

View File

@@ -1,17 +0,0 @@
from khoj.utils.yaml import load_config_from_file, save_config_to_file


def migrate_config_to_version(args):
    schema_version = "0.9.0"
    raw_config = load_config_from_file(args.config_file)

    # Add version to khoj config schema
    if "version" not in raw_config:
        raw_config["version"] = schema_version
        save_config_to_file(raw_config, args.config_file)

        # regenerate khoj index on first start of this version
        # this should refresh index and apply index corruption fixes from #325
        args.regenerate = True

    return args
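
All of these scripts round-trip the config through load_config_from_file and save_config_to_file from khoj.utils.yaml, whose implementation is not part of this diff. A plausible minimal sketch, assuming the helpers are thin wrappers over PyYAML (the real implementation may differ, e.g. in error handling):

import yaml  # PyYAML


def load_config_from_file(config_file) -> dict:
    # Sketch: parse khoj.yml into a plain dict-of-dicts for migrations to mutate.
    with open(config_file, "r") as f:
        return yaml.safe_load(f)


def save_config_to_file(raw_config: dict, config_file) -> None:
    # Sketch: write the (possibly migrated) config back to disk.
    with open(config_file, "w") as f:
        yaml.safe_dump(raw_config, f)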

View File

@@ -6,16 +6,6 @@ from importlib.metadata import version
logger = logging.getLogger(__name__)

from khoj.migrations.migrate_offline_chat_default_model import (
    migrate_offline_chat_default_model,
)
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
from khoj.migrations.migrate_offline_model import migrate_offline_model
from khoj.migrations.migrate_processor_config_openai import (
    migrate_processor_conversation_schema,
)
from khoj.migrations.migrate_server_pg import migrate_server_pg
from khoj.migrations.migrate_version import migrate_config_to_version
from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
from khoj.utils.yaml import parse_config_from_file

@@ -85,17 +75,3 @@ def cli(args=None):
        args.config.app.should_log_telemetry = False
    return args


def run_migrations(args):
    migrations = [
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
        migrate_offline_chat_default_model,
        migrate_server_pg,
    ]
    for migration in migrations:
        args = migration(args)
    return args
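
Since every migration either checks the recorded schema version or probes for the exact keys it rewrites before changing anything, running the full list on each startup was effectively idempotent. With the khoj.yml flow gone, the scripts and this driver can be deleted together, which is what this commit does.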