mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Drop old pre 1.0 khoj config migration scripts
These were used when khoj was configured using the khoj.yml file
This commit is contained in:
@@ -1,69 +0,0 @@
|
||||
"""
|
||||
Current format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enable-offline-chat: false
|
||||
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
|
||||
New format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enable-offline-chat: false
|
||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
"""
|
||||
import logging
|
||||
|
||||
from packaging import version
|
||||
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def migrate_offline_chat_default_model(args):
    """Switch the default offline chat model from GGML to GGUF (schema 0.12.4).

    Replaces any `.bin` (GGML) offline chat model configured in khoj.yml with
    the mistral GGUF model required by the newer GPT4All.

    Args:
        args: Parsed CLI args carrying `config_file`, the khoj.yml path.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Nothing to migrate unless an offline chat model is actually configured
    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    # Compare against schema_version (was a duplicated hard-coded "0.12.4" literal)
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model.endswith(".bin"):
            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args
|
||||
@@ -1,71 +0,0 @@
|
||||
"""
|
||||
Current format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enable-offline-chat: false
|
||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
|
||||
New format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enable-offline-chat: false
|
||||
chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
"""
|
||||
import logging
|
||||
|
||||
from packaging import version
|
||||
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def migrate_offline_chat_default_model(args):
    """Move the default offline chat model to Hermes-2-Pro GGUF (schema 1.7.0).

    Rewrites the mistral GGUF model name in khoj.yml into the repo-path
    format suitable for llama-cpp.

    Args:
        args: Parsed CLI args carrying `config_file`, the khoj.yml path.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "1.7.0"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Bail out unless an offline chat model is actually configured
    processor = raw_config.get("processor")
    if processor is None:
        return args
    if "conversation" not in processor:
        return args
    conversation = processor["conversation"]
    if "offline-chat" not in conversation:
        return args
    offline_chat = conversation["offline-chat"]
    if "chat-model" not in offline_chat:
        return args

    # Already at or past this schema version: nothing to do
    if previous_version is not None and version.parse(previous_version) >= version.parse(schema_version):
        return args

    logger.info(
        f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
    )
    raw_config["version"] = schema_version

    # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
    if offline_chat["chat-model"] == "mistral-7b-instruct-v0.1.Q4_0.gguf":
        offline_chat["chat-model"] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"

    save_config_to_file(raw_config, args.config_file)
    return args
|
||||
@@ -1,83 +0,0 @@
|
||||
"""
|
||||
Current format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
enable-offline-chat: false
|
||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
||||
openai:
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
|
||||
New format of khoj.yml
|
||||
---
|
||||
app:
|
||||
...
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enable-offline-chat: false
|
||||
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
||||
tokenizer: null
|
||||
max_prompt_size: null
|
||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
||||
openai:
|
||||
...
|
||||
search-type:
|
||||
...
|
||||
"""
|
||||
import logging
|
||||
|
||||
from packaging import version
|
||||
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def migrate_offline_chat_schema(args):
    """Restructure offline chat settings under an `offline-chat` section (schema 0.12.3).

    Moves the flat `enable-offline-chat` flag into a new `offline-chat`
    sub-section with a `chat-model` field, and adds `max-prompt-size` and
    `tokenizer` knobs to the conversation processor config.

    Args:
        args: Parsed CLI args carrying `config_file`, the khoj.yml path.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "0.12.3"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Nothing to migrate unless a conversation processor is configured
    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args

    # Compare against schema_version (was a duplicated hard-coded "0.12.3" literal)
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable"
        )
        raw_config["version"] = schema_version

        # Create max-prompt-size field in conversation processor schema
        raw_config["processor"]["conversation"]["max-prompt-size"] = None
        raw_config["processor"]["conversation"]["tokenizer"] = None

        # Create offline chat schema based on existing enable_offline_chat field in khoj config schema
        offline_chat_model = (
            raw_config["processor"]["conversation"]
            .get("offline-chat", {})
            .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
        )
        raw_config["processor"]["conversation"]["offline-chat"] = {
            "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False),
            "chat-model": offline_chat_model,
        }

        # Delete old enable-offline-chat field from conversation processor schema
        if "enable-offline-chat" in raw_config["processor"]["conversation"]:
            del raw_config["processor"]["conversation"]["enable-offline-chat"]

        save_config_to_file(raw_config, args.config_file)
    return args
|
||||
@@ -1,29 +0,0 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from packaging import version
|
||||
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def migrate_offline_model(args):
    """Evict the deprecated llama-2 GGML model from the gpt4all cache (schema 0.10.1).

    Deletes the previously downloaded offline model file so the newer default
    model is fetched on next use, then bumps the config schema version.

    Args:
        args: Parsed CLI args carrying `config_file` and `version_no`.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Compare against schema_version (was a duplicated hard-coded "0.10.1" literal)
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
        )
        raw_config["version"] = schema_version

        # If the user has downloaded the offline model, remove it from the cache.
        offline_model_path = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
        if os.path.exists(offline_model_path):
            os.remove(offline_model_path)

        save_config_to_file(raw_config, args.config_file)

    return args
|
||||
@@ -1,67 +0,0 @@
|
||||
"""
|
||||
Current format of khoj.yml
|
||||
---
|
||||
app:
|
||||
should-log-telemetry: true
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
chat-model: gpt-3.5-turbo
|
||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
||||
model: text-davinci-003
|
||||
openai-api-key: sk-secret-key
|
||||
search-type:
|
||||
...
|
||||
|
||||
New format of khoj.yml
|
||||
---
|
||||
app:
|
||||
should-log-telemetry: true
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
openai:
|
||||
chat-model: gpt-3.5-turbo
|
||||
openai-api-key: sk-secret-key
|
||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
||||
enable-offline-chat: false
|
||||
search-type:
|
||||
...
|
||||
"""
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
|
||||
def migrate_processor_conversation_schema(args):
    """Nest OpenAI chat settings under an `openai` section (schema 0.10.0).

    Restructures the flat `chat-model`/`openai-api-key` fields of the
    conversation processor config into an `openai` sub-section and introduces
    the `enable-offline-chat` flag (always False after this migration).

    Args:
        args: Parsed CLI args carrying `config_file`, the khoj.yml path.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "0.10.0"
    raw_config = load_config_from_file(args.config_file)

    # Nothing to migrate unless a conversation processor is configured
    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args

    current_openai_api_key = raw_config["processor"]["conversation"].get("openai-api-key", None)
    current_chat_model = raw_config["processor"]["conversation"].get("chat-model", None)
    # Old flat-format fields absent => config already migrated (or never set)
    if current_openai_api_key is None and current_chat_model is None:
        return args

    raw_config["version"] = schema_version

    # Update conversation processor schema. The whole conversation section is
    # replaced wholesale, so the original's separate pre-set of
    # `enable-offline-chat` (dead code: its value was unconditionally
    # overwritten by the dict below) has been removed.
    conversation_logfile = raw_config["processor"]["conversation"].get("conversation-logfile", None)
    raw_config["processor"]["conversation"] = {
        "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
        "conversation-logfile": conversation_logfile,
        "enable-offline-chat": False,
    }

    save_config_to_file(raw_config, args.config_file)
    return args
|
||||
@@ -1,132 +0,0 @@
|
||||
"""
|
||||
The application config currently looks like this:
|
||||
app:
|
||||
should-log-telemetry: true
|
||||
content-type:
|
||||
...
|
||||
processor:
|
||||
conversation:
|
||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
||||
max-prompt-size: null
|
||||
offline-chat:
|
||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
||||
enable-offline-chat: false
|
||||
openai:
|
||||
api-key: sk-blah
|
||||
chat-model: gpt-3.5-turbo
|
||||
tokenizer: null
|
||||
search-type:
|
||||
asymmetric:
|
||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
||||
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
||||
encoder-type: null
|
||||
model-directory: /Users/si/.khoj/search/asymmetric
|
||||
image:
|
||||
encoder: sentence-transformers/clip-ViT-B-32
|
||||
encoder-type: null
|
||||
model-directory: /Users/si/.khoj/search/image
|
||||
symmetric:
|
||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
||||
encoder: sentence-transformers/all-MiniLM-L6-v2
|
||||
encoder-type: null
|
||||
model-directory: ~/.khoj/search/symmetric
|
||||
version: 0.14.0
|
||||
|
||||
|
||||
The new version will looks like this:
|
||||
app:
|
||||
should-log-telemetry: true
|
||||
processor:
|
||||
conversation:
|
||||
offline-chat:
|
||||
enabled: false
|
||||
openai:
|
||||
api-key: sk-blah
|
||||
chat-model-options:
|
||||
- chat-model: gpt-3.5-turbo
|
||||
tokenizer: null
|
||||
type: openai
|
||||
- chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
||||
tokenizer: null
|
||||
type: offline
|
||||
search-type:
|
||||
asymmetric:
|
||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
||||
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
||||
version: 0.15.0
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from packaging import version
|
||||
|
||||
from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def migrate_server_pg(args):
    """Copy YAML-configured search and chat models into postgres (schema 0.15.0).

    Migrates the `search-type` and conversation processor settings from
    khoj.yml into the SearchModelConfig, ChatModel and AiModelApi tables.

    Args:
        args: Parsed CLI args carrying `config_file` and `version_no`.

    Returns:
        The (unmodified) args namespace so migrations can be chained.
    """
    schema_version = "0.15.0"
    raw_config = load_config_from_file(args.config_file)

    # Guard against an empty/missing config BEFORE dereferencing it.
    # (Previously this check ran after raw_config.get("version"), which would
    # raise AttributeError when the config file was empty.)
    if raw_config is None:
        return args

    previous_version = raw_config.get("version")

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}"
        )
        raw_config["version"] = schema_version

    if "search-type" in raw_config and raw_config["search-type"]:
        if "asymmetric" in raw_config["search-type"]:
            # Delete all existing search models
            SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete()
            # Create new search model from existing Khoj YAML config
            asymmetric_search = raw_config["search-type"]["asymmetric"]
            SearchModelConfig.objects.create(
                name="default",
                model_type=SearchModelConfig.ModelType.TEXT,
                bi_encoder=asymmetric_search.get("encoder"),
                cross_encoder=asymmetric_search.get("cross-encoder"),
            )

    if "processor" in raw_config and raw_config["processor"] and "conversation" in raw_config["processor"]:
        processor_conversation = raw_config["processor"]["conversation"]

        if "offline-chat" in raw_config["processor"]["conversation"]:
            offline_chat = raw_config["processor"]["conversation"]["offline-chat"]
            ChatModel.objects.create(
                name=offline_chat.get("chat-model"),
                tokenizer=processor_conversation.get("tokenizer"),
                max_prompt_size=processor_conversation.get("max-prompt-size"),
                model_type=ChatModel.ModelType.OFFLINE,
            )

        if (
            "openai" in raw_config["processor"]["conversation"]
            and raw_config["processor"]["conversation"]["openai"]
        ):
            openai = raw_config["processor"]["conversation"]["openai"]

            if openai.get("api-key") is None:
                logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.")
            else:
                if openai.get("chat-model") is None:
                    openai["chat-model"] = "gpt-3.5-turbo"

                openai_model_api = AiModelApi.objects.create(api_key=openai.get("api-key"), name="default")

                ChatModel.objects.create(
                    name=openai.get("chat-model"),
                    tokenizer=processor_conversation.get("tokenizer"),
                    max_prompt_size=processor_conversation.get("max-prompt-size"),
                    model_type=ChatModel.ModelType.OPENAI,
                    ai_model_api=openai_model_api,
                )

    save_config_to_file(raw_config, args.config_file)

    return args
|
||||
@@ -1,17 +0,0 @@
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
|
||||
|
||||
def migrate_config_to_version(args):
    """Ensure the khoj config file carries a schema version (0.9.0).

    Config files that predate schema versioning are stamped with "0.9.0"
    and written back to disk. Also forces an index regeneration.

    Args:
        args: Parsed CLI args carrying `config_file`, the khoj.yml path.

    Returns:
        The args namespace with `regenerate` set to True.
    """
    current_schema = "0.9.0"
    config = load_config_from_file(args.config_file)

    # Stamp configs that predate schema versioning
    if "version" not in config:
        config["version"] = current_schema
        save_config_to_file(config, args.config_file)

    # Regenerate the khoj index on first start of this version; this should
    # refresh the index and apply the index corruption fixes from #325
    args.regenerate = True

    return args
|
||||
@@ -6,16 +6,6 @@ from importlib.metadata import version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from khoj.migrations.migrate_offline_chat_default_model import (
|
||||
migrate_offline_chat_default_model,
|
||||
)
|
||||
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
|
||||
from khoj.migrations.migrate_offline_model import migrate_offline_model
|
||||
from khoj.migrations.migrate_processor_config_openai import (
|
||||
migrate_processor_conversation_schema,
|
||||
)
|
||||
from khoj.migrations.migrate_server_pg import migrate_server_pg
|
||||
from khoj.migrations.migrate_version import migrate_config_to_version
|
||||
from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
|
||||
from khoj.utils.yaml import parse_config_from_file
|
||||
|
||||
@@ -85,17 +75,3 @@ def cli(args=None):
|
||||
args.config.app.should_log_telemetry = False
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def run_migrations(args):
    """Apply all khoj config migrations in order, oldest schema first.

    Each migration takes and returns the parsed CLI args, updating the
    config file on disk as a side effect when its schema bump applies.

    Args:
        args: Parsed CLI args carrying `config_file` and related fields.

    Returns:
        The args namespace after every migration has run.
    """
    # Ordered oldest -> newest; each step may assume the previous ones ran.
    for migrate in (
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
        migrate_offline_chat_default_model,
        migrate_server_pg,
    ):
        args = migrate(args)
    return args
|
||||
|
||||
Reference in New Issue
Block a user