mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Drop old pre 1.0 khoj config migration scripts
These were used when khoj was configured using khoj.yml file
This commit is contained in:
@@ -1,69 +0,0 @@
|
|||||||
"""
|
|
||||||
Current format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enable-offline-chat: false
|
|
||||||
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
|
|
||||||
New format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enable-offline-chat: false
|
|
||||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_offline_chat_default_model(args):
    """Swap GGML offline chat models for the mistral GGUF default.

    Upgrades the khoj.yml config schema to 0.12.4. Any offline chat model
    in the old ``.bin`` (GGML) format is replaced with the mistral GGUF
    model so the config works with the latest GPT4All.

    Returns ``args`` unchanged; the migration's effect is on the config file.
    """
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Only configs that actually declare an offline chat model need this.
    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    # Compare against schema_version instead of repeating the "0.12.4"
    # literal, matching the later migrations in this family.
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model.endswith(".bin"):
            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args
|
|
||||||
@@ -1,71 +0,0 @@
|
|||||||
"""
|
|
||||||
Current format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enable-offline-chat: false
|
|
||||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
|
|
||||||
New format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enable-offline-chat: false
|
|
||||||
chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_offline_chat_default_model(args):
    """Replace the mistral GGUF offline chat model with Hermes-2-Pro.

    Upgrades the khoj.yml config schema to 1.7.0 and rewrites the offline
    chat model to ``NousResearch/Hermes-2-Pro-Mistral-7B-GGUF``, the
    repo/model path format suitable for llama-cpp.

    Returns ``args`` unchanged; the migration's effect is on the config file.
    """
    schema_version = "1.7.0"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Only configs that actually declare an offline chat model need this.
    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        # Log message corrected: this migration moves OFF mistral GGUF to
        # Hermes-2-Pro (the old copy-pasted message claimed the opposite).
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to Hermes-2-Pro GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model == "mistral-7b-instruct-v0.1.Q4_0.gguf":
            raw_config["processor"]["conversation"]["offline-chat"][
                "chat-model"
            ] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"

        save_config_to_file(raw_config, args.config_file)
    return args
|
|
||||||
@@ -1,83 +0,0 @@
|
|||||||
"""
|
|
||||||
Current format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
enable-offline-chat: false
|
|
||||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
|
||||||
openai:
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
|
|
||||||
New format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
...
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enable-offline-chat: false
|
|
||||||
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
|
||||||
tokenizer: null
|
|
||||||
max_prompt_size: null
|
|
||||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
|
||||||
openai:
|
|
||||||
...
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_offline_chat_schema(args):
    """Nest offline chat settings under an ``offline-chat`` section.

    Upgrades the khoj.yml config schema to 0.12.3: adds ``tokenizer`` and
    ``max-prompt-size`` fields to the conversation processor and converts
    the flat ``enable-offline-chat`` flag into a nested ``offline-chat``
    mapping with a default llama-2 chat model.

    Returns ``args`` unchanged; the migration's effect is on the config file.
    """
    schema_version = "0.12.3"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args

    # Compare against schema_version instead of repeating the "0.12.3" literal.
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable"
        )
        raw_config["version"] = schema_version

        # Create max-prompt-size field in conversation processor schema
        raw_config["processor"]["conversation"]["max-prompt-size"] = None
        raw_config["processor"]["conversation"]["tokenizer"] = None

        # Create offline chat schema based on existing enable_offline_chat field in khoj config schema
        offline_chat_model = (
            raw_config["processor"]["conversation"]
            .get("offline-chat", {})
            .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
        )
        raw_config["processor"]["conversation"]["offline-chat"] = {
            "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False),
            "chat-model": offline_chat_model,
        }

        # Delete old enable-offline-chat field from conversation processor schema
        if "enable-offline-chat" in raw_config["processor"]["conversation"]:
            del raw_config["processor"]["conversation"]["enable-offline-chat"]

        save_config_to_file(raw_config, args.config_file)
    return args
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_offline_model(args):
    """Evict the outdated llama-2 GGML model from the GPT4All cache.

    Bumps the config schema version to 0.10.1 and deletes the previously
    downloaded offline model file, so the newer default model gets
    re-fetched on next use.

    Returns ``args`` unchanged; effects are on the config file and cache.
    """
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Compare against schema_version instead of repeating the "0.10.1" literal.
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
        )
        raw_config["version"] = schema_version

        # If the user has downloaded the offline model, remove it from the cache.
        offline_model_path = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
        if os.path.exists(offline_model_path):
            os.remove(offline_model_path)

        save_config_to_file(raw_config, args.config_file)

    return args
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
"""
|
|
||||||
Current format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
should-log-telemetry: true
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
chat-model: gpt-3.5-turbo
|
|
||||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
|
||||||
model: text-davinci-003
|
|
||||||
openai-api-key: sk-secret-key
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
|
|
||||||
New format of khoj.yml
|
|
||||||
---
|
|
||||||
app:
|
|
||||||
should-log-telemetry: true
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
openai:
|
|
||||||
chat-model: gpt-3.5-turbo
|
|
||||||
openai-api-key: sk-secret-key
|
|
||||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
|
||||||
enable-offline-chat: false
|
|
||||||
search-type:
|
|
||||||
...
|
|
||||||
"""
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_processor_conversation_schema(args):
    """Move flat OpenAI conversation settings under a nested ``openai`` key.

    Rewrites ``processor.conversation`` from the pre-0.10.0 flat layout
    (``chat-model``, ``openai-api-key`` at the top level) to the nested
    layout and adds an ``enable-offline-chat`` flag (defaulting to False,
    per the new format). Schema version becomes 0.10.0.

    Returns ``args`` unchanged; the migration's effect is on the config file.
    """
    schema_version = "0.10.0"
    raw_config = load_config_from_file(args.config_file)

    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args

    current_openai_api_key = raw_config["processor"]["conversation"].get("openai-api-key", None)
    current_chat_model = raw_config["processor"]["conversation"].get("chat-model", None)
    if current_openai_api_key is None and current_chat_model is None:
        return args

    raw_config["version"] = schema_version

    # Update conversation processor schema. The rebuilt dict below always
    # sets enable-offline-chat to False (the new-format default); the old
    # standalone assignment of that key onto the soon-to-be-replaced dict
    # was dead code and has been removed.
    conversation_logfile = raw_config["processor"]["conversation"].get("conversation-logfile", None)
    raw_config["processor"]["conversation"] = {
        "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
        "conversation-logfile": conversation_logfile,
        "enable-offline-chat": False,
    }

    save_config_to_file(raw_config, args.config_file)
    return args
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
"""
|
|
||||||
The application config currently looks like this:
|
|
||||||
app:
|
|
||||||
should-log-telemetry: true
|
|
||||||
content-type:
|
|
||||||
...
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
|
||||||
max-prompt-size: null
|
|
||||||
offline-chat:
|
|
||||||
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
|
||||||
enable-offline-chat: false
|
|
||||||
openai:
|
|
||||||
api-key: sk-blah
|
|
||||||
chat-model: gpt-3.5-turbo
|
|
||||||
tokenizer: null
|
|
||||||
search-type:
|
|
||||||
asymmetric:
|
|
||||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
|
||||||
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
|
||||||
encoder-type: null
|
|
||||||
model-directory: /Users/si/.khoj/search/asymmetric
|
|
||||||
image:
|
|
||||||
encoder: sentence-transformers/clip-ViT-B-32
|
|
||||||
encoder-type: null
|
|
||||||
model-directory: /Users/si/.khoj/search/image
|
|
||||||
symmetric:
|
|
||||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
|
||||||
encoder: sentence-transformers/all-MiniLM-L6-v2
|
|
||||||
encoder-type: null
|
|
||||||
model-directory: ~/.khoj/search/symmetric
|
|
||||||
version: 0.14.0
|
|
||||||
|
|
||||||
|
|
||||||
The new version will looks like this:
|
|
||||||
app:
|
|
||||||
should-log-telemetry: true
|
|
||||||
processor:
|
|
||||||
conversation:
|
|
||||||
offline-chat:
|
|
||||||
enabled: false
|
|
||||||
openai:
|
|
||||||
api-key: sk-blah
|
|
||||||
chat-model-options:
|
|
||||||
- chat-model: gpt-3.5-turbo
|
|
||||||
tokenizer: null
|
|
||||||
type: openai
|
|
||||||
- chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
|
||||||
tokenizer: null
|
|
||||||
type: offline
|
|
||||||
search-type:
|
|
||||||
asymmetric:
|
|
||||||
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
|
||||||
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
|
||||||
version: 0.15.0
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from packaging import version
|
|
||||||
|
|
||||||
from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig
|
|
||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_server_pg(args):
    """Migrate khoj.yml search and chat config into the postgres database.

    Copies the asymmetric search encoder settings into SearchModelConfig
    rows and the offline/openai chat settings into ChatModel rows, then
    bumps the config schema version to 0.15.0.

    Returns ``args`` unchanged; effects are on the database and config file.
    """
    schema_version = "0.15.0"
    raw_config = load_config_from_file(args.config_file)

    # Guard before any access: previously this None check ran only after
    # raw_config.get() and an item assignment, so a None config crashed
    # before ever reaching the guard.
    if raw_config is None:
        return args

    previous_version = raw_config.get("version")
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}"
        )
        raw_config["version"] = schema_version

    if "search-type" in raw_config and raw_config["search-type"]:
        if "asymmetric" in raw_config["search-type"]:
            # Delete all existing search models
            SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete()
            # Create new search model from existing Khoj YAML config
            asymmetric_search = raw_config["search-type"]["asymmetric"]
            SearchModelConfig.objects.create(
                name="default",
                model_type=SearchModelConfig.ModelType.TEXT,
                bi_encoder=asymmetric_search.get("encoder"),
                cross_encoder=asymmetric_search.get("cross-encoder"),
            )

    if "processor" in raw_config and raw_config["processor"] and "conversation" in raw_config["processor"]:
        processor_conversation = raw_config["processor"]["conversation"]

        if "offline-chat" in raw_config["processor"]["conversation"]:
            offline_chat = raw_config["processor"]["conversation"]["offline-chat"]
            ChatModel.objects.create(
                name=offline_chat.get("chat-model"),
                tokenizer=processor_conversation.get("tokenizer"),
                max_prompt_size=processor_conversation.get("max-prompt-size"),
                model_type=ChatModel.ModelType.OFFLINE,
            )

        if (
            "openai" in raw_config["processor"]["conversation"]
            and raw_config["processor"]["conversation"]["openai"]
        ):
            openai = raw_config["processor"]["conversation"]["openai"]

            if openai.get("api-key") is None:
                logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.")
            else:
                if openai.get("chat-model") is None:
                    openai["chat-model"] = "gpt-3.5-turbo"

                openai_model_api = AiModelApi.objects.create(api_key=openai.get("api-key"), name="default")

                ChatModel.objects.create(
                    name=openai.get("chat-model"),
                    tokenizer=processor_conversation.get("tokenizer"),
                    max_prompt_size=processor_conversation.get("max-prompt-size"),
                    model_type=ChatModel.ModelType.OPENAI,
                    ai_model_api=openai_model_api,
                )

    save_config_to_file(raw_config, args.config_file)

    return args
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
|
||||||
|
|
||||||
|
|
||||||
def migrate_config_to_version(args):
    """Stamp unversioned khoj configs with an initial schema version.

    Adds a top-level ``version`` key (0.9.0) to configs that predate
    versioning and flags a one-time index regeneration.
    """
    schema_version = "0.9.0"
    raw_config = load_config_from_file(args.config_file)

    # Already versioned configs need no changes.
    if "version" in raw_config:
        return args

    raw_config["version"] = schema_version
    save_config_to_file(raw_config, args.config_file)

    # Force an index rebuild on first start of this version; this should
    # refresh the index and apply the index corruption fixes from #325.
    args.regenerate = True

    return args
|
|
||||||
@@ -6,16 +6,6 @@ from importlib.metadata import version
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from khoj.migrations.migrate_offline_chat_default_model import (
|
|
||||||
migrate_offline_chat_default_model,
|
|
||||||
)
|
|
||||||
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
|
|
||||||
from khoj.migrations.migrate_offline_model import migrate_offline_model
|
|
||||||
from khoj.migrations.migrate_processor_config_openai import (
|
|
||||||
migrate_processor_conversation_schema,
|
|
||||||
)
|
|
||||||
from khoj.migrations.migrate_server_pg import migrate_server_pg
|
|
||||||
from khoj.migrations.migrate_version import migrate_config_to_version
|
|
||||||
from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
|
from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
|
||||||
from khoj.utils.yaml import parse_config_from_file
|
from khoj.utils.yaml import parse_config_from_file
|
||||||
|
|
||||||
@@ -85,17 +75,3 @@ def cli(args=None):
|
|||||||
args.config.app.should_log_telemetry = False
|
args.config.app.should_log_telemetry = False
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
def run_migrations(args):
    """Apply every config schema migration in order, threading args through."""
    # Order matters: each step upgrades from the schema the previous one left.
    for apply_migration in (
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
        migrate_offline_chat_default_model,
        migrate_server_pg,
    ):
        args = apply_migration(args)
    return args
|
|
||||||
|
|||||||
Reference in New Issue
Block a user