mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Create khoj_sample file with all configurable fields in one place
Reason:
- Simplifies code. No merge_dicts required.
- One place for the user to see all configurable fields, their defaults and the required values.

Details:
- Remove default_config from code. Set defaults in khoj_sample.yml itself.
- Keep fields that must be set by the user empty in khoj_sample.yml.
- Set defaults for fields that do not require configuration by the user.
This commit is contained in:
@@ -1,53 +1,51 @@
|
|||||||
content-type:
|
content-type:
|
||||||
# The /data/folder/ prefix to the folders is here because this is
|
|
||||||
# the directory to which the local files are copied in the docker-compose.
|
|
||||||
# If changing, the docker-compose volumes should also be changed to match.
|
|
||||||
org:
|
org:
|
||||||
input-files: null
|
input-files: # ["/path/to/org-file.org"] REQUIRED IF input-filter IS NOT SET OR
|
||||||
input-filter: "/data/notes/*.org"
|
input-filter: # /path/to/org/*.org REQUIRED IF input-files IS NOT SET
|
||||||
compressed-jsonl: "/data/embeddings/notes.jsonl.gz"
|
compressed-jsonl: "~/.khoj/content/org/org.jsonl.gz"
|
||||||
embeddings-file: "/data/embeddings/note_embeddings.pt"
|
embeddings-file: "~/.khoj/content/org/org_embeddings.pt"
|
||||||
|
|
||||||
markdown:
|
markdown:
|
||||||
input-files: null
|
input-files: # ["/path/to/markdown-file.md"] REQUIRED IF input-filter IS NOT SET OR
|
||||||
input-filter: "/data/markdown/*.md"
|
input-filter: # "/path/to/markdown/*.md" REQUIRED IF input-files IS NOT SET
|
||||||
compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
|
compressed-jsonl: "~/.khoj/content/markdown/markdown.jsonl.gz"
|
||||||
embeddings-file: "/data/embeddings/markdown_embeddings.pt"
|
embeddings-file: "~/.khoj/content/markdown/markdown_embeddings.pt"
|
||||||
|
|
||||||
ledger:
|
ledger:
|
||||||
input-files: null
|
input-files: # ["/path/to/ledger-file.beancount"] REQUIRED IF input-filter is not set OR
|
||||||
input-filter: /data/ledger/*.beancount
|
input-filter: # /path/to/ledger/*.beancount REQUIRED IF input-files is not set
|
||||||
compressed-jsonl: /data/embeddings/transactions.jsonl.gz
|
compressed-jsonl: "~/.khoj/content/ledger/ledger.jsonl.gz"
|
||||||
embeddings-file: /data/embeddings/transaction_embeddings.pt
|
embeddings-file: "~/.khoj/content/ledger/ledger_embeddings.pt"
|
||||||
|
|
||||||
# image:
|
image:
|
||||||
# input-directories: ["/data/images/"]
|
input-directories: # ["/path/to/images/"] REQUIRED IF input-filter IS NOT SET OR
|
||||||
# embeddings-file: "/data/embeddings/image_embeddings.pt"
|
input-filter: # /path/to/images/*.jpg REQUIRED IF input-directories IS NOT SET
|
||||||
# batch-size: 50
|
embeddings-file: "~/.khoj/content/image/image_embeddings.pt"
|
||||||
# use-xmp-metadata: true
|
batch-size: 50
|
||||||
|
use-xmp-metadata: true
|
||||||
|
|
||||||
music:
|
music:
|
||||||
input-files: ["/data/music/music.org"]
|
input-files: # ["/path/to/music-file.org"] REQUIRED IF input-filter IS NOT SET OR
|
||||||
input-filter: null
|
input-filter: # /path/to/music/*.org REQUIRED IF input-files IS NOT SET
|
||||||
compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
|
compressed-jsonl: "~/.khoj/content/music/music.jsonl.gz"
|
||||||
embeddings-file: "/data/embeddings/song_embeddings.pt"
|
embeddings-file: "~/.khoj/content/music/music_embeddings.pt"
|
||||||
|
|
||||||
search-type:
|
search-type:
|
||||||
symmetric:
|
symmetric:
|
||||||
encoder: "sentence-transformers/all-MiniLM-L6-v2"
|
encoder: "sentence-transformers/all-MiniLM-L6-v2"
|
||||||
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
||||||
model_directory: "/data/models/symmetric"
|
model_directory: "~/.khoj/search/symmetric/"
|
||||||
|
|
||||||
asymmetric:
|
asymmetric:
|
||||||
encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
|
||||||
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
||||||
model_directory: "/data/models/asymmetric"
|
model_directory: "~/.khoj/search/asymmetric/"
|
||||||
|
|
||||||
image:
|
image:
|
||||||
encoder: "clip-ViT-B-32"
|
encoder: "sentence-transformers/clip-ViT-B-32"
|
||||||
model_directory: "/data/models/image_encoder"
|
model_directory: "~/.khoj/search/image/"
|
||||||
|
|
||||||
processor:
|
processor:
|
||||||
conversation:
|
conversation:
|
||||||
openai-api-key: null
|
openai-api-key: # "YOUR_OPENAI_API_KEY"
|
||||||
conversation-logfile: "/data/embeddings/conversation_logs.json"
|
conversation-logfile: "~/.khoj/processor/conversation/conversation_logs.json"
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
# Standard Packages
|
# Standard Packages
|
||||||
import argparse
|
import argparse
|
||||||
import pathlib
|
import pathlib
|
||||||
import json
|
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
import yaml
|
import yaml
|
||||||
@@ -28,55 +27,16 @@ def cli(args=None):
|
|||||||
if not (args.config_file):
|
if not (args.config_file):
|
||||||
print(f"Need --config-file flag to be passed from commandline")
|
print(f"Need --config-file flag to be passed from commandline")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
elif not resolve_absolute_path(args.config_file).exists():
|
||||||
|
print(f"Config file {args.config_file} does not exist")
|
||||||
|
exit(1)
|
||||||
|
|
||||||
# Config Priority: Config File > Default Config
|
# Read Config from YML file
|
||||||
args.config = default_config
|
config_from_file = None
|
||||||
if args.config_file and resolve_absolute_path(args.config_file).exists():
|
with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
|
||||||
with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
|
config_from_file = yaml.safe_load(config_file)
|
||||||
config_from_file = yaml.safe_load(config_file)
|
|
||||||
args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
|
|
||||||
|
|
||||||
args.config = FullConfig.parse_obj(args.config)
|
# Parse, Validate Config in YML file
|
||||||
|
args.config = FullConfig.parse_obj(config_from_file)
|
||||||
|
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
default_config = {
|
|
||||||
'content-type':
|
|
||||||
{
|
|
||||||
'org': None,
|
|
||||||
'ledger': None,
|
|
||||||
'image': None,
|
|
||||||
'music': None,
|
|
||||||
'markdown': None,
|
|
||||||
},
|
|
||||||
'search-type':
|
|
||||||
{
|
|
||||||
'symmetric':
|
|
||||||
{
|
|
||||||
'encoder': "sentence-transformers/all-MiniLM-L6-v2",
|
|
||||||
'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
|
||||||
'model_directory': None
|
|
||||||
},
|
|
||||||
'asymmetric':
|
|
||||||
{
|
|
||||||
'encoder': "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
|
||||||
'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
|
||||||
'model_directory': None
|
|
||||||
},
|
|
||||||
'image':
|
|
||||||
{
|
|
||||||
'encoder': "clip-ViT-B-32",
|
|
||||||
'model_directory': None
|
|
||||||
},
|
|
||||||
},
|
|
||||||
'processor':
|
|
||||||
{
|
|
||||||
'conversation':
|
|
||||||
{
|
|
||||||
'openai-api-key': "",
|
|
||||||
'conversation-logfile': ".conversation_logs.json",
|
|
||||||
'conversation-history': ""
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user