diff --git a/Readme.md b/Readme.md index da5ba39a..9e932dac 100644 --- a/Readme.md +++ b/Readme.md @@ -136,7 +136,7 @@ pip install --upgrade khoj-assistant ``` shell git clone https://github.com/debanjum/khoj && cd khoj python -m venv .venv && source .venv/bin/activate - pip install . + pip install -e . ``` ##### 2. Configure - Set `input-files` or `input-filter` in each relevant `content-type` section of `khoj_sample.yml` @@ -158,8 +158,10 @@ pip install --upgrade khoj-assistant # To Upgrade To Latest Pre-Release pip install --upgrade --pre khoj-assistant -# To Upgrade To Specific Development Release -pip install -r testpypi khoj-assistant==0.1.5.dev491659577806 +# To Upgrade To Specific Development Release. +# Useful to test or review a PR. +# Note: khoj-assistant is published to Test PyPI when a PR is created +pip install -i https://test.pypi.org/simple/ khoj-assistant==0.1.5.dev57166025766 ``` #### Using Docker diff --git a/src/interface/desktop/configure_screen.py b/src/interface/desktop/configure_screen.py index 86585391..ff7b86b3 100644 --- a/src/interface/desktop/configure_screen.py +++ b/src/interface/desktop/configure_screen.py @@ -11,7 +11,7 @@ from src.interface.desktop.file_browser import FileBrowser from src.utils import constants, state, yaml as yaml_utils from src.utils.cli import cli from src.utils.config import SearchType, ProcessorType -from src.utils.helpers import merge_dicts +from src.utils.helpers import merge_dicts, resolve_absolute_path class ConfigureScreen(QtWidgets.QDialog): @@ -27,9 +27,10 @@ class ConfigureScreen(QtWidgets.QDialog): self.config_file = config_file # Load config from existing config, if exists, else load from default config - self.current_config = yaml_utils.load_config_from_file(self.config_file) - if self.current_config is None: - self.current_config = yaml_utils.load_config_from_file(constants.app_root_directory / 'config/khoj_sample.yml') + if resolve_absolute_path(self.config_file).exists(): + self.current_config = 
yaml_utils.load_config_from_file(self.config_file) + else: + self.current_config = constants.default_config self.new_config = self.current_config # Initialize Configure Window @@ -126,7 +127,7 @@ class ConfigureScreen(QtWidgets.QDialog): def get_default_config(self, search_type:SearchType=None, processor_type:ProcessorType=None): "Get default config" - config = yaml_utils.load_config_from_file(constants.app_root_directory / 'config/khoj_sample.yml') + config = constants.default_config if search_type: return config['content-type'][search_type] elif processor_type: diff --git a/src/utils/cli.py b/src/utils/cli.py index 0e8a61ea..e855f706 100644 --- a/src/utils/cli.py +++ b/src/utils/cli.py @@ -3,7 +3,7 @@ import argparse import pathlib # Internal Packages -from src.utils.helpers import resolve_absolute_path +from src.utils.helpers import get_absolute_path, resolve_absolute_path from src.utils.yaml import parse_config_from_file @@ -20,9 +20,12 @@ def cli(args=None): args = parser.parse_args(args) - if not resolve_absolute_path(args.config_file).exists(): + # Normalize config_file path to absolute path + args.config_file = resolve_absolute_path(args.config_file) + + if not args.config_file.exists(): args.config = None else: args.config = parse_config_from_file(args.config_file) - return args \ No newline at end of file + return args diff --git a/src/utils/constants.py b/src/utils/constants.py index 8e45d03c..569c5bcb 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -3,3 +3,62 @@ from pathlib import Path app_root_directory = Path(__file__).parent.parent.parent web_directory = app_root_directory / 'src/interface/web/' empty_escape_sequences = r'\n|\r\t ' + +# default app config to use +default_config = { + 'content-type': { + 'org': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/org/org.jsonl.gz', + 'embeddings-file': '~/.khoj/content/org/org_embeddings.pt' + }, + 'markdown': { + 'input-files': None, + 
'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/markdown/markdown.jsonl.gz', + 'embeddings-file': '~/.khoj/content/markdown/markdown_embeddings.pt' + }, + 'ledger': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/ledger/ledger.jsonl.gz', + 'embeddings-file': '~/.khoj/content/ledger/ledger_embeddings.pt' + }, + 'image': { + 'input-directories': None, + 'input-filter': None, + 'embeddings-file': '~/.khoj/content/image/image_embeddings.pt', + 'batch-size': 50, + 'use-xmp-metadata': True + }, + 'music': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/music/music.jsonl.gz', + 'embeddings-file': '~/.khoj/content/music/music_embeddings.pt' + } + }, + 'search-type': { + 'symmetric': { + 'encoder': 'sentence-transformers/all-MiniLM-L6-v2', + 'cross-encoder': 'cross-encoder/ms-marco-MiniLM-L-6-v2', + 'model_directory': '~/.khoj/search/symmetric/' + }, + 'asymmetric': { + 'encoder': 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', + 'cross-encoder': 'cross-encoder/ms-marco-MiniLM-L-6-v2', + 'model_directory': '~/.khoj/search/asymmetric/' + }, + 'image': { + 'encoder': 'sentence-transformers/clip-ViT-B-32', + 'model_directory': '~/.khoj/search/image/' + } + }, + 'processor': { + 'conversation': { + 'openai-api-key': None, + 'conversation-logfile': '~/.khoj/processor/conversation/conversation_logs.json' + } + } +} \ No newline at end of file diff --git a/src/utils/yaml.py b/src/utils/yaml.py index 588acbda..46ddb788 100644 --- a/src/utils/yaml.py +++ b/src/utils/yaml.py @@ -5,7 +5,7 @@ from pathlib import Path import yaml # Internal Packages -from src.utils.helpers import get_absolute_path +from src.utils.helpers import get_absolute_path, resolve_absolute_path from src.utils.rawconfig import FullConfig # Do not emit tags when dumping to YAML @@ -13,14 +13,17 @@ yaml.emitter.Emitter.process_tag = lambda self, *args, **kwargs: None def save_config_to_file(yaml_config: dict, 
yaml_config_file: Path): "Write config to YML file" - with open(get_absolute_path(yaml_config_file), 'w', encoding='utf-8') as config_file: + # Create output directory, if it doesn't exist + yaml_config_file.parent.mkdir(parents=True, exist_ok=True) + + with open(yaml_config_file, 'w', encoding='utf-8') as config_file: yaml.safe_dump(yaml_config, config_file, allow_unicode=True) def load_config_from_file(yaml_config_file: Path) -> dict: "Read config from YML file" config_from_file = None - with open(get_absolute_path(yaml_config_file), 'r', encoding='utf-8') as config_file: + with open(yaml_config_file, 'r', encoding='utf-8') as config_file: config_from_file = yaml.safe_load(config_file) return config_from_file diff --git a/tests/test_cli.py b/tests/test_cli.py index 7e7531fb..4cbf1209 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,6 +7,7 @@ import pytest # Internal Packages from src.utils.cli import cli +from src.utils.helpers import resolve_absolute_path # Test @@ -16,7 +17,7 @@ def test_cli_minimal_default(): actual_args = cli([]) # Assert - assert actual_args.config_file == Path('~/.khoj/khoj.yml') + assert actual_args.config_file == resolve_absolute_path(Path('~/.khoj/khoj.yml')) assert actual_args.regenerate == False assert actual_args.no_gui == False assert actual_args.verbose == 0 @@ -30,7 +31,7 @@ def test_cli_invalid_config_file_path(): actual_args = cli([f'-c={non_existent_config_file}']) # Assert - assert actual_args.config_file == Path(non_existent_config_file) + assert actual_args.config_file == resolve_absolute_path(non_existent_config_file) assert actual_args.config == None # ---------------------------------------------------------------------------------------------------- @@ -42,7 +43,7 @@ def test_cli_config_from_file(): '-vvv']) # Assert - assert actual_args.config_file == Path('tests/data/config.yml') + assert actual_args.config_file == resolve_absolute_path(Path('tests/data/config.yml')) assert actual_args.no_gui == True assert 
actual_args.regenerate == True assert actual_args.config is not None