From 44fe70513af826e9c5ed45e237bb919e39c3aed0 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 01:17:34 +0300 Subject: [PATCH 1/6] Handle situation where default config directory or file does not exist - Include khoj_sample.yml in pip package to load default config from - Create khoj config directory if it doesn't exist - Load config from khoj_sample.yml if khoj.yml config doesn't exist --- MANIFEST.in | 1 + src/interface/desktop/configure_screen.py | 7 ++++--- src/utils/yaml.py | 3 +++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 595ab4e1..30a81879 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include Readme.md +include config/khoj_sample.yml graft src/interface/* prune src/interface/web/images* prune docs* diff --git a/src/interface/desktop/configure_screen.py b/src/interface/desktop/configure_screen.py index 86585391..a6cc84f2 100644 --- a/src/interface/desktop/configure_screen.py +++ b/src/interface/desktop/configure_screen.py @@ -11,7 +11,7 @@ from src.interface.desktop.file_browser import FileBrowser from src.utils import constants, state, yaml as yaml_utils from src.utils.cli import cli from src.utils.config import SearchType, ProcessorType -from src.utils.helpers import merge_dicts +from src.utils.helpers import merge_dicts, resolve_absolute_path class ConfigureScreen(QtWidgets.QDialog): @@ -27,8 +27,9 @@ class ConfigureScreen(QtWidgets.QDialog): self.config_file = config_file # Load config from existing config, if exists, else load from default config - self.current_config = yaml_utils.load_config_from_file(self.config_file) - if self.current_config is None: + if resolve_absolute_path(self.config_file).exists(): + self.current_config = yaml_utils.load_config_from_file(self.config_file) + else: self.current_config = yaml_utils.load_config_from_file(constants.app_root_directory / 'config/khoj_sample.yml') self.new_config = self.current_config diff --git 
a/src/utils/yaml.py b/src/utils/yaml.py index 588acbda..e313c1f1 100644 --- a/src/utils/yaml.py +++ b/src/utils/yaml.py @@ -13,6 +13,9 @@ yaml.emitter.Emitter.process_tag = lambda self, *args, **kwargs: None def save_config_to_file(yaml_config: dict, yaml_config_file: Path): "Write config to YML file" + # Create output directory, if it doesn't exist + yaml_config_file.parent.mkdir(parents=True, exist_ok=True) + with open(get_absolute_path(yaml_config_file), 'w', encoding='utf-8') as config_file: yaml.safe_dump(yaml_config, config_file, allow_unicode=True) From fad2f3a2e75fb116d210fb9c5beb2d20ffde73f3 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 01:34:08 +0300 Subject: [PATCH 2/6] Resolve config_file to absolute path right at start on parsing args in cli - Assume path is absolute in yaml util module while saving, loading file - This follows the same convention as jsonl, which just operates on the passed file path, assuming it is of appropriate form. Responsibility to put it in appropriate form is on the caller, for now --- src/utils/cli.py | 9 ++++++--- src/utils/yaml.py | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/utils/cli.py b/src/utils/cli.py index 0e8a61ea..e855f706 100644 --- a/src/utils/cli.py +++ b/src/utils/cli.py @@ -3,7 +3,7 @@ import argparse import pathlib # Internal Packages -from src.utils.helpers import resolve_absolute_path +from src.utils.helpers import get_absolute_path, resolve_absolute_path from src.utils.yaml import parse_config_from_file @@ -20,9 +20,12 @@ def cli(args=None): args = parser.parse_args(args) - if not resolve_absolute_path(args.config_file).exists(): + # Normalize config_file path to absolute path + args.config_file = resolve_absolute_path(args.config_file) + + if not args.config_file.exists(): args.config = None else: args.config = parse_config_from_file(args.config_file) - return args \ No newline at end of file + return args diff --git a/src/utils/yaml.py b/src/utils/yaml.py 
index e313c1f1..46ddb788 100644 --- a/src/utils/yaml.py +++ b/src/utils/yaml.py @@ -5,7 +5,7 @@ from pathlib import Path import yaml # Internal Packages -from src.utils.helpers import get_absolute_path +from src.utils.helpers import get_absolute_path, resolve_absolute_path from src.utils.rawconfig import FullConfig # Do not emit tags when dumping to YAML @@ -16,14 +16,14 @@ def save_config_to_file(yaml_config: dict, yaml_config_file: Path): # Create output directory, if it doesn't exist yaml_config_file.parent.mkdir(parents=True, exist_ok=True) - with open(get_absolute_path(yaml_config_file), 'w', encoding='utf-8') as config_file: + with open(yaml_config_file, 'w', encoding='utf-8') as config_file: yaml.safe_dump(yaml_config, config_file, allow_unicode=True) def load_config_from_file(yaml_config_file: Path) -> dict: "Read config from YML file" config_from_file = None - with open(get_absolute_path(yaml_config_file), 'r', encoding='utf-8') as config_file: + with open(yaml_config_file, 'r', encoding='utf-8') as config_file: config_from_file = yaml.safe_load(config_file) return config_from_file From fd952e72733442444c052786b626acb4494492ea Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 01:47:52 +0300 Subject: [PATCH 3/6] Fix CLI tests as config_file path made absolute during CLI parsing --- tests/test_cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 7e7531fb..4cbf1209 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,6 +7,7 @@ import pytest # Internal Packages from src.utils.cli import cli +from src.utils.helpers import resolve_absolute_path # Test @@ -16,7 +17,7 @@ def test_cli_minimal_default(): actual_args = cli([]) # Assert - assert actual_args.config_file == Path('~/.khoj/khoj.yml') + assert actual_args.config_file == resolve_absolute_path(Path('~/.khoj/khoj.yml')) assert actual_args.regenerate == False assert actual_args.no_gui == False assert 
actual_args.verbose == 0 @@ -30,7 +31,7 @@ def test_cli_invalid_config_file_path(): actual_args = cli([f'-c={non_existent_config_file}']) # Assert - assert actual_args.config_file == Path(non_existent_config_file) + assert actual_args.config_file == resolve_absolute_path(non_existent_config_file) assert actual_args.config == None # ---------------------------------------------------------------------------------------------------- @@ -42,7 +43,7 @@ def test_cli_config_from_file(): '-vvv']) # Assert - assert actual_args.config_file == Path('tests/data/config.yml') + assert actual_args.config_file == resolve_absolute_path(Path('tests/data/config.yml')) assert actual_args.no_gui == True assert actual_args.regenerate == True assert actual_args.config is not None From 3f293a8766ac28565d697ef5dd3481a6e777e39b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 01:49:04 +0300 Subject: [PATCH 4/6] Add more details to Readme for pip install, upgrade during development --- Readme.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Readme.md b/Readme.md index da5ba39a..e7f8bad3 100644 --- a/Readme.md +++ b/Readme.md @@ -136,7 +136,7 @@ pip install --upgrade khoj-assistant ``` shell git clone https://github.com/debanjum/khoj && cd khoj python -m venv .venv && source .venv/bin/activate - pip install . + pip install -e . ``` ##### 2. Configure - Set `input-files` or `input-filter` in each relevant `content-type` section of `khoj_sample.yml` @@ -159,6 +159,8 @@ pip install --upgrade khoj-assistant pip install --upgrade --pre khoj-assistant # To Upgrade To Specific Development Release +# Useful to test, review a PR. 
+# A khoj-assistant package is automatically uploaded to Test PyPi pip install -r testpypi khoj-assistant==0.1.5.dev491659577806 ``` From 110e3df0b75d78d7cd16d1ac01314f4480963984 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 02:18:46 +0300 Subject: [PATCH 5/6] Set default config in the constant module. Use from there to configure app - Avoid having to pass the khoj_sample.yml data file into pip, native apps - Packaging data files into python packages is annoying. - There's `MANIFEST.in`, `data_files` and `package_data` in setup.py - Bdist, wheel, generated source tarball use different set of these fields and put the data files in different locations - Rather just code the default config into a constant. Avoid pointless file reads as well this way --- MANIFEST.in | 1 - src/interface/desktop/configure_screen.py | 4 +- src/utils/constants.py | 59 +++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 30a81879..595ab4e1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ include Readme.md -include config/khoj_sample.yml graft src/interface/* prune src/interface/web/images* prune docs* diff --git a/src/interface/desktop/configure_screen.py b/src/interface/desktop/configure_screen.py index a6cc84f2..ff7b86b3 100644 --- a/src/interface/desktop/configure_screen.py +++ b/src/interface/desktop/configure_screen.py @@ -30,7 +30,7 @@ class ConfigureScreen(QtWidgets.QDialog): if resolve_absolute_path(self.config_file).exists(): self.current_config = yaml_utils.load_config_from_file(self.config_file) else: - self.current_config = yaml_utils.load_config_from_file(constants.app_root_directory / 'config/khoj_sample.yml') + self.current_config = constants.default_config self.new_config = self.current_config # Initialize Configure Window @@ -127,7 +127,7 @@ class ConfigureScreen(QtWidgets.QDialog): def get_default_config(self, search_type:SearchType=None, 
processor_type:ProcessorType=None): "Get default config" - config = yaml_utils.load_config_from_file(constants.app_root_directory / 'config/khoj_sample.yml') + config = constants.default_config if search_type: return config['content-type'][search_type] elif processor_type: diff --git a/src/utils/constants.py b/src/utils/constants.py index 8e45d03c..569c5bcb 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -3,3 +3,62 @@ from pathlib import Path app_root_directory = Path(__file__).parent.parent.parent web_directory = app_root_directory / 'src/interface/web/' empty_escape_sequences = r'\n|\r\t ' + +# default app config to use +default_config = { + 'content-type': { + 'org': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/org/org.jsonl.gz', + 'embeddings-file': '~/.khoj/content/org/org_embeddings.pt' + }, + 'markdown': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/markdown/markdown.jsonl.gz', + 'embeddings-file': '~/.khoj/content/markdown/markdown_embeddings.pt' + }, + 'ledger': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/ledger/ledger.jsonl.gz', + 'embeddings-file': '~/.khoj/content/ledger/ledger_embeddings.pt' + }, + 'image': { + 'input-directories': None, + 'input-filter': None, + 'embeddings-file': '~/.khoj/content/image/image_embeddings.pt', + 'batch-size': 50, + 'use-xmp-metadata': True + }, + 'music': { + 'input-files': None, + 'input-filter': None, + 'compressed-jsonl': '~/.khoj/content/music/music.jsonl.gz', + 'embeddings-file': '~/.khoj/content/music/music_embeddings.pt' + } + }, + 'search-type': { + 'symmetric': { + 'encoder': 'sentence-transformers/all-MiniLM-L6-v2', + 'cross-encoder': 'cross-encoder/ms-marco-MiniLM-L-6-v2', + 'model_directory': '~/.khoj/search/symmetric/' + }, + 'asymmetric': { + 'encoder': 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1', + 'cross-encoder': 
'cross-encoder/ms-marco-MiniLM-L-6-v2', + 'model_directory': '~/.khoj/search/asymmetric/' + }, + 'image': { + 'encoder': 'sentence-transformers/clip-ViT-B-32', + 'model_directory': '~/.khoj/search/image/' + } + }, + 'processor': { + 'conversation': { + 'openai-api-key': None, + 'conversation-logfile': '~/.khoj/processor/conversation/conversation_logs.json' + } + } +} \ No newline at end of file From 34d5aa6a77f0f9d986734b48b850b22bf9f225d4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 12 Aug 2022 02:24:07 +0300 Subject: [PATCH 6/6] Fix Readme on how to install, upgrade to development khoj via pip --- Readme.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Readme.md b/Readme.md index e7f8bad3..9e932dac 100644 --- a/Readme.md +++ b/Readme.md @@ -158,10 +158,10 @@ pip install --upgrade khoj-assistant # To Upgrade To Latest Pre-Release pip install --upgrade --pre khoj-assistant -# To Upgrade To Specific Development Release +# To Upgrade To Specific Development Release. # Useful to test, review a PR. -# A khoj-assistant package is automatically uploaded to Test PyPi -pip install -r testpypi khoj-assistant==0.1.5.dev491659577806 +# Note: khoj-assistant is published to test PyPi on creating a PR +pip install -i https://test.pypi.org/simple/ khoj-assistant==0.1.5.dev57166025766 ``` #### Using Docker