From 38aede68f20adc7aff4ea95683593bb895ba37ae Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 22:02:03 +0300 Subject: [PATCH 1/7] Only configure org via config file for consistency across search types - Previously org-files were configurable via cmdline args. Where as none of the other search types are - This is an artifact of how the application grew - It can be removed for better consistency and equal preference given all search types --- src/utils/cli.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/utils/cli.py b/src/utils/cli.py index a426ba8c..7d8b3328 100644 --- a/src/utils/cli.py +++ b/src/utils/cli.py @@ -16,8 +16,6 @@ def cli(args=None): # Setup Argument Parser for the Commandline Interface parser = argparse.ArgumentParser(description="Expose API for Khoj") - parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process") - parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process") parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration") parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false") parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0") @@ -27,25 +25,18 @@ def cli(args=None): args = parser.parse_args(args) - if not (args.config_file or args.org_files): - print(f"Require at least 1 of --org-file, --org-filter or --config-file flags to be passed from commandline") + if not (args.config_file): + print(f"Need --config-file flag to be passed from commandline") exit(1) - # Config Priority: Cmd Args > Config File > Default Config + # Config Priority: Config File > Default Config args.config = default_config if args.config_file and resolve_absolute_path(args.config_file).exists(): with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file: config_from_file = yaml.safe_load(config_file) args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config) - args.config = FullConfig.parse_obj(args.config) - else: - args.config = FullConfig.parse_obj(args.config) - if args.org_files: - args.config.content_type.org.input_files = args.org_files - - if args.org_filter: - args.config.content_type.org.input_filter = args.org_filter + args.config = FullConfig.parse_obj(args.config) return args From b83021a723cce826ec18be410a65ad5cdf2f22a0 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 22:07:56 +0300 Subject: [PATCH 2/7] Improve code readability of merge_dicts helper method --- src/utils/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/helpers.py b/src/utils/helpers.py index 3c19b935..55dc77e5 100644 --- a/src/utils/helpers.py +++ b/src/utils/helpers.py @@ -32,9 +32,9 @@ def get_from_dict(dictionary, *args): def merge_dicts(priority_dict, default_dict): merged_dict = priority_dict.copy() - for k, v in default_dict.items(): - if k not in priority_dict: - merged_dict[k] = default_dict[k] + for key, _ in default_dict.items(): + if key not in priority_dict: + merged_dict[key] = default_dict[key] return merged_dict From 17c38b526a1b454243be86875e22c8bd4b5a2bb8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 22:14:20 +0300 Subject: [PATCH 3/7] Default config for each search types to None - Setting up default compressed-jsonl, embeddings-file was only required for org search_type, while org-files and org-filter were allowed to be passed as command line argument - This avoided having to set compressed-jsonl and embeddings-file via command line argument as well for org search type - Now that all search types are only configurable via config file, We can default all search types to None. The default config for the rest of the search types wasn't being used anyway --- src/utils/cli.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/utils/cli.py b/src/utils/cli.py index 7d8b3328..549ad056 100644 --- a/src/utils/cli.py +++ b/src/utils/cli.py @@ -44,27 +44,11 @@ def cli(args=None): default_config = { 'content-type': { - 'org': - { - 'compressed-jsonl': '.notes.jsonl.gz', - 'embeddings-file': '.note_embeddings.pt' - }, - 'ledger': - { - 'compressed-jsonl': '.transactions.jsonl.gz', - 'embeddings-file': '.transaction_embeddings.pt' - }, - 'image': - { - 'embeddings-file': '.image_embeddings.pt', - 'batch-size': 50, - 'use-xmp-metadata': 'no' - }, - 'music': - { - 'compressed-jsonl': '.songs.jsonl.gz', - 'embeddings-file': '.song_embeddings.pt' - }, + 'org': None, + 'ledger': None, + 'image': None, + 'music': None, + 'markdown': None, }, 'search-type': { From 0abd40aeb7ad991a8b614f9f4acf002c3452a58a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 22:29:23 +0300 Subject: [PATCH 4/7] Only set query field when appropriate query param passed via URL - Setting query value to default option when query param wasn't passed via URL was overriding placeholder text in query field - We wanted placeholder text in field, not the query field to actually be populated by placeholder text - This clears field when user starts typing query into the query field, instead of them having to manually delete the default text populated --- src/interface/web/index.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/interface/web/index.html b/src/interface/web/index.html index 7ef95b42..b364f670 100644 --- a/src/interface/web/index.html +++ b/src/interface/web/index.html @@ -85,7 +85,10 @@ // Fill search form with values passed in URL query parameters, if any. window.onload = function () { document.getElementById("type").value = new URLSearchParams(window.location.search).get("t") || "org"; - document.getElementById("query").value = new URLSearchParams(window.location.search).get("q") || "What is the meaning of life?"; + var query_via_url = new URLSearchParams(window.location.search).get("q"); + if (query_via_url) { + document.getElementById("query").value = query_via_url; + } } From be253bab39a00ae1aaf11eb7110eb21126dddd78 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 23:07:40 +0300 Subject: [PATCH 5/7] Populate type dropdown with only enabled search types in web interface - Get /config API and check config for which available search types is populated. This gives us the list of enabled search types - Dynamically populate search type field with enabled search types only --- src/interface/web/index.html | 37 ++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/interface/web/index.html b/src/interface/web/index.html index b364f670..1229d9b8 100644 --- a/src/interface/web/index.html +++ b/src/interface/web/index.html @@ -82,9 +82,30 @@ event.key === 'Enter' ? search(rerank=true) : search(rerank=false); } - // Fill search form with values passed in URL query parameters, if any. + function populate_type_dropdown() { + // Populate type dropdown field with enabled search types only + var possible_search_types = ["org", "markdown", "ledger", "music", "image"]; + fetch("/config") + .then(response => response.json()) + .then(data => { + document.getElementById("type").innerHTML = + possible_search_types + .filter(type => data["content-type"].hasOwnProperty(type) && data["content-type"][type]) + .map(type => ``) + .join(''); + }); + + // Set type field to search type passed in URL query parameter, if valid + var type_via_url = new URLSearchParams(window.location.search).get("t"); + if (type_via_url && type_via_url in possible_search_types) + document.getElementById("type").value = type_via_url; + } + window.onload = function () { - document.getElementById("type").value = new URLSearchParams(window.location.search).get("t") || "org"; + // Dynamically populate type dropdown based on enabled search types and type passed as URL query parameter + populate_type_dropdown(); + + // Fill query field with value passed in URL query parameters, if any. var query_via_url = new URLSearchParams(window.location.search).get("q"); if (query_via_url) { document.getElementById("query").value = query_via_url; @@ -98,16 +119,8 @@ - - + + From 7d7259bd926595aef8fda7c9b3a76d50354ce203 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 31 Jul 2022 23:35:29 +0300 Subject: [PATCH 6/7] Remove tests that validate configuring org using commandline arguments --- tests/test_cli.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 58808fb4..a266ebc0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -42,29 +42,3 @@ def test_cli_config_from_file(): assert actual_args.config is not None assert actual_args.config.content_type.org.input_files == ['~/first_from_config.org', '~/second_from_config.org'] assert actual_args.verbose == 3 - - -# ---------------------------------------------------------------------------------------------------- -def test_cli_config_from_cmd_args(): - "" - # Act - actual_args = cli(['--org-files=first.org']) - - # Assert - assert actual_args.org_files == ['first.org'] - assert actual_args.config_file is None - assert actual_args.config is not None - assert actual_args.config.content_type.org.input_files == ['first.org'] - - -# ---------------------------------------------------------------------------------------------------- -def test_cli_config_from_cmd_args_override_config_file(): - # Act - actual_args = cli(['--config-file=tests/data/config.yml', - '--org-files=first.org']) - - # Assert - assert actual_args.org_files == ['first.org'] - assert actual_args.config_file == Path('tests/data/config.yml') - assert actual_args.config is not None - assert actual_args.config.content_type.org.input_files == ['first.org'] From 8b6058c879606ccd4fb12a1138728e21c6dccb46 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 1 Aug 2022 00:03:52 +0300 Subject: [PATCH 7/7] Fix instantiating type field with value from URL query parameter - Populate via `.then` after enabled search types in dropdown are populated - Call to `/config` API is async and will usually complete after the value of type field is set from url - So value of type field would earlier be overridden when search types dropdown is populated after the call to `/config` API completes --- src/interface/web/index.html | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/interface/web/index.html b/src/interface/web/index.html index 1229d9b8..333abc19 100644 --- a/src/interface/web/index.html +++ b/src/interface/web/index.html @@ -93,12 +93,13 @@ .filter(type => data["content-type"].hasOwnProperty(type) && data["content-type"][type]) .map(type => ``) .join(''); + }) + .then(() => { + // Set type field to search type passed in URL query parameter, if valid + var type_via_url = new URLSearchParams(window.location.search).get("t"); + if (type_via_url && possible_search_types.includes(type_via_url)) + document.getElementById("type").value = type_via_url; }); - - // Set type field to search type passed in URL query parameter, if valid - var type_via_url = new URLSearchParams(window.location.search).get("t"); - if (type_via_url && type_via_url in possible_search_types) - document.getElementById("type").value = type_via_url; } window.onload = function () { @@ -107,9 +108,8 @@ // Fill query field with value passed in URL query parameters, if any. var query_via_url = new URLSearchParams(window.location.search).get("q"); - if (query_via_url) { + if (query_via_url) document.getElementById("query").value = query_via_url; - } }