mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Move CLI into a separate module. Move CLI tests into a separate file
This commit is contained in:
61
src/main.py
61
src/main.py
@@ -1,18 +1,16 @@
|
|||||||
# Standard Packages
|
# Standard Packages
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
import uvicorn
|
import uvicorn
|
||||||
import yaml
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from search_type import asymmetric
|
from search_type import asymmetric
|
||||||
from processor.org_mode.org_to_jsonl import org_to_jsonl
|
from utils.helpers import get_from_dict
|
||||||
from utils.helpers import is_none_or_empty, get_absolute_path, get_from_dict, merge_dicts
|
from utils.cli import cli
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
@@ -62,61 +60,8 @@ def regenerate(t: Optional[str] = None):
|
|||||||
return {'status': 'ok', 'message': 'regeneration completed'}
|
return {'status': 'ok', 'message': 'regeneration completed'}
|
||||||
|
|
||||||
|
|
||||||
def cli(args=None):
|
|
||||||
if is_none_or_empty(args):
|
|
||||||
args = sys.argv[1:]
|
|
||||||
|
|
||||||
# Setup Argument Parser for the Commandline Interface
|
|
||||||
parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
|
|
||||||
parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process")
|
|
||||||
parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process")
|
|
||||||
parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration")
|
|
||||||
parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
|
|
||||||
parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
|
|
||||||
args = parser.parse_args(args)
|
|
||||||
|
|
||||||
if not (args.config_file or args.org_files):
|
|
||||||
print(f"Require at least 1 of --org-file, --org-filter or --config-file flags to be passed from commandline")
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
# Config Priority: Cmd Args > Config File > Default Config
|
|
||||||
args.config = default_config
|
|
||||||
if args.config_file and args.config_file.exists():
|
|
||||||
with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
|
|
||||||
config_from_file = yaml.safe_load(config_file)
|
|
||||||
args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
|
|
||||||
|
|
||||||
if args.org_files:
|
|
||||||
args.config['content-type']['org']['input-files'] = args.org_files
|
|
||||||
|
|
||||||
if args.org_filter:
|
|
||||||
args.config['content-type']['org']['input-filter'] = args.org_filter
|
|
||||||
|
|
||||||
return args
|
|
||||||
|
|
||||||
|
|
||||||
default_config = {
|
|
||||||
'content-type':
|
|
||||||
{
|
|
||||||
'org':
|
|
||||||
{
|
|
||||||
'compressed-jsonl': '.notes.jsonl.gz',
|
|
||||||
'embeddings-file': '.note_embeddings.pt'
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'search-type':
|
|
||||||
{
|
|
||||||
'asymmetric':
|
|
||||||
{
|
|
||||||
'encoder': "sentence-transformers/msmarco-MiniLM-L-6-v3",
|
|
||||||
'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
args = cli()
|
args = cli(sys.argv[1:])
|
||||||
org_config = get_from_dict(args.config, 'content-type', 'org')
|
org_config = get_from_dict(args.config, 'content-type', 'org')
|
||||||
|
|
||||||
notes_search_enabled = False
|
notes_search_enabled = False
|
||||||
|
|||||||
70
src/tests/test_cli.py
Normal file
70
src/tests/test_cli.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# Standard Modules
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from utils.cli import cli
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_minimal_default():
    """With only --config-file passed, the optional flags keep their defaults."""
    # Act
    actual_args = cli(['--config-file=tests/data/config.yml'])

    # Assert
    assert actual_args.config_file == Path('tests/data/config.yml')
    # --regenerate is a store_true flag, so its untouched default is the bool False
    assert actual_args.regenerate is False
    assert actual_args.verbose == 0
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_flags():
    """--regenerate and a repeated -v are reflected in the parsed arguments."""
    # Act
    actual_args = cli(['--config-file=tests/data/config.yml',
                       '--regenerate',
                       '-vvv'])

    # Assert
    assert actual_args.config_file == Path('tests/data/config.yml')
    assert actual_args.regenerate is True
    # -v uses action='count', so '-vvv' counts to 3
    assert actual_args.verbose == 3
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_config_from_file():
    """Values from the YAML config file end up in the merged args.config."""
    # Act
    actual_args = cli(['--config-file=tests/data/config.yml',
                       '--regenerate',
                       '-vvv'])

    # Assert
    assert actual_args.config_file == Path('tests/data/config.yml')
    assert actual_args.regenerate is True
    assert actual_args.config is not None
    # Expected list comes from the tests/data/config.yml fixture
    assert actual_args.config['content-type']['org']['input-files'] == ['~/first_from_config.org', '~/second_from_config.org']
    assert actual_args.verbose == 3
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_config_from_cmd_args():
    """Without a config file, --org-files alone populates args.config."""
    # Act
    actual_args = cli(['--org-files=first.org'])

    # Assert
    assert actual_args.org_files == ['first.org']
    assert actual_args.config_file is None
    assert actual_args.config is not None
    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_config_from_cmd_args_override_config_file():
    """--org-files on the commandline takes priority over the config file's input-files."""
    # Act
    actual_args = cli(['--config-file=tests/data/config.yml',
                       '--org-files=first.org'])

    # Assert
    assert actual_args.org_files == ['first.org']
    assert actual_args.config_file == Path('tests/data/config.yml')
    assert actual_args.config is not None
    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
|
||||||
@@ -6,7 +6,7 @@ import pytest
|
|||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from main import app, cli
|
from main import app
|
||||||
from search_type import asymmetric
|
from search_type import asymmetric
|
||||||
|
|
||||||
|
|
||||||
@@ -14,82 +14,21 @@ from search_type import asymmetric
|
|||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
client = TestClient(app)
|
client = TestClient(app)
|
||||||
|
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')]
|
|
||||||
input_filter = None
|
|
||||||
compressed_jsonl = Path('tests/data/.test.jsonl.gz')
|
|
||||||
embeddings = Path('tests/data/.test_embeddings.pt')
|
|
||||||
regenerate = False
|
|
||||||
verbose = 1
|
|
||||||
|
|
||||||
|
|
||||||
|
# Test
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_asymmetric_setup():
|
def test_asymmetric_setup():
|
||||||
|
# Arrange
|
||||||
|
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')]
|
||||||
|
input_filter = None
|
||||||
|
compressed_jsonl = Path('tests/data/.test.jsonl.gz')
|
||||||
|
embeddings = Path('tests/data/.test_embeddings.pt')
|
||||||
|
regenerate = False
|
||||||
|
verbose = 1
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
entries, corpus_embeddings, bi_encoder, cross_encoder, top_k = asymmetric.setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate, verbose)
|
entries, corpus_embeddings, bi_encoder, cross_encoder, top_k = asymmetric.setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate, verbose)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert len(entries) == 10
|
assert len(entries) == 10
|
||||||
assert len(corpus_embeddings) == 10
|
assert len(corpus_embeddings) == 10
|
||||||
|
|
||||||
|
|
||||||
def test_cli_minimal_default():
|
|
||||||
# Act
|
|
||||||
actual_args = cli(['--config-file=tests/data/config.yml'])
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
|
||||||
assert actual_args.regenerate == False
|
|
||||||
assert actual_args.verbose == 0
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
|
||||||
def test_cli_flags():
|
|
||||||
# Act
|
|
||||||
actual_args = cli(['--config-file=tests/data/config.yml',
|
|
||||||
'--regenerate',
|
|
||||||
'-vvv'])
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
|
||||||
assert actual_args.regenerate == True
|
|
||||||
assert actual_args.verbose == 3
|
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
|
||||||
def test_cli_config_from_file():
|
|
||||||
# Act
|
|
||||||
actual_args = cli(['--config-file=tests/data/config.yml',
|
|
||||||
'--regenerate',
|
|
||||||
'-vvv'])
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
|
||||||
assert actual_args.regenerate == True
|
|
||||||
assert actual_args.config is not None
|
|
||||||
assert actual_args.config['content-type']['org']['input-files'] == ['~/first_from_config.org', '~/second_from_config.org']
|
|
||||||
assert actual_args.verbose == 3
|
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
|
||||||
def test_cli_config_from_cmd_args():
|
|
||||||
""
|
|
||||||
# Act
|
|
||||||
actual_args = cli(['--org-files=first.org'])
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert actual_args.org_files == ['first.org']
|
|
||||||
assert actual_args.config_file is None
|
|
||||||
assert actual_args.config is not None
|
|
||||||
assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
|
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
|
||||||
def test_cli_config_from_cmd_args_override_config_file():
|
|
||||||
# Act
|
|
||||||
actual_args = cli(['--config-file=tests/data/config.yml',
|
|
||||||
'--org-files=first.org'])
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert actual_args.org_files == ['first.org']
|
|
||||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
|
||||||
assert actual_args.config is not None
|
|
||||||
assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
|
|
||||||
|
|||||||
61
src/utils/cli.py
Normal file
61
src/utils/cli.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Standard Packages
|
||||||
|
import argparse
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
# External Packages
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from utils.helpers import is_none_or_empty, get_absolute_path, get_from_dict, merge_dicts
|
||||||
|
|
||||||
|
def cli(args=None):
    """Parse commandline arguments and build the effective configuration.

    Configuration priority: commandline arguments > config file > default_config.

    Args:
        args: List of commandline argument strings, e.g. ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with an extra ``config`` attribute
        holding the merged configuration dict. Returns ``None`` without parsing
        when ``is_none_or_empty(args)`` is true, so callers must handle that
        case before reading attributes off the result.

    Raises:
        SystemExit: With status 1 when none of --org-files, --org-filter or
            --config-file is passed. argparse also exits on malformed arguments.
    """
    # Function-scope import: deepcopy is only needed here to isolate default_config
    import copy

    if is_none_or_empty(args):
        return None

    # Setup Argument Parser for the Commandline Interface
    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
    parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process")
    parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process")
    parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration")
    parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
    parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
    args = parser.parse_args(args)

    # The message promises that any one of the three flags is enough, so the
    # guard has to accept --org-filter on its own as well
    if not (args.config_file or args.org_files or args.org_filter):
        print("Require at least 1 of --org-files, --org-filter or --config-file flags to be passed from commandline")
        # SystemExit instead of the exit() builtin, which only exists when the site module is loaded
        raise SystemExit(1)

    # Config Priority: Cmd Args > Config File > Default Config
    # Work on a deep copy so the overrides below never leak into the shared
    # module-level default_config (and from there into later cli() calls)
    args.config = copy.deepcopy(default_config)
    # NOTE(review): existence is checked on the raw path while the file is opened
    # via get_absolute_path(); a config file that does not exist is silently ignored
    if args.config_file and args.config_file.exists():
        with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
            # safe_load returns None for an empty document; treat that as "no overrides"
            config_from_file = yaml.safe_load(config_file) or {}
        args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)

    if args.org_files or args.org_filter:
        # The config file may define 'content-type' without an 'org' section;
        # create the missing levels instead of failing with a KeyError
        org_config = args.config.setdefault('content-type', {}).setdefault('org', {})
        if args.org_files:
            org_config['input-files'] = args.org_files
        if args.org_filter:
            org_config['input-filter'] = args.org_filter

    return args
|
||||||
|
|
||||||
|
|
||||||
|
# Lowest-priority configuration: cli() layers config-file values and
# commandline flags on top of these defaults
default_config = {
    'content-type': {
        'org': {
            'compressed-jsonl': '.notes.jsonl.gz',
            'embeddings-file': '.note_embeddings.pt',
        },
    },
    'search-type': {
        'asymmetric': {
            'encoder': "sentence-transformers/msmarco-MiniLM-L-6-v3",
            'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2",
        },
    },
}
|
||||||
Reference in New Issue
Block a user