diff --git a/.gitignore b/.gitignore index 4bb1a18e..25c4b25f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,10 @@ __pycache__ tests/data/models tests/data/embeddings src/.data +/src/interface/web/images .vscode *.gz *.pt -/src/interface/web/*.jpg -/src/interface/web/*.png \ No newline at end of file +/build/ +/dist/ +/khoj_assistant.egg-info/ diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..c907129d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include Readme.md +graft docs* +global-exclude .DS_Store *.py[cod] diff --git a/Readme.md b/Readme.md index 98f48979..9d7fbc07 100644 --- a/Readme.md +++ b/Readme.md @@ -12,9 +12,9 @@ - [Analysis](#Analysis) - [Architecture](#Architecture) - [Setup](#Setup) - - [Clone](#Clone) - - [Configure](#Configure) - - [Run](#Run) + - [Clone](#1.-Clone) + - [Configure](#2.-Configure) + - [Run](#3.-Run) - [Use](#Use) - [Upgrade](#Upgrade) - [Troubleshoot](#Troubleshoot) @@ -117,6 +117,34 @@ docker-compose build --pull ### Setup on Local Machine +#### Using Pip +1. Install Dependencies + 1. Python3, Pip \[Required\] + 2. Virtualenv \[Optional\] + 3. Install Exiftool \[Optional\] + ``` shell + sudo apt-get -y install libimage-exiftool-perl + ``` + +2. Install Khoj + ``` shell + virtualenv -p python3 .venv && source .venv/bin/activate # Optional + pip install khoj-assistant + ``` + +3. Configure + - Configure files/directories to search in `content-type` section of `sample_config.yml` + - To run application on test data, update file paths containing `/data/` to `tests/data/` in `sample_config.yml` + - Example: replace `/data/notes/*.org` with `tests/data/notes/*.org` + +4. Run + Load ML model, generate embeddings and expose API to query notes, images, transactions etc. specified in config YAML + + ``` shell + khoj -c=config/sample_config.yml -vv + ``` + +#### Using Conda 1. Install Dependencies 1. Install Python3 \[Required\] 2. 
[Install Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) \[Required\] @@ -145,7 +173,12 @@ docker-compose build --pull ``` ### Upgrade On Local Machine +#### Using Pip +``` shell +pip install --upgrade khoj-assistant +``` +#### Using Conda ``` shell cd khoj git pull origin master diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..0a5a1b40 --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +from setuptools import find_packages, setup + +from pathlib import Path +this_directory = Path(__file__).parent + +setup( + name='khoj-assistant', + version='0.1.3', + description="A natural language search engine for your personal notes, transactions and images", + long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"), + long_description_content_type="text/markdown", + author='Debanjum Singh Solanky, Saba Imran', + author_email='debanjum+pypi@gmail.com, narmiabas@gmail.com', + url='https://github.com/debanjum/khoj', + license="GPLv3", + keywords="search semantic-search productivity NLP org-mode markdown beancount images", + python_requires=">=3.5, <4", + packages=find_packages( + where=".", + exclude=["tests*"], + include=["src*"] + ), + install_requires=[ + "numpy == 1.22.4", + "torch == 1.11.0", + "torchvision == 0.12.0", + "transformers == 4.21.0", + "sentence-transformers == 2.1.0", + "openai == 0.20.0", + "huggingface_hub == 0.8.1", + "pydantic == 1.9.1", + "fastapi == 0.77.1", + "uvicorn == 0.17.6", + "jinja2 == 3.1.2", + "pyyaml == 6.0", + "pytest == 7.1.2", + "pillow == 8.4.0", + "aiofiles == 0.8.0", + "dateparser == 1.1.1", + ], + include_package_data=True, + entry_points={"console_scripts": ["khoj = src.main:run"]}, + classifiers=[ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming 
Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + ] +) diff --git a/src/main.py b/src/main.py index b4d3ec46..052ca375 100644 --- a/src/main.py +++ b/src/main.py @@ -2,6 +2,7 @@ import sys, json, yaml, os import time from typing import Optional +from pathlib import Path # External Packages import uvicorn @@ -31,14 +32,15 @@ processor_config = ProcessorConfigModel() config_file = "" verbose = 0 app = FastAPI() -web_directory = f'src/interface/web/' +this_directory = Path(__file__).parent +web_directory = this_directory / 'interface/web/' app.mount("/static", StaticFiles(directory=web_directory), name="static") templates = Jinja2Templates(directory=web_directory) @app.get("/", response_class=FileResponse) def index(): - return FileResponse(web_directory + "index.html") + return FileResponse(web_directory / "index.html") @app.get('/config', response_class=HTMLResponse) def ui(request: Request): @@ -116,7 +118,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti # query images query_start = time.time() hits = image_search.query(user_query, results_count, model.image_search) - output_directory = f'{os.getcwd()}/{web_directory}/images' + output_directory = web_directory / 'images' query_end = time.time() # collate and return results @@ -279,26 +281,31 @@ def shutdown_event(): print('INFO:\tConversation logs saved to disk.') -if __name__ == '__main__': +def run(): # Load config from CLI args = cli(sys.argv[1:]) # Stores the file path to the config file. + global config_file config_file = args.config_file # Store the verbose flag + global verbose verbose = args.verbose # Store the raw config data. 
+ global config config = args.config # Set device to GPU if available device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") # Initialize the search model from Config + global model model = initialize_search(args.config, args.regenerate, device=device) # Initialize Processor from Config + global processor_config processor_config = initialize_processor(args.config) # Start Application Server @@ -306,3 +313,7 @@ if __name__ == '__main__': uvicorn.run(app, proxy_headers=True, uds=args.socket) else: uvicorn.run(app, host=args.host, port=args.port) + + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/src/processor/conversation/__init__.py b/src/processor/conversation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/search_filter/__init__.py b/src/search_filter/__init__.py new file mode 100644 index 00000000..e69de29b