mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 05:39:12 +00:00
[Multi-User Part 1]: Enable storage of settings for plaintext files based on user account (#498)
- Partition configuration for indexing local data based on user accounts - Store indexed data in an underlying postgres db using the `pgvector` extension - Add migrations for all relevant user data and embeddings generation. Very little performance optimization has been done for the lookup time - Apply filters using SQL queries - Start removing many server-level configuration settings - Configure GitHub test actions to run during any PR. Update the test action to run in a containerized environment with a DB. - Update the Docker image and docker-compose.yml to work with the new application design
This commit is contained in:
60
src/app/README.md
Normal file
60
src/app/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# Django App
|
||||
|
||||
Khoj uses Django as the backend framework primarily for its powerful ORM and the admin interface. The Django app is located in the `src/app` directory. We have one installed app, under the `/database/` directory. This app is responsible for all the database related operations and holds all of our models. You can find the extensive Django documentation [here](https://docs.djangoproject.com/en/4.2/) 🌈.
|
||||
|
||||
## Setup (Docker)
|
||||
|
||||
### Prerequisites
|
||||
1. Ensure you have [Docker](https://docs.docker.com/get-docker/) installed.
|
||||
2. Ensure you have [Docker Compose](https://docs.docker.com/compose/install/) installed.
|
||||
|
||||
### Run
|
||||
|
||||
Using the `docker-compose.yml` file in the root directory, you can run the Khoj app using the following command:
|
||||
```bash
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
## Setup (Local)
|
||||
|
||||
### Install dependencies
|
||||
|
||||
```bash
|
||||
pip install -e '.[dev]'
|
||||
```
|
||||
|
||||
### Setup the database
|
||||
|
||||
1. Ensure you have Postgres installed. For MacOS, you can use [Postgres.app](https://postgresapp.com/).
|
||||
2. If you're not using Postgres.app, you may have to install the pgvector extension manually. You can find the instructions [here](https://github.com/pgvector/pgvector#installation). If you're using Postgres.app, you can skip this step. Reproduced instructions below for convenience.
|
||||
|
||||
```bash
|
||||
cd /tmp
|
||||
git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git
|
||||
cd pgvector
|
||||
make
|
||||
make install # may need sudo
|
||||
```
|
||||
3. Create a database
|
||||
|
||||
### Make migrations
|
||||
|
||||
This command will create the migrations for the database app. This command should be run whenever a new model is added to the database app or an existing model is modified (updated or deleted).
|
||||
|
||||
```bash
|
||||
python3 src/manage.py makemigrations
|
||||
```
|
||||
|
||||
### Run migrations
|
||||
|
||||
This command will run any pending migrations in your application.
|
||||
```bash
|
||||
python3 src/manage.py migrate
|
||||
```
|
||||
|
||||
### Run the server
|
||||
|
||||
While we're using Django for the ORM, we're still using the FastAPI server for the API. This command automatically scaffolds the Django application in the backend.
|
||||
```bash
|
||||
python3 src/khoj/main.py
|
||||
```
|
||||
134
src/app/main.py
134
src/app/main.py
@@ -1,134 +0,0 @@
|
||||
# Standard Packages
|
||||
import os
|
||||
import sys
|
||||
import locale
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import warnings
|
||||
from importlib.metadata import version
|
||||
|
||||
# Ignore non-actionable warnings
|
||||
warnings.filterwarnings("ignore", message=r"snapshot_download.py has been made private", category=FutureWarning)
|
||||
warnings.filterwarnings("ignore", message=r"legacy way to download files from the HF hub,", category=FutureWarning)
|
||||
|
||||
# External Packages
|
||||
import uvicorn
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import schedule
|
||||
import django
|
||||
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from rich.logging import RichHandler
|
||||
from django.core.asgi import get_asgi_application
|
||||
from django.core.management import call_command
|
||||
|
||||
# Internal Packages
|
||||
from khoj.configure import configure_routes, initialize_server, configure_middleware
|
||||
from khoj.utils import state
|
||||
from khoj.utils.cli import cli
|
||||
|
||||
# Initialize Django
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings")
|
||||
django.setup()
|
||||
|
||||
# Initialize Django Database
|
||||
call_command("migrate", "--noinput")
|
||||
|
||||
# Initialize the Application Server
|
||||
app = FastAPI()
|
||||
|
||||
# Get Django Application
|
||||
django_app = get_asgi_application()
|
||||
|
||||
# Add CORS middleware
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Set Locale
|
||||
locale.setlocale(locale.LC_ALL, "")
|
||||
|
||||
# Setup Logger
|
||||
rich_handler = RichHandler(rich_tracebacks=True)
|
||||
rich_handler.setFormatter(fmt=logging.Formatter(fmt="%(message)s", datefmt="[%X]"))
|
||||
logging.basicConfig(handlers=[rich_handler])
|
||||
|
||||
logger = logging.getLogger("khoj")
|
||||
|
||||
|
||||
def run():
|
||||
# Turn Tokenizers Parallelism Off. App does not support it.
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
|
||||
# Load config from CLI
|
||||
state.cli_args = sys.argv[1:]
|
||||
args = cli(state.cli_args)
|
||||
set_state(args)
|
||||
|
||||
# Create app directory, if it doesn't exist
|
||||
state.config_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Set Logging Level
|
||||
if args.verbose == 0:
|
||||
logger.setLevel(logging.INFO)
|
||||
elif args.verbose >= 1:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
# Set Log File
|
||||
fh = logging.FileHandler(state.config_file.parent / "khoj.log", encoding="utf-8")
|
||||
fh.setLevel(logging.DEBUG)
|
||||
logger.addHandler(fh)
|
||||
|
||||
logger.info("🌘 Starting Khoj")
|
||||
|
||||
# Setup task scheduler
|
||||
poll_task_scheduler()
|
||||
|
||||
# Start Server
|
||||
configure_routes(app)
|
||||
|
||||
# Mount Django and Static Files
|
||||
app.mount("/django", django_app, name="django")
|
||||
app.mount("/static", StaticFiles(directory="static"), name="static")
|
||||
|
||||
# Configure Middleware
|
||||
configure_middleware(app)
|
||||
|
||||
initialize_server(args.config)
|
||||
start_server(app, host=args.host, port=args.port, socket=args.socket)
|
||||
|
||||
|
||||
def set_state(args):
|
||||
state.config_file = args.config_file
|
||||
state.config = args.config
|
||||
state.verbose = args.verbose
|
||||
state.host = args.host
|
||||
state.port = args.port
|
||||
state.demo = args.demo
|
||||
state.khoj_version = version("khoj-assistant")
|
||||
|
||||
|
||||
def start_server(app, host=None, port=None, socket=None):
|
||||
logger.info("🌖 Khoj is ready to use")
|
||||
if socket:
|
||||
uvicorn.run(app, proxy_headers=True, uds=socket, log_level="debug", use_colors=True, log_config=None)
|
||||
else:
|
||||
uvicorn.run(app, host=host, port=port, log_level="debug", use_colors=True, log_config=None)
|
||||
logger.info("🌒 Stopping Khoj")
|
||||
|
||||
|
||||
def poll_task_scheduler():
|
||||
timer_thread = threading.Timer(60.0, poll_task_scheduler)
|
||||
timer_thread.daemon = True
|
||||
timer_thread.start()
|
||||
schedule.run_pending()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
@@ -77,8 +77,12 @@ WSGI_APPLICATION = "app.wsgi.application"
|
||||
|
||||
DATABASES = {
|
||||
"default": {
|
||||
"ENGINE": "django.db.backends.sqlite3",
|
||||
"NAME": BASE_DIR / "db.sqlite3",
|
||||
"ENGINE": "django.db.backends.postgresql",
|
||||
"HOST": os.getenv("POSTGRES_HOST", "localhost"),
|
||||
"PORT": os.getenv("POSTGRES_PORT", "5432"),
|
||||
"USER": os.getenv("POSTGRES_USER", "postgres"),
|
||||
"NAME": os.getenv("POSTGRES_DB", "khoj"),
|
||||
"PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user