mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 13:25:11 +00:00
Use embedded postgres db to simplify self-hosted setup (#1141)
Use pgserver python package as an embedded postgres db, installed directly as a khoj python package dependency. This significantly simplifies self-hosting with just a `pip install khoj'. No need to also install postgres separately. Still use standard postgres server for multi-user, production use-cases.
This commit is contained in:
@@ -70,6 +70,7 @@ dependencies = [
|
|||||||
"itsdangerous == 2.1.2",
|
"itsdangerous == 2.1.2",
|
||||||
"httpx == 0.28.1",
|
"httpx == 0.28.1",
|
||||||
"pgvector == 0.2.4",
|
"pgvector == 0.2.4",
|
||||||
|
"pgserver == 0.1.4",
|
||||||
"psycopg2-binary == 2.9.9",
|
"psycopg2-binary == 2.9.9",
|
||||||
"lxml == 4.9.3",
|
"lxml == 4.9.3",
|
||||||
"tzdata == 2023.3",
|
"tzdata == 2023.3",
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ For the full list of settings and their values, see
|
|||||||
https://docs.djangoproject.com/en/4.2/ref/settings/
|
https://docs.djangoproject.com/en/4.2/ref/settings/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import atexit
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -119,13 +121,71 @@ CLOSE_CONNECTIONS_AFTER_REQUEST = True
|
|||||||
# Database
|
# Database
|
||||||
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
|
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
|
||||||
DATA_UPLOAD_MAX_NUMBER_FIELDS = 20000
|
DATA_UPLOAD_MAX_NUMBER_FIELDS = 20000
|
||||||
|
|
||||||
|
# Default PostgreSQL configuration
|
||||||
|
DB_NAME = os.getenv("POSTGRES_DB", "khoj")
|
||||||
|
DB_HOST = os.getenv("POSTGRES_HOST", "localhost")
|
||||||
|
DB_PORT = os.getenv("POSTGRES_PORT", "5432")
|
||||||
|
|
||||||
|
# Use pgserver if env var explicitly set to true
|
||||||
|
USE_EMBEDDED_DB = is_env_var_true("USE_EMBEDDED_DB")
|
||||||
|
|
||||||
|
if USE_EMBEDDED_DB:
|
||||||
|
# Set up logging for pgserver
|
||||||
|
logger = logging.getLogger("pgserver_django")
|
||||||
|
logger.setLevel(logging.INFO)
|
||||||
|
if not logger.handlers:
|
||||||
|
handler = logging.StreamHandler()
|
||||||
|
handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
|
||||||
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pgserver
|
||||||
|
|
||||||
|
# Set up data directory
|
||||||
|
PGSERVER_DATA_DIR = os.path.join(BASE_DIR, "pgserver_data")
|
||||||
|
os.makedirs(PGSERVER_DATA_DIR, exist_ok=True)
|
||||||
|
|
||||||
|
logger.debug(f"Initializing embedded Postgres DB with data directory: {PGSERVER_DATA_DIR}")
|
||||||
|
|
||||||
|
# Start server
|
||||||
|
PGSERVER_INSTANCE = pgserver.get_server(PGSERVER_DATA_DIR)
|
||||||
|
|
||||||
|
# Create pgvector extension, if not already exists
|
||||||
|
PGSERVER_INSTANCE.psql("CREATE EXTENSION IF NOT EXISTS vector;")
|
||||||
|
|
||||||
|
# Create database, if not already exists
|
||||||
|
db_exists_result = PGSERVER_INSTANCE.psql(f"SELECT 1 FROM pg_database WHERE datname = '{DB_NAME}';")
|
||||||
|
db_exists = "(1 row)" in db_exists_result # Check for actual row in result
|
||||||
|
if not db_exists:
|
||||||
|
logger.info(f"Creating database: {DB_NAME}")
|
||||||
|
PGSERVER_INSTANCE.psql(f"CREATE DATABASE {DB_NAME};")
|
||||||
|
|
||||||
|
# Register cleanup
|
||||||
|
def cleanup_pgserver():
|
||||||
|
if PGSERVER_INSTANCE:
|
||||||
|
logger.debug("Shutting down embedded Postgres DB")
|
||||||
|
PGSERVER_INSTANCE.cleanup()
|
||||||
|
|
||||||
|
atexit.register(cleanup_pgserver)
|
||||||
|
|
||||||
|
# Update database configuration for pgserver
|
||||||
|
DB_HOST = PGSERVER_DATA_DIR
|
||||||
|
DB_PORT = "" # pgserver uses Unix socket, so port is empty
|
||||||
|
|
||||||
|
logger.info("Embedded Postgres DB started successfully")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error initializing embedded Postgres DB: {str(e)}. Use standard PostgreSQL server.")
|
||||||
|
|
||||||
|
# Set the database configuration
|
||||||
DATABASES = {
|
DATABASES = {
|
||||||
"default": {
|
"default": {
|
||||||
"ENGINE": "django.db.backends.postgresql",
|
"ENGINE": "django.db.backends.postgresql",
|
||||||
"HOST": os.getenv("POSTGRES_HOST", "localhost"),
|
"HOST": DB_HOST,
|
||||||
"PORT": os.getenv("POSTGRES_PORT", "5432"),
|
"PORT": DB_PORT,
|
||||||
"USER": os.getenv("POSTGRES_USER", "postgres"),
|
"USER": os.getenv("POSTGRES_USER", "postgres"),
|
||||||
"NAME": os.getenv("POSTGRES_DB", "khoj"),
|
"NAME": DB_NAME,
|
||||||
"PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"),
|
"PASSWORD": os.getenv("POSTGRES_PASSWORD", "postgres"),
|
||||||
"CONN_MAX_AGE": 0,
|
"CONN_MAX_AGE": 0,
|
||||||
"CONN_HEALTH_CHECKS": True,
|
"CONN_HEALTH_CHECKS": True,
|
||||||
|
|||||||
Reference in New Issue
Block a user