mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Store docker, conda, semantic-search configuration in a config directory
- Improves organization of the config files required by the application
- Declutters the application root directory by moving configs into a dedicated directory
This commit is contained in:
29
config/Dockerfile
Normal file
29
config/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
||||
# syntax=docker/dockerfile:1

# Pin the base image instead of `latest` so builds are reproducible
# (hadolint DL3007); bump this tag deliberately to upgrade conda.
FROM continuumio/miniconda3:4.10.3

# Install system dependencies (exiftool is used for image XMP metadata).
# `--no-install-recommends` keeps the layer small; cleaning the apt lists in
# the same RUN avoids baking the stale package index into the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        libimage-exiftool-perl && \
    rm -rf /var/lib/apt/lists/*

# Copy only the environment spec first so the (slow) conda layer below is
# cached until the dependency list actually changes, instead of being
# rebuilt on every source-code edit.
COPY config/environment.yml /app/config/environment.yml
WORKDIR /app

# Create the conda environment.
RUN conda env create -f config/environment.yml

# Add the local code to the /app directory.
# Since we mount the /app directory as a volume in docker-compose.yml, this
# allows us to automatically update the code in the Docker image when it's changed.
# The original `ADD .. /app` cannot reach above the build context — Docker
# resolves it to the context root — so `COPY . /app` is the explicit
# equivalent, and COPY is preferred over ADD for plain files (DL3020).
COPY . /app

# Get the port from the docker-compose environment. Default it so a bare
# `docker build` still produces a valid EXPOSE instruction
# (TODO: confirm 8000 matches the docker-compose default).
ARG PORT=8000
EXPOSE ${PORT}

# Use the conda environment we created to run the application.
# To enable the conda env, we cannot simply RUN `conda activate semantic-search`,
# since each RUN command in a Dockerfile is a separate bash shell.
# The environment would not carry forward.
# Instead, we'll use `conda run` to run the application.
# There are more arguments required for the script to run,
# but these should be passed in through the docker-compose.yml file.
# NOTE(review): the container still runs as root; consider adding a non-root
# USER once volume-mount permissions have been verified.
ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "semantic-search", \
            "python3", "-m", "src.main"]
|
||||
20
config/environment.yml
Normal file
20
config/environment.yml
Normal file
@@ -0,0 +1,20 @@
|
||||
# Conda environment spec for the semantic-search application.
# Consumed by `conda env create -f config/environment.yml` in config/Dockerfile;
# the `name` below must match the `--name` passed to `conda run` in the
# Dockerfile ENTRYPOINT.
name: semantic-search
channels:
  - conda-forge
dependencies:
  - python=3.*
  - numpy=1.*
  # ML stack used by the encoders configured in config/sample_config.yml.
  - pytorch=1.*
  - transformers=4.*
  # Pinned exactly, unlike the wildcard pins around it — presumably a
  # known-good release; confirm before loosening.
  - sentence-transformers=2.1.0
  # Web API server.
  - fastapi=0.*
  - uvicorn=0.*
  - pyyaml=5.*
  - pytest=6.*
  # Image search support.
  - pillow=8.*
  - torchvision=0.*
  # Conversation processor (see processor.conversation in sample_config.yml).
  - openai=0.*
  - pydantic=1.*
  - jinja2=3.0.*
  - aiofiles=0.*
  - huggingface_hub=0.*
|
||||
47
config/sample_config.yml
Normal file
47
config/sample_config.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
# Sample application configuration. Paths assume the docker-compose volume
# layout described below; copy and adapt per deployment.

content-type:
  # The /data/folder/ prefix to the folders is here because this is
  # the directory to which the local files are copied in the docker-compose.
  # If changing, the docker-compose volumes should also be changed to match.
  org:
    input-files: null
    input-filter: "/data/notes/*.org"
    # NOTE(review): notes uses `.json.gz` while the other content types use
    # `.jsonl.gz` — confirm this is intentional.
    compressed-jsonl: "/data/generated/notes.json.gz"
    embeddings-file: "/data/generated/note_embeddings.pt"

  # NOTE(review): values here are unquoted while the other sections quote
  # theirs — equivalent in YAML, but inconsistent.
  ledger:
    input-files: null
    input-filter: /data/ledger/*.beancount
    compressed-jsonl: /data/generated/transactions.jsonl.gz
    embeddings-file: /data/generated/transaction_embeddings.pt

  image:
    input-directory: "/data/images/"
    embeddings-file: "/data/generated/image_embeddings.pt"
    batch-size: 50
    # Requires exiftool (libimage-exiftool-perl, installed in config/Dockerfile).
    use-xmp-metadata: true

  music:
    input-files: ["/data/music/music.org"]
    input-filter: null
    compressed-jsonl: "/data/generated/songs.jsonl.gz"
    embeddings-file: "/data/generated/song_embeddings.pt"

# Encoder / cross-encoder model choices per search type. Models are cached
# under the listed model_directory paths.
search-type:
  symmetric:
    encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
    model_directory: "/data/models/symmetric"

  asymmetric:
    encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
    model_directory: "/data/models/asymmetric"

  image:
    encoder: "clip-ViT-B-32"
    model_directory: "/data/models/image_encoder"

processor:
  conversation:
    # Left null in the sample on purpose — supply a real key per deployment;
    # never commit one to the repository.
    openai-api-key: null
    conversation-logfile: "/data/generated/conversation_logs.json"
|
||||
Reference in New Issue
Block a user