Store docker, conda, semantic-search configuration in a config directory

- Improves the organization of the config files required by the application - Declutters the application root directory by moving config files out of it
2026-03-02 21:19:12 +00:00 · 2022-01-29 02:41:11 -05:00
parent 79c2224eaa
commit b0067fc32e
4 changed files with 4 additions and 4 deletions
--- a/config/Dockerfile
+++ b/config/Dockerfile
@@ -0,0 +1,29 @@
+# syntax=docker/dockerfile:1
+# NOTE(review): `latest` is a moving tag; pin a release tag for reproducible builds.
+FROM continuumio/miniconda3:latest
+
+# Install system dependencies. Skip recommended packages and remove the apt
+# package lists in the same layer so they do not end up in the image.
+RUN apt-get update -y && \
+    apt-get -y install --no-install-recommends libimage-exiftool-perl && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create the conda environment from the environment file alone, before adding
+# the application code, so code changes do not invalidate this cached layer.
+WORKDIR /app
+COPY config/environment.yml config/environment.yml
+RUN conda env create -f config/environment.yml && conda clean --all --yes
+
+# Add the local code (the build context root) to /app, the working directory.
+# docker-compose.yml mounts /app as a volume, so code changes reach the container.
+COPY . /app
+
+# Get the arguments from the docker-compose environment.
+ARG PORT
+EXPOSE ${PORT}
+
+# Run the application with `conda run`: `conda activate` in a RUN step would not
+# carry forward, since each RUN command is a separate shell. The remaining
+# script arguments should be passed in through the docker-compose.yml file.
+ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "semantic-search", \
+    "python3", "-m", "src.main"]
--- a/config/environment.yml
+++ b/config/environment.yml
@@ -0,0 +1,20 @@
+name: semantic-search  # environment name; the Dockerfile ENTRYPOINT selects it via `conda run --name`
+channels:  # conda-forge is the only channel listed
+  - conda-forge
+dependencies:  # `N.*` constrains only the major version; minor/patch releases float
+  - python=3.*
+  - numpy=1.*
+  - pytorch=1.*
+  - transformers=4.*
+  - sentence-transformers=2.1.0  # the only exact version pin in this file
+  - fastapi=0.*
+  - uvicorn=0.*
+  - pyyaml=5.*
+  - pytest=6.*
+  - pillow=8.*
+  - torchvision=0.*
+  - openai=0.*
+  - pydantic=1.*
+  - jinja2=3.0.*  # constrained to the 3.0 minor series, unlike the major-only pins above
+  - aiofiles=0.*
+  - huggingface_hub=0.*
--- a/config/sample_config.yml
+++ b/config/sample_config.yml
@@ -0,0 +1,47 @@
+content-type:
+  # Every path below starts with /data/ because that is the directory the
+  # local folders are mapped to by the docker-compose volumes. If you change
+  # a path here, change the matching docker-compose volume as well.
+  org:
+    input-files: null
+    input-filter: "/data/notes/*.org"
+    compressed-jsonl: "/data/generated/notes.jsonl.gz"
+    embeddings-file: "/data/generated/note_embeddings.pt"

+  ledger:
+    input-files: null
+    input-filter: "/data/ledger/*.beancount"
+    compressed-jsonl: "/data/generated/transactions.jsonl.gz"
+    embeddings-file: "/data/generated/transaction_embeddings.pt"

+  image:
+    input-directory: "/data/images/"
+    embeddings-file: "/data/generated/image_embeddings.pt"
+    batch-size: 50
+    use-xmp-metadata: true

+  music:
+    input-files: ["/data/music/music.org"]
+    input-filter: null
+    compressed-jsonl: "/data/generated/songs.jsonl.gz"
+    embeddings-file: "/data/generated/song_embeddings.pt"

+search-type:
+  symmetric:
+    encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/symmetric"  # NOTE(review): underscore key among hyphenated keys; kept as-is, confirm against the config parser

+  asymmetric:
+    encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/asymmetric"

+  image:
+    encoder: "clip-ViT-B-32"
+    model_directory: "/data/models/image_encoder"

+processor:
+  conversation:
+    openai-api-key: null
+    conversation-logfile: "/data/generated/conversation_logs.json"