Fix sample Khoj Docker config to index test data using the new schema

This commit is contained in:
Debanjum Singh Solanky
2023-07-30 01:48:18 -07:00
parent 53810a0ff7
commit dffbfee62b

View File

@@ -15,11 +15,11 @@ content-type:
compressed-jsonl: "/data/embeddings/markdown.jsonl.gz" compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
embeddings-file: "/data/embeddings/markdown_embeddings.pt" embeddings-file: "/data/embeddings/markdown_embeddings.pt"
ledger: pdf:
input-files: null input-files: null
input-filter: ["/data/ledger/**/*.beancount"] input-filter: ["/data/pdf/**/*.pdf"]
compressed-jsonl: /data/embeddings/transactions.jsonl.gz compressed-jsonl: "/data/embeddings/pdf.jsonl.gz"
embeddings-file: /data/embeddings/transaction_embeddings.pt embeddings-file: "/data/embeddings/pdf_embeddings.pt"
image: image:
input-directories: ["/data/images/"] input-directories: ["/data/images/"]
@@ -27,29 +27,27 @@ content-type:
batch-size: 50 batch-size: 50
use-xmp-metadata: false use-xmp-metadata: false
music: notion: null
input-files: ["/data/music/music.org"] github: null
input-filter: null plugins: null
compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
embeddings-file: "/data/embeddings/song_embeddings.pt"
search-type: search-type:
symmetric: symmetric: null
encoder: "sentence-transformers/all-MiniLM-L6-v2"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "/data/models/symmetric"
asymmetric: asymmetric:
encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1" encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2" cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "/data/models/asymmetric" model_directory: "/data/models/asymmetric"
image: image:
encoder: "sentence-transformers/clip-ViT-B-32" encoder: "sentence-transformers/clip-ViT-B-32"
model_directory: "/data/models/image_encoder" model_directory: "/data/models/image_encoder"
processor: processor:
#conversation: conversation:
# openai-api-key: null conversation-logfile: "/data/embeddings/conversation_logs.json"
# model: "text-davinci-003" enable-offline-chat: false
# conversation-logfile: "/data/embeddings/conversation_logs.json" openai:
api-key: null
chat-model: "chat-gpt-3.5"
app:
should_log_telemetry: true