From dffbfee62bf6a333785cbe5158a5348802eed9ea Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Sun, 30 Jul 2023 01:48:18 -0700
Subject: [PATCH] Fix sample khoj docker config to index test data using new schema

---
Review note: chat-model is set to "gpt-3.5-turbo" below; "chat-gpt-3.5"
is not a model id accepted by the OpenAI API.

 config/khoj_docker.yml | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/config/khoj_docker.yml b/config/khoj_docker.yml
index d74a5e82..311e1588 100644
--- a/config/khoj_docker.yml
+++ b/config/khoj_docker.yml
@@ -15,11 +15,11 @@ content-type:
     compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
     embeddings-file: "/data/embeddings/markdown_embeddings.pt"
 
-  ledger:
+  pdf:
     input-files: null
-    input-filter: ["/data/ledger/**/*.beancount"]
-    compressed-jsonl: /data/embeddings/transactions.jsonl.gz
-    embeddings-file: /data/embeddings/transaction_embeddings.pt
+    input-filter: ["/data/pdf/**/*.pdf"]
+    compressed-jsonl: "/data/embeddings/pdf.jsonl.gz"
+    embeddings-file: "/data/embeddings/pdf_embeddings.pt"
 
   image:
     input-directories: ["/data/images/"]
@@ -27,29 +27,27 @@ content-type:
     batch-size: 50
     use-xmp-metadata: false
 
-  music:
-    input-files: ["/data/music/music.org"]
-    input-filter: null
-    compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
-    embeddings-file: "/data/embeddings/song_embeddings.pt"
+  notion: null
+  github: null
+  plugins: null
 
 search-type:
-  symmetric:
-    encoder: "sentence-transformers/all-MiniLM-L6-v2"
-    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
-    model_directory: "/data/models/symmetric"
-
+  symmetric: null
   asymmetric:
     encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
     cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
     model_directory: "/data/models/asymmetric"
-
   image:
     encoder: "sentence-transformers/clip-ViT-B-32"
     model_directory: "/data/models/image_encoder"
 
 processor:
-  #conversation:
-  #  openai-api-key: null
-  #  model: "text-davinci-003"
-  #  conversation-logfile: "/data/embeddings/conversation_logs.json"
+  conversation:
+    conversation-logfile: "/data/embeddings/conversation_logs.json"
+    enable-offline-chat: false
+    openai:
+      api-key: null
+      chat-model: "gpt-3.5-turbo"
+
+app:
+  should_log_telemetry: true