Fix sample khoj docker config to index test data using new schema

2026-03-02 21:19:12 +00:00 · 2023-07-30 01:48:18 -07:00
parent 53810a0ff7
commit dffbfee62b
1 changed file with 17 additions and 19 deletions
--- a/config/khoj_docker.yml
+++ b/config/khoj_docker.yml
@@ -15,11 +15,11 @@ content-type:
    compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
    embeddings-file: "/data/embeddings/markdown_embeddings.pt"
-  ledger:
+  pdf:
    input-files: null
-    input-filter: ["/data/ledger/**/*.beancount"]
+    input-filter: ["/data/pdf/**/*.pdf"]
-    compressed-jsonl: /data/embeddings/transactions.jsonl.gz
+    compressed-jsonl: "/data/embeddings/pdf.jsonl.gz"
-    embeddings-file: /data/embeddings/transaction_embeddings.pt
+    embeddings-file: "/data/embeddings/pdf_embeddings.pt"
  image:
    input-directories: ["/data/images/"]
@@ -27,29 +27,27 @@ content-type:
    batch-size: 50
    use-xmp-metadata: false
-  music:
+  notion: null
-    input-files: ["/data/music/music.org"]
+  github: null
-    input-filter: null
+  plugins: null
    compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
    embeddings-file: "/data/embeddings/song_embeddings.pt"
 search-type:
-  symmetric:
+  symmetric: null
    encoder: "sentence-transformers/all-MiniLM-L6-v2"
    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
    model_directory: "/data/models/symmetric"
  asymmetric:
    encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
    model_directory: "/data/models/asymmetric"
  image:
    encoder: "sentence-transformers/clip-ViT-B-32"
    model_directory: "/data/models/image_encoder"
 processor:
-  #conversation:
+  conversation:
-  #  openai-api-key: null
+    conversation-logfile: "/data/embeddings/conversation_logs.json"
-  #  model: "text-davinci-003"
+    enable-offline-chat: false
-  #  conversation-logfile: "/data/embeddings/conversation_logs.json"
+    openai:
      api-key: null
      chat-model: "chat-gpt-3.5"
 app:
  should_log_telemetry: true