From f2e293a14905cbdd6af5d668ec5433c46acd4f2a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 17 Oct 2023 02:17:44 -0700 Subject: [PATCH] Push Vault files to index to Khoj server using Khoj Obsidian plugin Use a multipart/form-data request to sync Markdown and PDF files in the vault to the index on the Khoj server. Run a scheduled job to push vault updates for indexing every hour. --- src/interface/obsidian/src/main.ts | 20 +++++++++-- src/interface/obsidian/src/utils.ts | 54 ++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/src/interface/obsidian/src/main.ts b/src/interface/obsidian/src/main.ts index 935945dd..65dac069 100644 --- a/src/interface/obsidian/src/main.ts +++ b/src/interface/obsidian/src/main.ts @@ -1,12 +1,13 @@ -import { Notice, Plugin } from 'obsidian'; +import { Notice, Plugin, TFile } from 'obsidian'; import { KhojSetting, KhojSettingTab, DEFAULT_SETTINGS } from 'src/settings' import { KhojSearchModal } from 'src/search_modal' import { KhojChatModal } from 'src/chat_modal' -import { configureKhojBackend } from './utils'; +import { configureKhojBackend, updateContentIndex } from './utils'; export default class Khoj extends Plugin { settings: KhojSetting; + indexingTimer: NodeJS.Timeout; async onload() { await this.loadSettings(); @@ -54,6 +55,13 @@ export default class Khoj extends Plugin { // Add a settings tab so the user can configure khoj this.addSettingTab(new KhojSettingTab(this.app, this)); + + // Add scheduled job to update index every 60 minutes + this.indexingTimer = setInterval(async () => { + if (this.settings.autoConfigure) { + this.lastSyncedFiles = await updateContentIndex(this.app.vault, this.settings); + } + }, 60 * 60 * 1000); } async loadSettings() { @@ -72,4 +80,12 @@ export default class Khoj extends Plugin { } this.saveData(this.settings); } + + async onunload() { + // Remove scheduled job to update index at regular cadence + if (this.indexingTimer) + 
clearInterval(this.indexingTimer); + + this.unload(); + } } diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 920da583..1707703a 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -1,4 +1,4 @@ -import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian'; +import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal, TFile } from 'obsidian'; import { KhojSetting } from 'src/settings' export function getVaultAbsolutePath(vault: Vault): string { @@ -22,6 +22,58 @@ interface ProcessorData { }; } +function fileExtensionToMimeType (extension: string): string { + switch (extension) { + case 'pdf': + return 'application/pdf'; + case 'png': + return 'image/png'; + case 'jpg': + case 'jpeg': + return 'image/jpeg'; + case 'md': + case 'markdown': + return 'text/markdown'; + case 'org': + return 'text/org'; + default: + return 'text/plain'; + } +} + +export async function updateContentIndex(vault: Vault, setting: KhojSetting): Promise { + // Get all markdown, pdf files in the vault + console.log(`Khoj: Updating Khoj content index...`) + const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf'); + const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg'] + + // Create multipart form data with all markdown, pdf files + const formData = new FormData(); + for (const file of files) { + const encoding = binaryFileTypes.includes(file.extension) ? "binary" : "utf8"; + const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? 
"; charset=UTF-8" : ""); + const fileContent = await vault.read(file); + formData.append('files', new Blob([fileContent], { type: mimeType }), file.path); + } + + // Call Khoj backend to update index with all markdown, pdf files + const response = await fetch(`${setting.khojUrl}/api/v1/indexer/batch`, { + method: 'POST', + headers: { + 'x-api-key': 'secret', + }, + body: formData, + }); + + if (!response.ok) { + new Notice(`❗️Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`); + } else { + console.log(`✅ Refreshed Khoj content index.`); + } + + return files; +} + export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) { let vaultPath = getVaultAbsolutePath(vault); let mdInVault = `${vaultPath}/**/*.md`;