From 043de068ff5e218ea05d582740b997d408c8dd77 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 12 Jan 2025 11:12:43 +0700 Subject: [PATCH] Fix force sync of large vaults from Obsidian Previously if you tried to force sync a vault with more than 1000 files it would only end up keeping the last batch because the PUT API call would delete all previous entries. This change calls DELETE for all previously indexed data first, followed by a PATCH to index current vault on a force sync (regenerate) request. This ensures that files from previous batches are not deleted. --- src/interface/obsidian/src/utils.ts | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 39f33ab9..468e8c88 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -87,7 +87,7 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las lastSync = lastSync.size > 0 ? lastSync : new Map(); // Add all files to index as multipart form data - const fileData = []; + let fileData = []; let currentBatchSize = 0; const MAX_BATCH_SIZE = 10 * 1024 * 1024; // 10MB max batch size let currentBatch = []; @@ -132,18 +132,38 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las fileData.push(currentBatch); } + // Delete all files of enabled content types first if regenerating + let error_message = null; + const contentTypesToDelete = []; + if (regenerate) { + // Mark content types to delete based on user sync file type settings + if (setting.syncFileType.markdown) contentTypesToDelete.push('markdown'); + if (setting.syncFileType.pdf) contentTypesToDelete.push('pdf'); + if (setting.syncFileType.images) contentTypesToDelete.push('image'); + } + for (const contentType of contentTypesToDelete) { + const response = await fetch(`${setting.khojUrl}/api/content/type/${contentType}?client=obsidian`, { + method: "DELETE", + headers: { + 'Authorization': `Bearer ${setting.khojApiKey}`, + } + }); + if (!response.ok) { + error_message = "❗️Failed to clear existing content index"; + fileData = []; + } + } + // Iterate through all indexable files in vault, 10Mb batch at a time let responses: string[] = []; - let error_message = null; for (const batch of fileData) { // Create multipart form data with all files in batch const formData = new FormData(); batch.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) }); // Call Khoj backend to sync index with updated files in vault - const method = regenerate ? "PUT" : "PATCH"; const response = await fetch(`${setting.khojUrl}/api/content?client=obsidian`, { - method: method, + method: "PATCH", headers: { 'Authorization': `Bearer ${setting.khojApiKey}`, },