diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 27903af4..468e8c88 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -87,7 +87,11 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las lastSync = lastSync.size > 0 ? lastSync : new Map(); // Add all files to index as multipart form data - const fileData = []; + let fileData = []; + let currentBatchSize = 0; + const MAX_BATCH_SIZE = 10 * 1024 * 1024; // 10MB max batch size + let currentBatch = []; + for (const file of files) { // Only push files that have been modified since last sync if not regenerating if (!regenerate && file.stat.mtime < (lastSync.get(file) ?? 0)) { @@ -98,31 +102,68 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las const encoding = supportedBinaryFileTypes.includes(file.extension) ? "binary" : "utf8"; const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? "; charset=UTF-8" : ""); const fileContent = encoding == 'binary' ? await vault.readBinary(file) : await vault.read(file); - fileData.push({ blob: new Blob([fileContent], { type: mimeType }), path: file.path }); + const fileItem = { blob: new Blob([fileContent], { type: mimeType }), path: file.path }; + + // Check if adding this file would exceed batch size + const fileSize = (typeof fileContent === 'string') ? new Blob([fileContent]).size : fileContent.byteLength; + if (currentBatchSize + fileSize > MAX_BATCH_SIZE && currentBatch.length > 0) { + fileData.push(currentBatch); + currentBatch = []; + currentBatchSize = 0; + } + + currentBatch.push(fileItem); + currentBatchSize += fileSize; } - // Add any previously synced files to be deleted to multipart form data + // Add any previously synced files to be deleted to final batch let filesToDelete: TFile[] = []; for (const lastSyncedFile of lastSync.keys()) { if (!files.includes(lastSyncedFile)) { countOfFilesToDelete++; let fileObj = new Blob([""], { type: filenameToMimeType(lastSyncedFile) }); - fileData.push({ blob: fileObj, path: lastSyncedFile.path }); + currentBatch.push({ blob: fileObj, path: lastSyncedFile.path }); filesToDelete.push(lastSyncedFile); } } - // Iterate through all indexable files in vault, 1000 at a time - let responses: string[] = []; + // Add final batch if not empty + if (currentBatch.length > 0) { + fileData.push(currentBatch); + } + + // Delete all files of enabled content types first if regenerating let error_message = null; - for (let i = 0; i < fileData.length; i += 1000) { - const filesGroup = fileData.slice(i, i + 1000); + const contentTypesToDelete = []; + if (regenerate) { + // Mark content types to delete based on user sync file type settings + if (setting.syncFileType.markdown) contentTypesToDelete.push('markdown'); + if (setting.syncFileType.pdf) contentTypesToDelete.push('pdf'); + if (setting.syncFileType.images) contentTypesToDelete.push('image'); + } + for (const contentType of contentTypesToDelete) { + const response = await fetch(`${setting.khojUrl}/api/content/type/${contentType}?client=obsidian`, { + method: "DELETE", + headers: { + 'Authorization': `Bearer ${setting.khojApiKey}`, + } + }); + if (!response.ok) { + error_message = "❗️Failed to clear existing content index"; + fileData = []; + } + } + + // Iterate through all indexable files in vault, 10Mb batch at a time + let responses: string[] = []; + for (const batch of fileData) { + // Create multipart form data with all files in batch const formData = new FormData(); - const method = regenerate ? "PUT" : "PATCH"; - filesGroup.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) }); - // Call Khoj backend to update index with all markdown, pdf files + batch.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) }); + + // Call Khoj backend to sync index with updated files in vault const response = await fetch(`${setting.khojUrl}/api/content?client=obsidian`, { - method: method, + method: "PATCH", headers: { 'Authorization': `Bearer ${setting.khojApiKey}`, }, @@ -167,7 +208,7 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting, las error_message = `❗️Could not connect to Khoj server. Ensure you can connect to it.`; break; } else { - error_message = `❗️Failed to sync your content with Khoj server. Raise issue on Khoj Discord or Github\nError: ${response.statusText}`; + error_message = `❗️Failed to sync all your content with Khoj server. Raise issue on Khoj Discord or Github\nError: ${response.statusText}`; } } else { responses.push(await response.text()); diff --git a/src/interface/web/app/settings/page.tsx b/src/interface/web/app/settings/page.tsx index 9b845987..14929fa0 100644 --- a/src/interface/web/app/settings/page.tsx +++ b/src/interface/web/app/settings/page.tsx @@ -595,48 +595,48 @@ export default function SettingsView() { } }; - const disconnectContent = async (type: string) => { + const disconnectContent = async (source: string) => { try { - const response = await fetch(`/api/content/${type}`, { + const response = await fetch(`/api/content/source/${source}`, { method: "DELETE", headers: { "Content-Type": "application/json", }, }); - if (!response.ok) throw new Error(`Failed to disconnect ${type}`); + if (!response.ok) throw new Error(`Failed to disconnect ${source}`); // Set updated user settings if (userConfig) { let newUserConfig = userConfig; - if (type === "computer") { + if (source === "computer") { newUserConfig.enabled_content_source.computer = false; - } else if (type === "notion") { + } else if (source === "notion") { newUserConfig.enabled_content_source.notion = false; newUserConfig.notion_token = null; setNotionToken(newUserConfig.notion_token); - } else if (type === "github") { + } else if (source === "github") { newUserConfig.enabled_content_source.github = false; } setUserConfig(newUserConfig); } // Notify user about disconnecting content source - if (type === "computer") { + if (source === "computer") { toast({ title: `✅ Deleted Synced Files`, description: "Your synced documents have been deleted.", }); } else { toast({ - title: `✅ Disconnected ${type}`, - description: `Your ${type} integration to Khoj has been disconnected.`, + title: `✅ Disconnected ${source}`, + description: `Your ${source} integration to Khoj has been disconnected.`, }); } } catch (error) { - console.error(`Error disconnecting ${type}:`, error); + console.error(`Error disconnecting ${source}:`, error); toast({ - title: `⚠️ Failed to Disconnect ${type}`, - description: `Failed to disconnect from ${type}. Try again or contact team@khoj.dev`, + title: `⚠️ Failed to Disconnect ${source}`, + description: `Failed to disconnect from ${source}. Try again or contact team@khoj.dev`, }); } }; diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index ccbbcd99..33e879aa 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -1549,6 +1549,11 @@ class FileObjectAdapters: async def adelete_file_object_by_name(user: KhojUser, file_name: str): return await FileObject.objects.filter(user=user, file_name=file_name).adelete() + @staticmethod + @arequire_valid_user + async def adelete_file_objects_by_names(user: KhojUser, file_names: List[str]): + return await FileObject.objects.filter(user=user, file_name__in=file_names).adelete() + @staticmethod @arequire_valid_user async def adelete_all_file_objects(user: KhojUser): @@ -1678,6 +1683,15 @@ class EntryAdapters: .values_list("file_path", flat=True) ) + @staticmethod + @require_valid_user + def get_all_filenames_by_type(user: KhojUser, file_type: str): + return ( + Entry.objects.filter(user=user, file_type=file_type) + .distinct("file_path") + .values_list("file_path", flat=True) + ) + @staticmethod @require_valid_user def get_size_of_indexed_data_in_mb(user: KhojUser): diff --git a/src/khoj/routers/api_content.py b/src/khoj/routers/api_content.py index d9412629..90e4b498 100644 --- a/src/khoj/routers/api_content.py +++ b/src/khoj/routers/api_content.py @@ -401,6 +401,36 @@ async def get_file_object( ) +@api_content.delete("/type/{content_type}", status_code=200) +@requires(["authenticated"]) +async def delete_content_type( + request: Request, + content_type: str, + client: Optional[str] = None, +): + user = request.user.object + if content_type not in {s.value for s in SearchType}: + raise ValueError(f"Unsupported content type: {content_type}") + if content_type == "all": + await EntryAdapters.adelete_all_entries(user) + else: + # Delete file objects of the given type + file_list = await sync_to_async(list)(EntryAdapters.get_all_filenames_by_type(user, content_type)) # type: ignore[call-arg] + await FileObjectAdapters.adelete_file_objects_by_names(user, file_list) + # Delete entries of the given type + await EntryAdapters.adelete_all_entries(user, file_type=content_type) + + update_telemetry_state( + request=request, + telemetry_type="api", + api="delete_content_config", + client=client, + metadata={"content_type": content_type}, + ) + + return {"status": "ok"} + + @api_content.get("/{content_source}", response_model=List[str]) @requires(["authenticated"]) async def get_content_source( @@ -420,7 +450,7 @@ async def get_content_source( return await sync_to_async(list)(EntryAdapters.get_all_filenames_by_source(user, content_source)) # type: ignore[call-arg] -@api_content.delete("/{content_source}", status_code=200) +@api_content.delete("/source/{content_source}", status_code=200) @requires(["authenticated"]) async def delete_content_source( request: Request, @@ -434,7 +464,12 @@ async def delete_content_source( raise ValueError(f"Invalid content source: {content_source}") elif content_object != "Computer": await content_object.objects.filter(user=user).adelete() - await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source) + else: + # Delete file objects from the given source + file_list = await sync_to_async(list)(EntryAdapters.get_all_filenames_by_source(user, content_source)) # type: ignore[call-arg] + await FileObjectAdapters.adelete_file_objects_by_names(user, file_list) + # Delete entries from the given source + await EntryAdapters.adelete_all_entries(user, file_source=content_source) if content_source == DbEntry.EntrySource.NOTION: await NotionConfig.objects.filter(user=user).adelete() @@ -449,7 +484,6 @@ async def delete_content_source( metadata={"content_source": content_source}, ) - enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user) return {"status": "ok"}