mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 13:23:15 +00:00
Push 1000 files at a time from the Obsidian client for indexing
FastAPI API endpoints only support uploading 1000 files at a time, so split the files to index into groups of 1000 and upload each group to the index/update API endpoint.
This commit is contained in:
@@ -31,39 +31,54 @@ function fileExtensionToMimeType (extension: string): string {
|
|||||||
export async function updateContentIndex(vault: Vault, setting: KhojSetting, lastSyncedFiles: TFile[], regenerate: boolean = false): Promise<TFile[]> {
|
export async function updateContentIndex(vault: Vault, setting: KhojSetting, lastSyncedFiles: TFile[], regenerate: boolean = false): Promise<TFile[]> {
|
||||||
// Get all markdown, pdf files in the vault
|
// Get all markdown, pdf files in the vault
|
||||||
console.log(`Khoj: Updating Khoj content index...`)
|
console.log(`Khoj: Updating Khoj content index...`)
|
||||||
const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf');
|
const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'markdown' || file.extension === 'pdf');
|
||||||
const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']
|
const binaryFileTypes = ['pdf']
|
||||||
let countOfFilesToIndex = 0;
|
let countOfFilesToIndex = 0;
|
||||||
let countOfFilesToDelete = 0;
|
let countOfFilesToDelete = 0;
|
||||||
|
|
||||||
// Add all files to index as multipart form data
|
// Add all files to index as multipart form data
|
||||||
const formData = new FormData();
|
const fileData = [];
|
||||||
for (const file of files) {
|
for (const file of files) {
|
||||||
countOfFilesToIndex++;
|
countOfFilesToIndex++;
|
||||||
const encoding = binaryFileTypes.includes(file.extension) ? "binary" : "utf8";
|
const encoding = binaryFileTypes.includes(file.extension) ? "binary" : "utf8";
|
||||||
const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? "; charset=UTF-8" : "");
|
const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? "; charset=UTF-8" : "");
|
||||||
const fileContent = encoding == 'binary' ? await vault.readBinary(file) : await vault.read(file);
|
const fileContent = encoding == 'binary' ? await vault.readBinary(file) : await vault.read(file);
|
||||||
formData.append('files', new Blob([fileContent], { type: mimeType }), file.path);
|
fileData.push({blob: new Blob([fileContent], { type: mimeType }), path: file.path});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add any previously synced files to be deleted to multipart form data
|
// Add any previously synced files to be deleted to multipart form data
|
||||||
for (const lastSyncedFile of lastSyncedFiles) {
|
for (const lastSyncedFile of lastSyncedFiles) {
|
||||||
if (!files.includes(lastSyncedFile)) {
|
if (!files.includes(lastSyncedFile)) {
|
||||||
countOfFilesToDelete++;
|
countOfFilesToDelete++;
|
||||||
formData.append('files', new Blob([]), lastSyncedFile.path);
|
fileData.push({blob: new Blob([]), path: lastSyncedFile.path});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Call Khoj backend to update index with all markdown, pdf files
|
// Iterate through all indexable files in vault, 1000 at a time
|
||||||
const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, {
|
let batchResponseSuccess = true;
|
||||||
method: 'POST',
|
let batchResponseThrottled = false;
|
||||||
headers: {
|
for (let i = 0; i < fileData.length && !batchResponseThrottled; i += 1000) {
|
||||||
'Authorization': `Bearer ${setting.khojApiKey}`,
|
const filesGroup = fileData.slice(i, i + 1000);
|
||||||
},
|
const formData = new FormData();
|
||||||
body: formData,
|
filesGroup.forEach(fileItem => { formData.append('files', fileItem.blob, fileItem.path) });
|
||||||
});
|
// Call Khoj backend to update index with all markdown, pdf files
|
||||||
|
const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Authorization': `Bearer ${setting.khojApiKey}`,
|
||||||
|
},
|
||||||
|
body: formData,
|
||||||
|
});
|
||||||
|
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
|
batchResponseSuccess = false;
|
||||||
|
batchResponseThrottled = response.status === 429;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (batchResponseThrottled) {
|
||||||
|
new Notice(`❗️Failed to update Khoj content index. Requests were throttled. Upgrade your subscription or try again later.`);
|
||||||
|
} else if (!batchResponseSuccess) {
|
||||||
new Notice(`❗️Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
|
new Notice(`❗️Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
|
||||||
} else {
|
} else {
|
||||||
console.log(`✅ Refreshed Khoj content index. Updated: ${countOfFilesToIndex} files, Deleted: ${countOfFilesToDelete} files.`);
|
console.log(`✅ Refreshed Khoj content index. Updated: ${countOfFilesToIndex} files, Deleted: ${countOfFilesToDelete} files.`);
|
||||||
@@ -92,7 +107,7 @@ export async function createNote(name: string, newLeaf = false): Promise<void> {
|
|||||||
console.error('Khoj: Could not create note.\n' + (e as any).message);
|
console.error('Khoj: Could not create note.\n' + (e as any).message);
|
||||||
throw e
|
throw e
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function createNoteAndCloseModal(query: string, modal: Modal, opt?: { newLeaf: boolean }): Promise<void> {
|
export async function createNoteAndCloseModal(query: string, modal: Modal, opt?: { newLeaf: boolean }): Promise<void> {
|
||||||
try {
|
try {
|
||||||
|
|||||||
Reference in New Issue
Block a user