mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-04 21:29:12 +00:00
Push Files to Index from Emacs, Obsidian & Desktop Clients using Multi-Part Forms (#499)
### Overview
- Add ability to push data to index from the Emacs, Obsidian client
- Switch to standard mechanism of syncing files via HTTP multi-part/form. Previously we were streaming the data as JSON
- Benefits of new mechanism
- No manual parsing of files to send or receive on clients or server is required as most have in-built mechanisms to send multi-part/form requests
- The whole response is not required to be kept in memory to parse content as JSON. As individual files arrive they're automatically pushed to disk to conserve memory if required
- Binary files don't need to be encoded on client and decoded on server
### Code Details
### Major
- Use multi-part form to receive files to index on server
- Use multi-part form to send files to index on desktop client
- Send files to index on server from the khoj.el emacs client
- Send content for indexing on server at a regular interval from khoj.el
- Send files to index on server from the khoj obsidian client
- Update tests to test multi-part/form method of pushing files to index
#### Minor
- Put indexer API endpoint under /api path segment
- Explicitly make GET request to /config/data from khoj.el:khoj-server-configure method
- Improve emoji, message on content index updated via logger
- Don't call khoj server on khoj.el load, only once khoj invoked explicitly by user
- Improve indexing of binary files
- Let fs_syncer pass PDF files directly as binary before indexing
- Use encoding of each file set in indexer request to read file
- Add CORS policy to khoj server. Allow requests from khoj apps, obsidian & localhost
- Update indexer API endpoint URL to` index/update` from `indexer/batch`
Resolves #471 #243
This commit is contained in:
@@ -8,7 +8,6 @@ const {dialog} = require('electron');
|
||||
|
||||
const cron = require('cron').CronJob;
|
||||
const axios = require('axios');
|
||||
const { Readable } = require('stream');
|
||||
|
||||
const KHOJ_URL = 'http://127.0.0.1:42110'
|
||||
|
||||
@@ -65,7 +64,7 @@ const schema = {
|
||||
|
||||
var state = {}
|
||||
|
||||
const store = new Store({schema});
|
||||
const store = new Store({ schema });
|
||||
|
||||
console.log(store);
|
||||
|
||||
@@ -86,37 +85,48 @@ function handleSetTitle (event, title) {
|
||||
});
|
||||
}
|
||||
|
||||
function filenameToMimeType (filename) {
|
||||
const extension = filename.split('.').pop();
|
||||
switch (extension) {
|
||||
case 'pdf':
|
||||
return 'application/pdf';
|
||||
case 'png':
|
||||
return 'image/png';
|
||||
case 'jpg':
|
||||
case 'jpeg':
|
||||
return 'image/jpeg';
|
||||
case 'md':
|
||||
case 'markdown':
|
||||
return 'text/markdown';
|
||||
case 'org':
|
||||
return 'text/org';
|
||||
default:
|
||||
return 'text/plain';
|
||||
}
|
||||
}
|
||||
|
||||
function pushDataToKhoj (regenerate = false) {
|
||||
let filesToPush = [];
|
||||
const files = store.get('files');
|
||||
const folders = store.get('folders');
|
||||
state = {
|
||||
completed: true
|
||||
const files = store.get('files') || [];
|
||||
const folders = store.get('folders') || [];
|
||||
state = { completed: true }
|
||||
|
||||
for (const file of files) {
|
||||
filesToPush.push(file.path);
|
||||
}
|
||||
|
||||
if (files) {
|
||||
for (file of files) {
|
||||
filesToPush.push(file.path);
|
||||
}
|
||||
}
|
||||
if (folders) {
|
||||
for (folder of folders) {
|
||||
const files = fs.readdirSync(folder.path, { withFileTypes: true });
|
||||
for (file of files) {
|
||||
if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
|
||||
filesToPush.push(path.join(folder.path, file.name));
|
||||
}
|
||||
for (const folder of folders) {
|
||||
const files = fs.readdirSync(folder.path, { withFileTypes: true });
|
||||
for (const file of files) {
|
||||
if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
|
||||
filesToPush.push(path.join(folder.path, file.name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let data = {
|
||||
files: []
|
||||
}
|
||||
|
||||
const lastSync = store.get('lastSync') || [];
|
||||
|
||||
for (file of filesToPush) {
|
||||
const formData = new FormData();
|
||||
for (const file of filesToPush) {
|
||||
const stats = fs.statSync(file);
|
||||
if (!regenerate) {
|
||||
if (stats.mtime.toISOString() < lastSync.find((syncedFile) => syncedFile.path === file)?.datetime) {
|
||||
@@ -125,18 +135,10 @@ function pushDataToKhoj (regenerate = false) {
|
||||
}
|
||||
|
||||
try {
|
||||
let rawData;
|
||||
// If the file is a PDF or IMG file, read it as a binary file
|
||||
if (binaryFileTypes.includes(file.split('.').pop())) {
|
||||
rawData = fs.readFileSync(file).toString('base64');
|
||||
} else {
|
||||
rawData = fs.readFileSync(file, 'utf8');
|
||||
}
|
||||
|
||||
data.files.push({
|
||||
path: file,
|
||||
content: rawData
|
||||
});
|
||||
encoding = binaryFileTypes.includes(file.split('.').pop()) ? "binary" : "utf8";
|
||||
mimeType = filenameToMimeType(file) + (encoding === "utf8" ? "; charset=UTF-8" : "");
|
||||
fileObj = new Blob([fs.createReadStream(file, encoding)], { type: mimeType });
|
||||
formData.append('files', fileObj, file);
|
||||
state[file] = {
|
||||
success: true,
|
||||
}
|
||||
@@ -151,44 +153,37 @@ function pushDataToKhoj (regenerate = false) {
|
||||
|
||||
for (const syncedFile of lastSync) {
|
||||
if (!filesToPush.includes(syncedFile.path)) {
|
||||
data.files.push({
|
||||
path: syncedFile.path,
|
||||
content: ""
|
||||
});
|
||||
fileObj = new Blob([""], { type: filenameToMimeType(syncedFile.path) });
|
||||
formData.append('files', fileObj, syncedFile.path);
|
||||
}
|
||||
}
|
||||
|
||||
const headers = { 'x-api-key': 'secret', 'Content-Type': 'application/json' };
|
||||
|
||||
const stream = new Readable({
|
||||
read() {
|
||||
this.push(JSON.stringify(data));
|
||||
this.push(null);
|
||||
}
|
||||
});
|
||||
|
||||
const hostURL = store.get('hostURL') || KHOJ_URL;
|
||||
|
||||
axios.post(`${hostURL}/v1/indexer/batch?regenerate=${regenerate}`, stream, { headers })
|
||||
.then(response => {
|
||||
console.log(response.data);
|
||||
const win = BrowserWindow.getAllWindows()[0];
|
||||
win.webContents.send('update-state', state);
|
||||
let lastSync = [];
|
||||
for (const file of filesToPush) {
|
||||
lastSync.push({
|
||||
path: file,
|
||||
datetime: new Date().toISOString()
|
||||
});
|
||||
}
|
||||
store.set('lastSync', lastSync);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error(error);
|
||||
state['completed'] = false
|
||||
const win = BrowserWindow.getAllWindows()[0];
|
||||
win.webContents.send('update-state', state);
|
||||
});
|
||||
if (!!formData?.entries()?.next().value) {
|
||||
const hostURL = store.get('hostURL') || KHOJ_URL;
|
||||
const headers = {
|
||||
'x-api-key': 'secret'
|
||||
};
|
||||
axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers })
|
||||
.then(response => {
|
||||
console.log(response.data);
|
||||
const win = BrowserWindow.getAllWindows()[0];
|
||||
win.webContents.send('update-state', state);
|
||||
let lastSync = [];
|
||||
for (const file of filesToPush) {
|
||||
lastSync.push({
|
||||
path: file,
|
||||
datetime: new Date().toISOString()
|
||||
});
|
||||
}
|
||||
store.set('lastSync', lastSync);
|
||||
})
|
||||
.catch(error => {
|
||||
console.error(error);
|
||||
state['completed'] = false
|
||||
const win = BrowserWindow.getAllWindows()[0];
|
||||
win.webContents.send('update-state', state);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
pushDataToKhoj();
|
||||
|
||||
Reference in New Issue
Block a user