diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index 678a9e08..851044ef 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -416,10 +416,31 @@ Auto invokes setup steps on calling main entrypoint." (files-to-index (or file-paths (append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files))) (type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type))) + (delete-files (khoj--get-delete-file-list khoj--indexed-files files-to-index)) (inhibit-message t) - (message-log-max nil)) - (let ((url-request-method "POST") - (url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary)) + (message-log-max nil) + (batch-size 30)) + (dolist (files (-partition-all batch-size files-to-index)) + (khoj--send-index-update-request (khoj--render-update-files-as-request-body files boundary) boundary content-type type-query force)) + (when delete-files + (khoj--send-index-update-request (khoj--render-delete-files-as-request-body delete-files boundary) boundary content-type type-query force)) + (setq khoj--indexed-files files-to-index))) + +(defun khoj--get-delete-file-list (indexed-files upload-files) + "Get delete file list. when `INDEXED-FILES' no longer in `UPLOAD-FILES'. +delete them. return delete-file-list." + (let (delete-files '()) + (dolist (indexed-file indexed-files) + (when (not (member indexed-file upload-files)) + (push indexed-file delete-files))) + delete-files)) + +(defun khoj--send-index-update-request (body boundary &optional content-type type-query force) + "Send `BODY' request to khoj server. 'TYPE-QUERY' is appended to the URL. +Use `BOUNDARY' to add headder conte +nt-type." + (let ((url-request-method "POST") + (url-request-data body) (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary)) ("Authorization" . ,(format "Bearer %s" khoj-api-key))))) (with-current-buffer @@ -437,10 +458,9 @@ Auto invokes setup steps on calling main entrypoint." (if content-type (format "%s " content-type) "all") (string-trim (format "%s %s" (nth 1 (nth 1 status)) (nth 2 (nth 1 status)))) (if (> (- (point-max) (point)) 0) (format ". Response: %s" (string-trim (buffer-substring-no-properties (point) (point-max)))) "")))))) - nil t t))) - (setq khoj--indexed-files files-to-index))) + nil t t)))) -(defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary) +(defun khoj--render-update-files-as-request-body (files-to-index boundary) "Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body. Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request." (with-temp-buffer @@ -448,32 +468,42 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request. (insert "\n") (dolist (file-to-index files-to-index) ;; find file content-type. Choose from org, markdown, pdf, plaintext - (let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org") - ((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown") - ((string-match "\\.pdf$" file-to-index) "application/pdf") - (t "text/plain")))) + (let ((content-type (khoj--filename-to-mime-type file-to-index)) + (file-name (encode-coding-string file-to-index 'utf-8))) (insert (format "--%s\r\n" boundary)) - (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index)) + (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name)) (insert (format "Content-Type: %s\r\n\r\n" content-type)) (insert (with-temp-buffer (insert-file-contents-literally file-to-index) (buffer-string))) (insert "\r\n"))) - (dolist (file-to-index previously-indexed-files) - (when (not (member file-to-index files-to-index)) - ;; find file content-type. Choose from org, markdown, pdf, plaintext - (let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org") - ((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown") - ((string-match "\\.pdf$" file-to-index) "application/pdf") - (t "text/plain")))) - (insert (format "--%s\r\n" boundary)) - (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index)) - (insert "Content-Type: text/org\r\n\r\n") - (insert "") - (insert "\r\n")))) (insert (format "--%s--\r\n" boundary)) (buffer-string))) +(defun khoj--render-delete-files-as-request-body (delete-files boundary) + "Render `DELETE-FILES' as multi-part form body. +Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request." + (with-temp-buffer + (set-buffer-multibyte nil) + (insert "\n") + (dolist (file-to-index delete-files) + (let ((content-type (khoj--filename-to-mime-type file-to-index)) + (file-name (encode-coding-string file-to-index 'utf-8))) + (insert (format "--%s\r\n" boundary)) + (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-name)) + (insert (format "Content-Type: %s\r\n\r\n" content-type)) + (insert "") + (insert "\r\n"))) + (insert (format "--%s--\r\n" boundary)) + (buffer-string))) + +(defun khoj--filename-to-mime-type (file-name) + "`FILE-NAME' to mimeType." + (cond ((string-match "\\.org$" file-name) "text/org") + ((string-match "\\.\\(md\\|markdown\\)$" file-name) "text/markdown") + ((string-match "\\.pdf$" file-name) "application/pdf") + (t "text/plain"))) + ;; Cancel any running indexing timer, first (when khoj--index-timer (cancel-timer khoj--index-timer)) diff --git a/src/interface/emacs/tests/khoj-tests.el b/src/interface/emacs/tests/khoj-tests.el index c0d9f4a6..cd2a1f02 100644 --- a/src/interface/emacs/tests/khoj-tests.el +++ b/src/interface/emacs/tests/khoj-tests.el @@ -219,7 +219,7 @@ Rule everything\n") (progn (should (equal - (khoj--render-files-as-request-body (list upgrade-file act-file) '() "khoj") + (khoj--render-update-files-as-request-body (list upgrade-file act-file) "khoj") (format "\n--khoj\r\n\ Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\ @@ -244,19 +244,16 @@ Rule everything\n\n\r\n\ (progn (should (equal - (khoj--render-files-as-request-body (list upgrade-file act-file) (list upgrade-file act-file "/tmp/deleted-file.org") "khoj") + (khoj--render-delete-files-as-request-body (list upgrade-file act-file "/tmp/deleted-file.org") "khoj") (format "\n--khoj\r\n\ Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\ Content-Type: text/org\r\n\r\n\ -# Become God\n\ -## Upgrade\n\n\ -Penance to Immortality\n\n\r +\r --khoj\r\n\ Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\ Content-Type: text/org\r\n\r\n\ -## Act\n\n\ -Rule everything\n\n\r +\r --khoj\r\n\ Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\ Content-Type: text/org\r\n\r\n\