mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 13:19:16 +00:00
Push Files to Index from Emacs, Obsidian & Desktop Clients using Multi-Part Forms (#499)
### Overview
- Add ability to push data to index from the Emacs, Obsidian client
- Switch to standard mechanism of syncing files via HTTP multi-part/form. Previously we were streaming the data as JSON
- Benefits of new mechanism
- No manual parsing of files to send or receive on clients or server is required as most have in-built mechanisms to send multi-part/form requests
- The whole response is not required to be kept in memory to parse content as JSON. As individual files arrive they're automatically pushed to disk to conserve memory if required
- Binary files don't need to be encoded on client and decoded on server
### Code Details
### Major
- Use multi-part form to receive files to index on server
- Use multi-part form to send files to index on desktop client
- Send files to index on server from the khoj.el emacs client
- Send content for indexing on server at a regular interval from khoj.el
- Send files to index on server from the khoj obsidian client
- Update tests to test multi-part/form method of pushing files to index
#### Minor
- Put indexer API endpoint under /api path segment
- Explicitly make GET request to /config/data from khoj.el:khoj-server-configure method
- Improve emoji, message on content index updated via logger
- Don't call khoj server on khoj.el load, only once khoj invoked explicitly by user
- Improve indexing of binary files
- Let fs_syncer pass PDF files directly as binary before indexing
- Use encoding of each file set in indexer request to read file
- Add CORS policy to khoj server. Allow requests from khoj apps, obsidian & localhost
- Update indexer API endpoint URL to` index/update` from `indexer/batch`
Resolves #471 #243
This commit is contained in:
@@ -93,6 +93,15 @@
|
||||
:group 'khoj
|
||||
:type 'number)
|
||||
|
||||
(defcustom khoj-server-api-key "secret"
|
||||
"API Key to Khoj server."
|
||||
:group 'khoj
|
||||
:type 'string)
|
||||
|
||||
(defcustom khoj-index-interval 3600
|
||||
"Interval (in seconds) to wait before updating content index."
|
||||
:group 'khoj
|
||||
:type 'number)
|
||||
|
||||
(defcustom khoj-default-content-type "org"
|
||||
"The default content type to perform search on."
|
||||
@@ -125,6 +134,12 @@
|
||||
(defvar khoj--search-on-idle-timer nil
|
||||
"Idle timer to trigger incremental search.")
|
||||
|
||||
(defvar khoj--index-timer nil
|
||||
"Timer to trigger content indexing.")
|
||||
|
||||
(defvar khoj--indexed-files '()
|
||||
"Files that were indexed in previous content indexing run.")
|
||||
|
||||
(declare-function org-element-property "org-mode" (PROPERTY ELEMENT))
|
||||
(declare-function org-element-type "org-mode" (ELEMENT))
|
||||
(declare-function markdown-mode "markdown-mode" ())
|
||||
@@ -375,9 +390,10 @@ CONFIG is json obtained from Khoj config API."
|
||||
(string-join "/"))))
|
||||
|
||||
(defun khoj--server-configure ()
|
||||
"Configure the the Khoj server for search and chat."
|
||||
"Configure the Khoj server for search and chat."
|
||||
(interactive)
|
||||
(let* ((org-directory-regexes (or (mapcar (lambda (dir) (format "%s/**/*.org" dir)) khoj-org-directories) json-null))
|
||||
(url-request-method "GET")
|
||||
(current-config
|
||||
(with-temp-buffer
|
||||
(url-insert-file-contents (format "%s/api/config/data" khoj-server-url))
|
||||
@@ -389,7 +405,6 @@ CONFIG is json obtained from Khoj config API."
|
||||
(default-index-dir (khoj--get-directory-from-config default-config '(content-type org embeddings-file)))
|
||||
(default-chat-dir (khoj--get-directory-from-config default-config '(processor conversation conversation-logfile)))
|
||||
(chat-model (or khoj-chat-model (alist-get 'chat-model (alist-get 'openai (alist-get 'conversation (alist-get 'processor default-config))))))
|
||||
(default-model (alist-get 'model (alist-get 'conversation (alist-get 'processor default-config))))
|
||||
(enable-offline-chat (or khoj-chat-offline (alist-get 'enable-offline-chat (alist-get 'conversation (alist-get 'processor default-config)))))
|
||||
(config (or current-config default-config)))
|
||||
|
||||
@@ -519,9 +534,75 @@ CONFIG is json obtained from Khoj config API."
|
||||
(khoj--server-configure))))
|
||||
|
||||
|
||||
;; -----------------------------------------------
|
||||
;; Extract and Render Entries of each Content Type
|
||||
;; -----------------------------------------------
|
||||
;; -------------------
|
||||
;; Khoj Index Content
|
||||
;; -------------------
|
||||
|
||||
(defun khoj--server-index-files (&optional force content-type file-paths)
|
||||
"Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
|
||||
`FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
|
||||
(interactive)
|
||||
(let ((boundary (format "-------------------------%d" (random (expt 10 10))))
|
||||
(files-to-index (or file-paths
|
||||
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
|
||||
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
||||
(inhibit-message t)
|
||||
(message-log-max nil))
|
||||
(let ((url-request-method "POST")
|
||||
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
|
||||
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
|
||||
("x-api-key" . ,khoj-server-api-key))))
|
||||
(with-current-buffer
|
||||
(url-retrieve (format "%s/api/v1/index/update?%s&force=%s&client=emacs" khoj-server-url type-query (or force "false"))
|
||||
;; render response from indexing API endpoint on server
|
||||
(lambda (status)
|
||||
(if (not status)
|
||||
(message "khoj.el: %scontent index %supdated" (if content-type (format "%s " content-type) "") (if force "force " ""))
|
||||
(with-current-buffer (current-buffer)
|
||||
(goto-char "\n\n")
|
||||
(message "khoj.el: Failed to %supdate %s content index. Status: %s. Response: %s"
|
||||
(if force "force " "")
|
||||
content-type
|
||||
status
|
||||
(string-trim (buffer-substring-no-properties (point) (point-max)))))))
|
||||
nil t t)))
|
||||
(setq khoj--indexed-files files-to-index)))
|
||||
|
||||
(defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary)
|
||||
"Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
|
||||
Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
|
||||
(with-temp-buffer
|
||||
(set-buffer-multibyte nil)
|
||||
(insert "\n")
|
||||
(dolist (file-to-index files-to-index)
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert (with-temp-buffer
|
||||
(insert-file-contents-literally file-to-index)
|
||||
(buffer-string)))
|
||||
(insert "\r\n"))
|
||||
(dolist (file-to-index previously-indexed-files)
|
||||
(when (not (member file-to-index files-to-index))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert "")
|
||||
(insert "\r\n")))
|
||||
(insert (format "--%s--\r\n" boundary))
|
||||
(buffer-string)))
|
||||
|
||||
;; Cancel any running indexing timer, first
|
||||
(when khoj--index-timer
|
||||
(cancel-timer khoj--index-timer))
|
||||
;; Send files to index on server every `khoj-index-interval' seconds
|
||||
(setq khoj--index-timer
|
||||
(run-with-timer 60 khoj-index-interval 'khoj--server-index-files))
|
||||
|
||||
|
||||
;; -------------------------------------------
|
||||
;; Render Response from Khoj server for Emacs
|
||||
;; -------------------------------------------
|
||||
|
||||
(defun khoj--extract-entries-as-markdown (json-response query)
|
||||
"Convert JSON-RESPONSE, QUERY from API to markdown entries."
|
||||
@@ -1029,17 +1110,20 @@ Paragraph only starts at first text after blank line."
|
||||
;; Khoj Menu
|
||||
;; ---------
|
||||
|
||||
(transient-define-argument khoj--content-type-switch ()
|
||||
:class 'transient-switches
|
||||
:argument-format "--content-type=%s"
|
||||
:argument-regexp ".+"
|
||||
;; set content type to: last used > based on current buffer > default type
|
||||
:init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
|
||||
;; dynamically set choices to content types enabled on khoj backend
|
||||
:choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
|
||||
(defun khoj--setup-and-show-menu ()
|
||||
"Create Transient menu for khoj and show it."
|
||||
;; Create the Khoj Transient menu
|
||||
(transient-define-argument khoj--content-type-switch ()
|
||||
:class 'transient-switches
|
||||
:argument-format "--content-type=%s"
|
||||
:argument-regexp ".+"
|
||||
;; set content type to: last used > based on current buffer > default type
|
||||
:init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
|
||||
;; dynamically set choices to content types enabled on khoj backend
|
||||
:choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
|
||||
|
||||
(transient-define-suffix khoj--search-command (&optional args)
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(transient-define-suffix khoj--search-command (&optional args)
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(progn
|
||||
;; set content type to: specified > last used > based on current buffer > default type
|
||||
(setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
|
||||
@@ -1048,9 +1132,9 @@ Paragraph only starts at first text after blank line."
|
||||
;; trigger incremental search
|
||||
(call-interactively #'khoj-incremental)))
|
||||
|
||||
(transient-define-suffix khoj--find-similar-command (&optional args)
|
||||
"Find items similar to current item at point."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(transient-define-suffix khoj--find-similar-command (&optional args)
|
||||
"Find items similar to current item at point."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(progn
|
||||
;; set content type to: specified > last used > based on current buffer > default type
|
||||
(setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
|
||||
@@ -1058,37 +1142,38 @@ Paragraph only starts at first text after blank line."
|
||||
(setq khoj-results-count (or (transient-arg-value "--results-count=" args) khoj-results-count))
|
||||
(khoj--find-similar khoj--content-type)))
|
||||
|
||||
(transient-define-suffix khoj--update-command (&optional args)
|
||||
"Call khoj API to update index of specified content type."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(let* ((force-update (if (member "--force-update" args) "true" "false"))
|
||||
;; set content type to: specified > last used > based on current buffer > default type
|
||||
(content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
|
||||
(type-query (if (equal content-type "all") "" (format "t=%s" content-type)))
|
||||
(update-url (format "%s/api/update?%s&force=%s&client=emacs" khoj-server-url type-query force-update))
|
||||
(url-request-method "GET"))
|
||||
(progn
|
||||
(setq khoj--content-type content-type)
|
||||
(url-retrieve update-url (lambda (_) (message "khoj.el: %s index %supdated!" content-type (if (member "--force-update" args) "force " "")))))))
|
||||
(transient-define-suffix khoj--update-command (&optional args)
|
||||
"Call khoj API to update index of specified content type."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(let* ((force-update (if (member "--force-update" args) "true" "false"))
|
||||
;; set content type to: specified > last used > based on current buffer > default type
|
||||
(content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
|
||||
(url-request-method "GET"))
|
||||
(progn
|
||||
(setq khoj--content-type content-type)
|
||||
(khoj--server-index-files force-update content-type))))
|
||||
|
||||
(transient-define-suffix khoj--chat-command (&optional _)
|
||||
"Command to Chat with Khoj."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(khoj--chat))
|
||||
(transient-define-suffix khoj--chat-command (&optional _)
|
||||
"Command to Chat with Khoj."
|
||||
(interactive (list (transient-args transient-current-command)))
|
||||
(khoj--chat))
|
||||
|
||||
(transient-define-prefix khoj--menu ()
|
||||
"Create Khoj Menu to Configure and Execute Commands."
|
||||
[["Configure Search"
|
||||
("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
|
||||
("t" "Content Type" khoj--content-type-switch)]
|
||||
["Configure Update"
|
||||
("-f" "Force Update" "--force-update")]]
|
||||
[["Act"
|
||||
("c" "Chat" khoj--chat-command)
|
||||
("s" "Search" khoj--search-command)
|
||||
("f" "Find Similar" khoj--find-similar-command)
|
||||
("u" "Update" khoj--update-command)
|
||||
("q" "Quit" transient-quit-one)]])
|
||||
(transient-define-prefix khoj--menu ()
|
||||
"Create Khoj Menu to Configure and Execute Commands."
|
||||
[["Configure Search"
|
||||
("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
|
||||
("t" "Content Type" khoj--content-type-switch)]
|
||||
["Configure Update"
|
||||
("-f" "Force Update" "--force-update")]]
|
||||
[["Act"
|
||||
("c" "Chat" khoj--chat-command)
|
||||
("s" "Search" khoj--search-command)
|
||||
("f" "Find Similar" khoj--find-similar-command)
|
||||
("u" "Update" khoj--update-command)
|
||||
("q" "Quit" transient-quit-one)]])
|
||||
|
||||
;; Show the Khoj Transient menu
|
||||
(khoj--menu))
|
||||
|
||||
|
||||
;; ----------
|
||||
@@ -1101,7 +1186,7 @@ Paragraph only starts at first text after blank line."
|
||||
(interactive)
|
||||
(when khoj-auto-setup
|
||||
(khoj-setup t))
|
||||
(khoj--menu))
|
||||
(khoj--setup-and-show-menu))
|
||||
|
||||
(provide 'khoj)
|
||||
|
||||
|
||||
@@ -206,6 +206,64 @@ Rule everything\n")
|
||||
"Rule everything"))
|
||||
))
|
||||
|
||||
|
||||
;; -------------------------------------
|
||||
;; Test Helpers to Index Content
|
||||
;; -------------------------------------
|
||||
|
||||
(ert-deftest khoj-tests--render-files-to-add-request-body ()
|
||||
"Test files are formatted into a multi-part http request body"
|
||||
(let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
|
||||
(act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(should
|
||||
(equal
|
||||
(khoj--render-files-as-request-body (list upgrade-file act-file) '() "khoj")
|
||||
(format
|
||||
"\n--khoj\r\n\
|
||||
Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
|
||||
Content-Type: text/org\r\n\r\n\
|
||||
# Become God\n\
|
||||
## Upgrade\n\n\
|
||||
Penance to Immortality\n\n\r
|
||||
--khoj\r\n\
|
||||
Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
|
||||
Content-Type: text/org\r\n\r\n\
|
||||
## Act\n\n\
|
||||
Rule everything\n\n\r\n\
|
||||
--khoj--\r\n" upgrade-file act-file))))
|
||||
(delete-file upgrade-file)
|
||||
(delete-file act-file))))
|
||||
|
||||
(ert-deftest khoj-tests--render-files-to-add-delete-in-request-body ()
|
||||
"Test files are formatted into a multi-part http request body"
|
||||
(let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
|
||||
(act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
|
||||
(unwind-protect
|
||||
(progn
|
||||
(should
|
||||
(equal
|
||||
(khoj--render-files-as-request-body (list upgrade-file act-file) (list upgrade-file act-file "/tmp/deleted-file.org") "khoj")
|
||||
(format
|
||||
"\n--khoj\r\n\
|
||||
Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
|
||||
Content-Type: text/org\r\n\r\n\
|
||||
# Become God\n\
|
||||
## Upgrade\n\n\
|
||||
Penance to Immortality\n\n\r
|
||||
--khoj\r\n\
|
||||
Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
|
||||
Content-Type: text/org\r\n\r\n\
|
||||
## Act\n\n\
|
||||
Rule everything\n\n\r
|
||||
--khoj\r\n\
|
||||
Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
|
||||
Content-Type: text/org\r\n\r\n\
|
||||
\r
|
||||
--khoj--\r\n" upgrade-file act-file "/tmp/deleted-file.org"))))
|
||||
(delete-file upgrade-file)
|
||||
(delete-file act-file))))
|
||||
|
||||
(provide 'khoj-tests)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user