Also index pdf, markdown and plaintext files using khoj emacs client

Previously, khoj.el could only index org-mode files and directories
containing org-mode files.

Mark the `khoj-org-directories' and `khoj-org-files' variables as
deprecated. `khoj-index-directories' and `khoj-index-files' replace
them, as these names better fit the more general case.

Resolves #597
This commit is contained in:
Debanjum Singh Solanky
2024-01-02 11:45:06 +05:30
parent 5abaed9d08
commit e28adf2884

View File

@@ -241,7 +241,7 @@ for example), set this to the full interpreter path."
(member val '("python" "python3" "pythonw" "py"))) (member val '("python" "python3" "pythonw" "py")))
:group 'khoj) :group 'khoj)
(defcustom khoj-org-files (org-agenda-files t t) (defcustom khoj-org-files nil
"List of org-files to index on khoj server." "List of org-files to index on khoj server."
:type '(repeat string) :type '(repeat string)
:group 'khoj) :group 'khoj)
@@ -251,6 +251,19 @@ for example), set this to the full interpreter path."
:type '(repeat string) :type '(repeat string)
:group 'khoj) :group 'khoj)
;; `khoj-org-directories' and `khoj-org-files' are superseded by the more
;; general `khoj-index-directories' and `khoj-index-files'.
;; NOTE(review): the trailing `set' access-type presumably limits the
;; obsolescence warning to code that sets these variables -- confirm
;; against the `make-obsolete-variable' documentation.
(make-obsolete-variable 'khoj-org-directories 'khoj-index-directories "1.2.0" 'set)
(make-obsolete-variable 'khoj-org-files 'khoj-index-files "1.2.0" 'set)
(defcustom khoj-index-files (org-agenda-files t t)
  "List of org, markdown, pdf and other plaintext files to index on khoj server.
Each element is a file path.  The default is the value of
`(org-agenda-files t t)' at the time this variable is defined."
  ;; `file' (rather than `string') gives path completion in the Customize UI;
  ;; the stored value is still a plain list of strings.
  :type '(repeat file)
  :group 'khoj)
(defcustom khoj-index-directories nil
  "List of directories with files to index on khoj server.
Org, markdown, pdf and other plaintext files in these directories
are indexed."
  ;; `directory' (rather than `string') gives path completion in the
  ;; Customize UI; the stored value is still a plain list of strings.
  :type '(repeat directory)
  :group 'khoj)
(defcustom khoj-auto-setup t (defcustom khoj-auto-setup t
"Automate install, configure and start of khoj server. "Automate install, configure and start of khoj server.
Auto invokes setup steps on calling main entrypoint." Auto invokes setup steps on calling main entrypoint."
@@ -395,12 +408,16 @@ Auto invokes setup steps on calling main entrypoint."
"Send files at `FILE-PATHS' to the Khoj server to index for search and chat. "Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
`FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed." `FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
(interactive) (interactive)
(let ((boundary (format "-------------------------%d" (random (expt 10 10)))) (let* ((boundary (format "-------------------------%d" (random (expt 10 10))))
(files-to-index (or file-paths ;; Use `khoj-index-directories', `khoj-index-files' when set, else fallback to `khoj-org-directories', `khoj-org-files'
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files))) ;; This is a temporary change. `khoj-org-directories', `khoj-org-files' are deprecated. They will be removed in a future release
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type))) (content-directories (or khoj-index-directories khoj-org-directories))
(inhibit-message t) (content-files (or khoj-index-files khoj-org-files))
(message-log-max nil)) (files-to-index (or file-paths
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
(inhibit-message t)
(message-log-max nil))
(let ((url-request-method "POST") (let ((url-request-method "POST")
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary)) (url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary)) (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
@@ -430,20 +447,30 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
(set-buffer-multibyte nil) (set-buffer-multibyte nil)
(insert "\n") (insert "\n")
(dolist (file-to-index files-to-index) (dolist (file-to-index files-to-index)
;; find file content-type. Choose from org, markdown, pdf, plaintext
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
((string-match "\\.pdf$" file-to-index) "application/pdf")
(t "text/plain"))))
(insert (format "--%s\r\n" boundary)) (insert (format "--%s\r\n" boundary))
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index)) (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
(insert "Content-Type: text/org\r\n\r\n") (insert (format "Content-Type: %s\r\n\r\n" content-type))
(insert (with-temp-buffer (insert (with-temp-buffer
(insert-file-contents-literally file-to-index) (insert-file-contents-literally file-to-index)
(buffer-string))) (buffer-string)))
(insert "\r\n")) (insert "\r\n")))
(dolist (file-to-index previously-indexed-files) (dolist (file-to-index previously-indexed-files)
(when (not (member file-to-index files-to-index)) (when (not (member file-to-index files-to-index))
(insert (format "--%s\r\n" boundary)) ;; find file content-type. Choose from org, markdown, pdf, plaintext
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index)) (let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
(insert "Content-Type: text/org\r\n\r\n") ((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
(insert "") ((string-match "\\.pdf$" file-to-index) "application/pdf")
(insert "\r\n"))) (t "text/plain"))))
(insert (format "--%s\r\n" boundary))
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
(insert "Content-Type: text/org\r\n\r\n")
(insert "")
(insert "\r\n"))))
(insert (format "--%s--\r\n" boundary)) (insert (format "--%s--\r\n" boundary))
(buffer-string))) (buffer-string)))