mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Also index pdf, markdown and plaintext files using khoj emacs client
Previously you could only index org-mode files and directories from khoj.el Mark the `khoj-org-directories', `khoj-org-files' variables for deprecation, since `khoj-index-directories', `khoj-index-files' replace them as more appropriate names for the more general case Resolves #597
This commit is contained in:
@@ -241,7 +241,7 @@ for example), set this to the full interpreter path."
|
||||
(member val '("python" "python3" "pythonw" "py")))
|
||||
:group 'khoj)
|
||||
|
||||
(defcustom khoj-org-files (org-agenda-files t t)
|
||||
(defcustom khoj-org-files nil
|
||||
"List of org-files to index on khoj server."
|
||||
:type '(repeat string)
|
||||
:group 'khoj)
|
||||
@@ -251,6 +251,19 @@ for example), set this to the full interpreter path."
|
||||
:type '(repeat string)
|
||||
:group 'khoj)
|
||||
|
||||
(make-obsolete-variable 'khoj-org-directories 'khoj-index-directories "1.2.0" 'set)
|
||||
(make-obsolete-variable 'khoj-org-files 'khoj-index-files "1.2.0" 'set)
|
||||
|
||||
(defcustom khoj-index-files (org-agenda-files t t)
|
||||
"List of org, markdown, pdf and other plaintext to index on khoj server."
|
||||
:type '(repeat string)
|
||||
:group 'khoj)
|
||||
|
||||
(defcustom khoj-index-directories nil
|
||||
"List of directories with org, markdown, pdf and other plaintext files to index on khoj server."
|
||||
:type '(repeat string)
|
||||
:group 'khoj)
|
||||
|
||||
(defcustom khoj-auto-setup t
|
||||
"Automate install, configure and start of khoj server.
|
||||
Auto invokes setup steps on calling main entrypoint."
|
||||
@@ -395,12 +408,16 @@ Auto invokes setup steps on calling main entrypoint."
|
||||
"Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
|
||||
`FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
|
||||
(interactive)
|
||||
(let ((boundary (format "-------------------------%d" (random (expt 10 10))))
|
||||
(files-to-index (or file-paths
|
||||
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
|
||||
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
||||
(inhibit-message t)
|
||||
(message-log-max nil))
|
||||
(let* ((boundary (format "-------------------------%d" (random (expt 10 10))))
|
||||
;; Use `khoj-index-directories', `khoj-index-files' when set, else fallback to `khoj-org-directories', `khoj-org-files'
|
||||
;; This is a temporary change. `khoj-org-directories', `khoj-org-files' are deprecated. They will be removed in a future release
|
||||
(content-directories (or khoj-index-directories khoj-org-directories))
|
||||
(content-files (or khoj-index-files khoj-org-files))
|
||||
(files-to-index (or file-paths
|
||||
(append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
|
||||
(type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
|
||||
(inhibit-message t)
|
||||
(message-log-max nil))
|
||||
(let ((url-request-method "POST")
|
||||
(url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
|
||||
(url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
|
||||
@@ -430,20 +447,30 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
|
||||
(set-buffer-multibyte nil)
|
||||
(insert "\n")
|
||||
(dolist (file-to-index files-to-index)
|
||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
||||
(t "text/plain"))))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert (format "Content-Type: %s\r\n\r\n" content-type))
|
||||
(insert (with-temp-buffer
|
||||
(insert-file-contents-literally file-to-index)
|
||||
(buffer-string)))
|
||||
(insert "\r\n"))
|
||||
(insert "\r\n")))
|
||||
(dolist (file-to-index previously-indexed-files)
|
||||
(when (not (member file-to-index files-to-index))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert "")
|
||||
(insert "\r\n")))
|
||||
;; find file content-type. Choose from org, markdown, pdf, plaintext
|
||||
(let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
|
||||
((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
|
||||
((string-match "\\.pdf$" file-to-index) "application/pdf")
|
||||
(t "text/plain"))))
|
||||
(insert (format "--%s\r\n" boundary))
|
||||
(insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
|
||||
(insert "Content-Type: text/org\r\n\r\n")
|
||||
(insert "")
|
||||
(insert "\r\n"))))
|
||||
(insert (format "--%s--\r\n" boundary))
|
||||
(buffer-string)))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user