From e28adf2884c03cccddc3f3016b665f89565ebdef Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 2 Jan 2024 11:45:06 +0530
Subject: [PATCH] Also index pdf, markdown and plaintext files using khoj emacs client

Previously you could only index org-mode files and directories from khoj.el

Mark the `khoj-org-directories', `khoj-org-files' variables for deprecation,
since `khoj-index-directories', `khoj-index-files' replace them as more
appropriate names for the more general case

Resolves #597
---
 src/interface/emacs/khoj.el | 55 +++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 14 deletions(-)

diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el
index 80a75814..874a3bc8 100644
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -241,7 +241,7 @@ for example), set this to the full interpreter path."
           (member val '("python" "python3" "pythonw" "py")))
   :group 'khoj)
 
-(defcustom khoj-org-files (org-agenda-files t t)
+(defcustom khoj-org-files nil
   "List of org-files to index on khoj server."
   :type '(repeat string)
   :group 'khoj)
@@ -251,6 +251,19 @@ for example), set this to the full interpreter path."
   :type '(repeat string)
   :group 'khoj)
 
+(make-obsolete-variable 'khoj-org-directories 'khoj-index-directories "1.2.0" 'set)
+(make-obsolete-variable 'khoj-org-files 'khoj-index-files "1.2.0" 'set)
+
+(defcustom khoj-index-files (org-agenda-files t t)
+  "List of org, markdown, pdf and other plaintext files to index on khoj server."
+  :type '(repeat string)
+  :group 'khoj)
+
+(defcustom khoj-index-directories nil
+  "List of directories with org, markdown, pdf and other plaintext files to index on khoj server."
+  :type '(repeat string)
+  :group 'khoj)
+
 (defcustom khoj-auto-setup t
   "Automate install, configure and start of khoj server.
 Auto invokes setup steps on calling main entrypoint."
@@ -395,12 +408,16 @@ Auto invokes setup steps on calling main entrypoint."
   "Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
 `FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
   (interactive)
-  (let ((boundary (format "-------------------------%d" (random (expt 10 10))))
-        (files-to-index (or file-paths
-                            (append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
-        (type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
-        (inhibit-message t)
-        (message-log-max nil))
+  (let* ((boundary (format "-------------------------%d" (random (expt 10 10))))
+         ;; Use `khoj-index-directories', `khoj-index-files' when set, else fallback to `khoj-org-directories', `khoj-org-files'
+         ;; This is a temporary change. `khoj-org-directories', `khoj-org-files' are deprecated. They will be removed in a future release
+         (content-directories (or khoj-index-directories khoj-org-directories))
+         (content-files (or khoj-index-files khoj-org-files))
+         (files-to-index (or file-paths
+                             (append (mapcan (lambda (dir) (directory-files-recursively dir "\\.\\(org\\|md\\|markdown\\|pdf\\|txt\\|rst\\|xml\\|htm\\|html\\)$")) content-directories) content-files)))
+         (type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
+         (inhibit-message t)
+         (message-log-max nil))
     (let ((url-request-method "POST")
           (url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
           (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
@@ -430,20 +447,30 @@ Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
     (set-buffer-multibyte nil)
     (insert "\n")
     (dolist (file-to-index files-to-index)
+      ;; find file content-type. Choose from org, markdown, pdf, plaintext
+      (let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
+                                ((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
+                                ((string-match "\\.pdf$" file-to-index) "application/pdf")
+                                (t "text/plain"))))
       (insert (format "--%s\r\n" boundary))
       (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
-      (insert "Content-Type: text/org\r\n\r\n")
+      (insert (format "Content-Type: %s\r\n\r\n" content-type))
       (insert (with-temp-buffer
                 (insert-file-contents-literally file-to-index)
                 (buffer-string)))
-      (insert "\r\n"))
+      (insert "\r\n")))
     (dolist (file-to-index previously-indexed-files)
       (when (not (member file-to-index files-to-index))
-        (insert (format "--%s\r\n" boundary))
-        (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
-        (insert "Content-Type: text/org\r\n\r\n")
-        (insert "")
-        (insert "\r\n")))
+        ;; Files no longer in FILES-TO-INDEX are sent with an empty body; tag them with their content-type (org, markdown, pdf, plaintext)
+        (let ((content-type (cond ((string-match "\\.org$" file-to-index) "text/org")
+                                  ((string-match "\\.\\(md\\|markdown\\)$" file-to-index) "text/markdown")
+                                  ((string-match "\\.pdf$" file-to-index) "application/pdf")
+                                  (t "text/plain"))))
+          (insert (format "--%s\r\n" boundary))
+          (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
+          (insert (format "Content-Type: %s\r\n\r\n" content-type))
+          (insert "")
+          (insert "\r\n"))))
     (insert (format "--%s--\r\n" boundary))
     (buffer-string)))
 