From 4b02a8c7881bd5d90fc97e3186c40ea25f01f78a Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 2 Jul 2023 12:37:24 -0700 Subject: [PATCH 1/5] Fix PDF setup in Obsidian plugin and force Obsidian configuration for markdown --- src/interface/obsidian/src/search_modal.ts | 2 +- src/interface/obsidian/src/utils.ts | 48 +++++++++++++++------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts index e99e644e..9fd1ac65 100644 --- a/src/interface/obsidian/src/search_modal.ts +++ b/src/interface/obsidian/src/search_modal.ts @@ -161,7 +161,7 @@ export class KhojSearchModal extends SuggestModal { // Open vault file at heading of chosen search result if (file_match) { let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : ''; - let linkToEntry = `${file_match.path}${resultHeading}` + let linkToEntry = resultHeading.startsWith('#') ? `${file_match.path}${resultHeading}` : file_match.path; this.app.workspace.openLinkText(linkToEntry, ''); console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`); } diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 053562b6..c04f3091 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -1,5 +1,6 @@ import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian'; import { KhojSetting } from 'src/settings' +import * as fs from 'fs'; export function getVaultAbsolutePath(vault: Vault): string { let adaptor = vault.adapter; @@ -72,7 +73,9 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n } } // Else if khoj is not configured to index markdown files in configured obsidian vault - else if (data["content-type"]["markdown"]["input-filter"].length != 1 || + else if (data["content-type"]["markdown"]["input-filter"] == null || + data["content-type"]["markdown"]["input-files"] != null || + data["content-type"]["markdown"]["input-filter"].length != 1 || data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) { // Update markdown config in khoj content-type config // Set markdown config to only index markdown files in configured obsidian vault @@ -88,25 +91,40 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n if (khoj_already_configured && !data["content-type"]["pdf"]) { // Add pdf config to khoj content-type config // Set pdf config to index pdf files in configured obsidian vault - data["content-type"]["pdf"] = { - "input-filter": [pdfInVault], - "input-files": null, - "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`, - "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`, + + let pdfs = fs.readdirSync(vaultPath).filter(file => file.endsWith(".pdf")); + + if (pdfs.length > 0) { + data["content-type"]["pdf"] = { + "input-filter": [pdfInVault], + "input-files": null, + "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`, + "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`, + } + } else { + data["content-type"]["pdf"] = null; } } // Else if khoj is not configured to index pdf files in configured obsidian vault else if (khoj_already_configured && - (data["content-type"]["pdf"]["input-filter"].length != 1 || + (data["content-type"]["pdf"]["input-filter"] == null || + data["content-type"]["pdf"]["input-filter"].length != 1 || data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) { - // Update pdf config in khoj content-type config - // Set pdf config to only index pdf files in configured obsidian vault - let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]); - data["content-type"]["pdf"] = { - "input-filter": [pdfInVault], - "input-files": null, - "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`, - "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`, + + let pdfs = fs.readdirSync(vaultPath).filter(file => file.endsWith(".pdf")); + + if (pdfs.length > 0) { + // Update pdf config in khoj content-type config + // Set pdf config to only index pdf files in configured obsidian vault + let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]); + data["content-type"]["pdf"] = { + "input-filter": [pdfInVault], + "input-files": null, + "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`, + "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`, + } + } else { + data["content-type"]["pdf"] = null; } } From e4c445f805c6ad453f65aaed4fbf25acce011c42 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 2 Jul 2023 13:35:02 -0700 Subject: [PATCH 2/5] Add try-except-finally blocks around configure calls in /update --- src/khoj/configure.py | 182 +++++++++++++++++++++------------------- src/khoj/routers/api.py | 9 +- 2 files changed, 102 insertions(+), 89 deletions(-) diff --git a/src/khoj/configure.py b/src/khoj/configure.py index de543349..19a07d44 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -93,98 +93,106 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, logger.warning("🚨 No Content or Search type is configured.") return - # Initialize Org Notes Search - if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric: - logger.info("🦄 Setting up search for orgmode notes") - # Extract Entries, Generate Notes Embeddings - model.org_search = text_search.setup( - OrgToJsonl, - config.content_type.org, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - - # Initialize Org Music Search - if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric: - logger.info("🎺 Setting up search for org-music") - # Extract Entries, Generate Music Embeddings - model.music_search = text_search.setup( - OrgToJsonl, - config.content_type.music, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter()], - ) - - # Initialize Markdown Search - if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric: - logger.info("💎 Setting up search for markdown notes") - # Extract Entries, Generate Markdown Embeddings - model.markdown_search = text_search.setup( - MarkdownToJsonl, - config.content_type.markdown, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - - # Initialize Ledger Search - if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric: - logger.info("💸 Setting up search for ledger") - # Extract Entries, Generate Ledger Embeddings - model.ledger_search = text_search.setup( - BeancountToJsonl, - config.content_type.ledger, - search_config=config.search_type.symmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - - # Initialize PDF Search - if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric: - logger.info("🖨️ Setting up search for pdf") - # Extract Entries, Generate PDF Embeddings - model.pdf_search = text_search.setup( - PdfToJsonl, - config.content_type.pdf, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - - # Initialize Image Search - if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image: - logger.info("🌄 Setting up search for images") - # Extract Entries, Generate Image Embeddings - model.image_search = image_search.setup( - config.content_type.image, search_config=config.search_type.image, regenerate=regenerate - ) - - if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric: - logger.info("🐙 Setting up search for github") - # Extract Entries, Generate Github Embeddings - model.github_search = text_search.setup( - GithubToJsonl, - config.content_type.github, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - - # Initialize External Plugin Search - if (t == None or t in state.SearchType) and config.content_type.plugins: - logger.info("🔌 Setting up search for plugins") - model.plugin_search = {} - for plugin_type, plugin_config in config.content_type.plugins.items(): - model.plugin_search[plugin_type] = text_search.setup( - JsonlToJsonl, - plugin_config, + try: + # Initialize Org Notes Search + if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric: + logger.info("🦄 Setting up search for orgmode notes") + # Extract Entries, Generate Notes Embeddings + model.org_search = text_search.setup( + OrgToJsonl, + config.content_type.org, search_config=config.search_type.asymmetric, regenerate=regenerate, filters=[DateFilter(), WordFilter(), FileFilter()], ) + # Initialize Org Music Search + if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric: + logger.info("🎺 Setting up search for org-music") + # Extract Entries, Generate Music Embeddings + model.music_search = text_search.setup( + OrgToJsonl, + config.content_type.music, + search_config=config.search_type.asymmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter()], + ) + + # Initialize Markdown Search + if ( + (t == state.SearchType.Markdown or t == None) + and config.content_type.markdown + and config.search_type.asymmetric + ): + logger.info("💎 Setting up search for markdown notes") + # Extract Entries, Generate Markdown Embeddings + model.markdown_search = text_search.setup( + MarkdownToJsonl, + config.content_type.markdown, + search_config=config.search_type.asymmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter(), FileFilter()], + ) + + # Initialize Ledger Search + if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric: + logger.info("💸 Setting up search for ledger") + # Extract Entries, Generate Ledger Embeddings + model.ledger_search = text_search.setup( + BeancountToJsonl, + config.content_type.ledger, + search_config=config.search_type.symmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter(), FileFilter()], + ) + + # Initialize PDF Search + if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric: + logger.info("🖨️ Setting up search for pdf") + # Extract Entries, Generate PDF Embeddings + model.pdf_search = text_search.setup( + PdfToJsonl, + config.content_type.pdf, + search_config=config.search_type.asymmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter(), FileFilter()], + ) + + # Initialize Image Search + if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image: + logger.info("🌄 Setting up search for images") + # Extract Entries, Generate Image Embeddings + model.image_search = image_search.setup( + config.content_type.image, search_config=config.search_type.image, regenerate=regenerate + ) + + if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric: + logger.info("🐙 Setting up search for github") + # Extract Entries, Generate Github Embeddings + model.github_search = text_search.setup( + GithubToJsonl, + config.content_type.github, + search_config=config.search_type.asymmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter(), FileFilter()], + ) + + # Initialize External Plugin Search + if (t == None or t in state.SearchType) and config.content_type.plugins: + logger.info("🔌 Setting up search for plugins") + model.plugin_search = {} + for plugin_type, plugin_config in config.content_type.plugins.items(): + model.plugin_search[plugin_type] = text_search.setup( + JsonlToJsonl, + plugin_config, + search_config=config.search_type.asymmetric, + regenerate=regenerate, + filters=[DateFilter(), WordFilter(), FileFilter()], + ) + except Exception as e: + logger.error("🚨 Failed to setup search") + raise e + # Invalidate Query Cache state.query_cache = LRU() diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index bec66ac8..91a67589 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -357,8 +357,13 @@ def update( ): try: state.search_index_lock.acquire() - state.model = configure_search(state.model, state.config, regenerate=force or False, t=t) - state.search_index_lock.release() + try: + state.model = configure_search(state.model, state.config, regenerate=force or False, t=t) + except Exception as e: + logger.error(e) + raise HTTPException(status_code=500, detail=str(e)) + finally: + state.search_index_lock.release() except ValueError as e: logger.error(e) raise HTTPException(status_code=500, detail=str(e)) From 30459ee4ba7fff384947ef82a3a2581253c1fe17 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 1 Jul 2023 20:24:46 -0700 Subject: [PATCH 3/5] Fix Khoj subtitle in desktop entry, pyproject, cli and Obsidian Readme --- Khoj.desktop | 2 +- pyproject.toml | 2 +- src/interface/obsidian/README.md | 2 +- src/khoj/utils/cli.py | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Khoj.desktop b/Khoj.desktop index a9bac639..b3a1cf75 100644 --- a/Khoj.desktop +++ b/Khoj.desktop @@ -1,7 +1,7 @@ [Desktop Entry] Type=Application Name=Khoj -Comment=A natural language search engine for your personal notes, transactions and images. +Comment=An AI personal assistant for your Digital Brain Path=/opt Exec=/opt/Khoj Icon=Khoj diff --git a/pyproject.toml b/pyproject.toml index c89f5575..e6632574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "khoj-assistant" -description = "A natural language search engine for your personal notes, transactions and images" +description = "An AI personal assistant for your Digital Brain" readme = "README.md" license = "GPL-3.0-or-later" requires-python = ">=3.8" diff --git a/src/interface/obsidian/README.md b/src/interface/obsidian/README.md index f2d664d3..c63fec7f 100644 --- a/src/interface/obsidian/README.md +++ b/src/interface/obsidian/README.md @@ -1,6 +1,6 @@ Khoj LogoObsidian -> Natural language search for your Obsidian notes using [Khoj](https://github.com/khoj-ai/khoj) +> An AI personal assistant for your Digital Brain in Obsidian ## Table of Contents diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py index 535e664b..cef718ee 100644 --- a/src/khoj/utils/cli.py +++ b/src/khoj/utils/cli.py @@ -10,9 +10,7 @@ from khoj.utils.yaml import parse_config_from_file def cli(args=None): # Setup Argument Parser for the Commandline Interface - parser = argparse.ArgumentParser( - description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos" - ) + parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain") parser.add_argument( "--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj" ) From eff1436857b7fb14f8b844d43a93d03f720d8e8b Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 2 Jul 2023 16:17:25 -0700 Subject: [PATCH 4/5] Overwrite existing PDFs in Obsidian as well, make if-block more legible --- src/interface/obsidian/src/utils.ts | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index c04f3091..ca8e23ef 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -73,28 +73,26 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n } } // Else if khoj is not configured to index markdown files in configured obsidian vault - else if (data["content-type"]["markdown"]["input-filter"] == null || + else if ( data["content-type"]["markdown"]["input-files"] != null || + data["content-type"]["markdown"]["input-filter"] == null || data["content-type"]["markdown"]["input-filter"].length != 1 || data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) { - // Update markdown config in khoj content-type config - // Set markdown config to only index markdown files in configured obsidian vault - let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]); - data["content-type"]["markdown"] = { - "input-filter": [mdInVault], - "input-files": null, - "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`, - "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`, - } + // Update markdown config in khoj content-type config + // Set markdown config to only index markdown files in configured obsidian vault + let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]); + data["content-type"]["markdown"] = { + "input-filter": [mdInVault], + "input-files": null, + "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`, + "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`, + } } if (khoj_already_configured && !data["content-type"]["pdf"]) { - // Add pdf config to khoj content-type config - // Set pdf config to index pdf files in configured obsidian vault + const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf'); - let pdfs = fs.readdirSync(vaultPath).filter(file => file.endsWith(".pdf")); - - if (pdfs.length > 0) { + if (hasPdfFiles) { data["content-type"]["pdf"] = { "input-filter": [pdfInVault], "input-files": null, @@ -107,9 +105,11 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n } // Else if khoj is not configured to index pdf files in configured obsidian vault else if (khoj_already_configured && - (data["content-type"]["pdf"]["input-filter"] == null || - data["content-type"]["pdf"]["input-filter"].length != 1 || - data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) { + ( + data["content-type"]["pdf"]["input-files"] != null || + data["content-type"]["pdf"]["input-filter"] == null || + data["content-type"]["pdf"]["input-filter"].length != 1 || + data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) { let pdfs = fs.readdirSync(vaultPath).filter(file => file.endsWith(".pdf")); From a52c1c8380bca36c792adf07789dd15639aa656f Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 2 Jul 2023 16:20:43 -0700 Subject: [PATCH 5/5] Use built-in app.vault to determine whether there are any PDF files within --- src/interface/obsidian/src/utils.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index ca8e23ef..c14912e5 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -1,6 +1,5 @@ import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian'; import { KhojSetting } from 'src/settings' -import * as fs from 'fs'; export function getVaultAbsolutePath(vault: Vault): string { let adaptor = vault.adapter; @@ -111,9 +110,9 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n data["content-type"]["pdf"]["input-filter"].length != 1 || data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) { - let pdfs = fs.readdirSync(vaultPath).filter(file => file.endsWith(".pdf")); + let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf'); - if (pdfs.length > 0) { + if (hasPdfFiles) { // Update pdf config in khoj content-type config // Set pdf config to only index pdf files in configured obsidian vault let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);