diff --git a/Khoj.desktop b/Khoj.desktop
index a9bac639..b3a1cf75 100644
--- a/Khoj.desktop
+++ b/Khoj.desktop
@@ -1,7 +1,7 @@
[Desktop Entry]
Type=Application
Name=Khoj
-Comment=A natural language search engine for your personal notes, transactions and images.
+Comment=An AI personal assistant for your Digital Brain
Path=/opt
Exec=/opt/Khoj
Icon=Khoj
diff --git a/pyproject.toml b/pyproject.toml
index c89f5575..e6632574 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "khoj-assistant"
-description = "A natural language search engine for your personal notes, transactions and images"
+description = "An AI personal assistant for your Digital Brain"
readme = "README.md"
license = "GPL-3.0-or-later"
requires-python = ">=3.8"
diff --git a/src/interface/obsidian/README.md b/src/interface/obsidian/README.md
index f2d664d3..c63fec7f 100644
--- a/src/interface/obsidian/README.md
+++ b/src/interface/obsidian/README.md
@@ -1,6 +1,6 @@
Obsidian
-> Natural language search for your Obsidian notes using [Khoj](https://github.com/khoj-ai/khoj)
+> An AI personal assistant for your Digital Brain in Obsidian
## Table of Contents
diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts
index e99e644e..9fd1ac65 100644
--- a/src/interface/obsidian/src/search_modal.ts
+++ b/src/interface/obsidian/src/search_modal.ts
@@ -161,7 +161,7 @@ export class KhojSearchModal extends SuggestModal {
// Open vault file at heading of chosen search result
if (file_match) {
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
- let linkToEntry = `${file_match.path}${resultHeading}`
+ let linkToEntry = resultHeading.startsWith('#') ? `${file_match.path}${resultHeading}` : file_match.path;
this.app.workspace.openLinkText(linkToEntry, '');
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
}
diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts
index 053562b6..c14912e5 100644
--- a/src/interface/obsidian/src/utils.ts
+++ b/src/interface/obsidian/src/utils.ts
@@ -72,41 +72,58 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
}
}
// Else if khoj is not configured to index markdown files in configured obsidian vault
- else if (data["content-type"]["markdown"]["input-filter"].length != 1 ||
+ else if (
+ data["content-type"]["markdown"]["input-files"] != null ||
+ data["content-type"]["markdown"]["input-filter"] == null ||
+ data["content-type"]["markdown"]["input-filter"].length != 1 ||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
- // Update markdown config in khoj content-type config
- // Set markdown config to only index markdown files in configured obsidian vault
- let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
- data["content-type"]["markdown"] = {
- "input-filter": [mdInVault],
- "input-files": null,
- "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
- "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
- }
+ // Update markdown config in khoj content-type config
+ // Set markdown config to only index markdown files in configured obsidian vault
+ let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
+ data["content-type"]["markdown"] = {
+ "input-filter": [mdInVault],
+ "input-files": null,
+ "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
+ "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
+ }
}
if (khoj_already_configured && !data["content-type"]["pdf"]) {
- // Add pdf config to khoj content-type config
- // Set pdf config to index pdf files in configured obsidian vault
- data["content-type"]["pdf"] = {
- "input-filter": [pdfInVault],
- "input-files": null,
- "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
- "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
+ const hasPdfFiles = vault.getFiles().some(file => file.extension === 'pdf'); // use the injected vault, not the global app
+
+ if (hasPdfFiles) {
+ data["content-type"]["pdf"] = {
+ "input-filter": [pdfInVault],
+ "input-files": null,
+ "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
+ "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
+ }
+ } else {
+ data["content-type"]["pdf"] = null;
}
}
// Else if khoj is not configured to index pdf files in configured obsidian vault
else if (khoj_already_configured &&
- (data["content-type"]["pdf"]["input-filter"].length != 1 ||
- data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
- // Update pdf config in khoj content-type config
- // Set pdf config to only index pdf files in configured obsidian vault
- let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
- data["content-type"]["pdf"] = {
- "input-filter": [pdfInVault],
- "input-files": null,
- "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
- "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
+ (
+ data["content-type"]["pdf"]["input-files"] != null ||
+ data["content-type"]["pdf"]["input-filter"] == null ||
+ data["content-type"]["pdf"]["input-filter"].length != 1 ||
+ data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
+
+ const hasPdfFiles = vault.getFiles().some(file => file.extension === 'pdf'); // use the injected vault, not the global app
+
+ if (hasPdfFiles) {
+ // Update pdf config in khoj content-type config
+ // Set pdf config to only index pdf files in configured obsidian vault
+ let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
+ data["content-type"]["pdf"] = {
+ "input-filter": [pdfInVault],
+ "input-files": null,
+ "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
+ "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
+ }
+ } else {
+ data["content-type"]["pdf"] = null;
}
}
diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index de543349..19a07d44 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -93,98 +93,106 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
logger.warning("🚨 No Content or Search type is configured.")
return
- # Initialize Org Notes Search
- if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
- logger.info("🦄 Setting up search for orgmode notes")
- # Extract Entries, Generate Notes Embeddings
- model.org_search = text_search.setup(
- OrgToJsonl,
- config.content_type.org,
- search_config=config.search_type.asymmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
- # Initialize Org Music Search
- if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
- logger.info("🎺 Setting up search for org-music")
- # Extract Entries, Generate Music Embeddings
- model.music_search = text_search.setup(
- OrgToJsonl,
- config.content_type.music,
- search_config=config.search_type.asymmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter()],
- )
-
- # Initialize Markdown Search
- if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric:
- logger.info("💎 Setting up search for markdown notes")
- # Extract Entries, Generate Markdown Embeddings
- model.markdown_search = text_search.setup(
- MarkdownToJsonl,
- config.content_type.markdown,
- search_config=config.search_type.asymmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
- # Initialize Ledger Search
- if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
- logger.info("💸 Setting up search for ledger")
- # Extract Entries, Generate Ledger Embeddings
- model.ledger_search = text_search.setup(
- BeancountToJsonl,
- config.content_type.ledger,
- search_config=config.search_type.symmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
- # Initialize PDF Search
- if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
- logger.info("🖨️ Setting up search for pdf")
- # Extract Entries, Generate PDF Embeddings
- model.pdf_search = text_search.setup(
- PdfToJsonl,
- config.content_type.pdf,
- search_config=config.search_type.asymmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
- # Initialize Image Search
- if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
- logger.info("🌄 Setting up search for images")
- # Extract Entries, Generate Image Embeddings
- model.image_search = image_search.setup(
- config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
- )
-
- if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
- logger.info("🐙 Setting up search for github")
- # Extract Entries, Generate Github Embeddings
- model.github_search = text_search.setup(
- GithubToJsonl,
- config.content_type.github,
- search_config=config.search_type.asymmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
- # Initialize External Plugin Search
- if (t == None or t in state.SearchType) and config.content_type.plugins:
- logger.info("🔌 Setting up search for plugins")
- model.plugin_search = {}
- for plugin_type, plugin_config in config.content_type.plugins.items():
- model.plugin_search[plugin_type] = text_search.setup(
- JsonlToJsonl,
- plugin_config,
+ try:
+ # Initialize Org Notes Search
+ if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
+ logger.info("🦄 Setting up search for orgmode notes")
+ # Extract Entries, Generate Notes Embeddings
+ model.org_search = text_search.setup(
+ OrgToJsonl,
+ config.content_type.org,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
+ # Initialize Org Music Search
+ if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
+ logger.info("🎺 Setting up search for org-music")
+ # Extract Entries, Generate Music Embeddings
+ model.music_search = text_search.setup(
+ OrgToJsonl,
+ config.content_type.music,
+ search_config=config.search_type.asymmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter()],
+ )
+
+ # Initialize Markdown Search
+ if (
+ (t == state.SearchType.Markdown or t == None)
+ and config.content_type.markdown
+ and config.search_type.asymmetric
+ ):
+ logger.info("💎 Setting up search for markdown notes")
+ # Extract Entries, Generate Markdown Embeddings
+ model.markdown_search = text_search.setup(
+ MarkdownToJsonl,
+ config.content_type.markdown,
+ search_config=config.search_type.asymmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter(), FileFilter()],
+ )
+
+ # Initialize Ledger Search
+ if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
+ logger.info("💸 Setting up search for ledger")
+ # Extract Entries, Generate Ledger Embeddings
+ model.ledger_search = text_search.setup(
+ BeancountToJsonl,
+ config.content_type.ledger,
+ search_config=config.search_type.symmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter(), FileFilter()],
+ )
+
+ # Initialize PDF Search
+ if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
+ logger.info("🖨️ Setting up search for pdf")
+ # Extract Entries, Generate PDF Embeddings
+ model.pdf_search = text_search.setup(
+ PdfToJsonl,
+ config.content_type.pdf,
+ search_config=config.search_type.asymmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter(), FileFilter()],
+ )
+
+ # Initialize Image Search
+ if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
+ logger.info("🌄 Setting up search for images")
+ # Extract Entries, Generate Image Embeddings
+ model.image_search = image_search.setup(
+ config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
+ )
+
+ if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
+ logger.info("🐙 Setting up search for github")
+ # Extract Entries, Generate Github Embeddings
+ model.github_search = text_search.setup(
+ GithubToJsonl,
+ config.content_type.github,
+ search_config=config.search_type.asymmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter(), FileFilter()],
+ )
+
+ # Initialize External Plugin Search
+ if (t == None or t in state.SearchType) and config.content_type.plugins:
+ logger.info("🔌 Setting up search for plugins")
+ model.plugin_search = {}
+ for plugin_type, plugin_config in config.content_type.plugins.items():
+ model.plugin_search[plugin_type] = text_search.setup(
+ JsonlToJsonl,
+ plugin_config,
+ search_config=config.search_type.asymmetric,
+ regenerate=regenerate,
+ filters=[DateFilter(), WordFilter(), FileFilter()],
+ )
+ except Exception:
+ logger.error("🚨 Failed to setup search", exc_info=True)  # record the cause and traceback, not just the banner
+ raise
+
# Invalidate Query Cache
state.query_cache = LRU()
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 259919bc..eff230f5 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -384,8 +384,13 @@ def update(
):
try:
state.search_index_lock.acquire()
- state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
- state.search_index_lock.release()
+ try:
+ state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
+ except Exception as e:
+ logger.error(e)
+ raise HTTPException(status_code=500, detail=str(e))
+ finally:
+ state.search_index_lock.release()
except ValueError as e:
logger.error(e)
raise HTTPException(status_code=500, detail=str(e))
diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py
index 535e664b..cef718ee 100644
--- a/src/khoj/utils/cli.py
+++ b/src/khoj/utils/cli.py
@@ -10,9 +10,7 @@ from khoj.utils.yaml import parse_config_from_file
def cli(args=None):
# Setup Argument Parser for the Commandline Interface
- parser = argparse.ArgumentParser(
- description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos"
- )
+ parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
parser.add_argument(
"--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
)