From 40091489c06f53c0ac8d964cadae37a7de962693 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 30 Mar 2023 03:31:47 +0700 Subject: [PATCH 1/5] For each result: snip it by lines, show filename, remove frontmatter Based on PR #135 Resolves #134 --- src/interface/obsidian/src/search_modal.ts | 24 +++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts index db06caaa..bd4d6271 100644 --- a/src/interface/obsidian/src/search_modal.ts +++ b/src/interface/obsidian/src/search_modal.ts @@ -90,12 +90,26 @@ export class KhojSearchModal extends SuggestModal { } async renderSuggestion(result: SearchResult, el: HTMLElement) { - let words_to_render = 30; - let entry_words = result.entry.split(' ') - let entry_snipped_indicator = entry_words.length > words_to_render ? ' **...**' : ''; - let snipped_entry = entry_words.slice(0, words_to_render).join(' '); + // Max number of lines to render + let lines_to_render = 8; + + // Extract filename of result + let os_path_separator = result.file.includes('\\') ? '\\' : '/'; + let filename = result.file.split(os_path_separator).pop(); + + // Remove YAML frontmatter when rendering string + result.entry = result.entry.replace(/---[\n\r][\s\S]*---[\n\r]/, ''); + + // Truncate search results to lines_to_render + let entry_snipped_indicator = result.entry.split('\n').length > lines_to_render ? ' **...**' : ''; + let snipped_entry = result.entry.split('\n').slice(0, lines_to_render).join('\n'); + + // Show filename of each search result for context + el.createEl("div",{ cls: 'khoj-result-file' }).setText(filename ?? 
""); + let result_el = el.createEl("div", { cls: 'khoj-result-entry' }) + // @ts-ignore - MarkdownRenderer.renderMarkdown(snipped_entry + entry_snipped_indicator, el, null, null); + MarkdownRenderer.renderMarkdown(snipped_entry + entry_snipped_indicator, result_el, null, null); } async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) { From d3257cb24e462c3184f0b390087db1901e1fccb5 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 30 Mar 2023 03:32:24 +0700 Subject: [PATCH 2/5] Style the search result. Use Obsidian theme colors and font-size Based on PR #135 --- src/interface/obsidian/styles.css | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/interface/obsidian/styles.css b/src/interface/obsidian/styles.css index cb4e002f..e3597abe 100644 --- a/src/interface/obsidian/styles.css +++ b/src/interface/obsidian/styles.css @@ -145,3 +145,32 @@ If your plugin does not need CSS, delete this file. padding: 2px 4px; } } + +.khoj-result-file { + font-weight: 600; + } + + .khoj-result-entry { + color: var(--text-muted); + margin-left: 2em; + padding-left: 0.5em; + line-height: normal; + margin-top: 0.2em; + margin-bottom: 0.2em; + border-left-style: solid; + border-left-color: var(--color-accent-2); + white-space: normal; + } + + .khoj-result-entry > * { + font-size: var(--font-ui-medium); + } + + .khoj-result-entry > p { + margin-top: 0.2em; + margin-bottom: 0.2em; + } + + .khoj-result-entry p br { + display: none; + } From 67129964a7361e3711304accb7ebfba8aa6f7dbd Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 30 Mar 2023 13:43:34 +0700 Subject: [PATCH 3/5] Create Note with Query as title from within Khoj Search Modal This follows expected behavior for Obsidian search modals, e.g. Omnisearch and default Obsidian search. The note creation code is borrowed from Omnisearch. 
Resolves #133 --- src/interface/obsidian/src/search_modal.ts | 11 +++++++ src/interface/obsidian/src/utils.ts | 34 +++++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/interface/obsidian/src/search_modal.ts b/src/interface/obsidian/src/search_modal.ts index bd4d6271..5f88ff9a 100644 --- a/src/interface/obsidian/src/search_modal.ts +++ b/src/interface/obsidian/src/search_modal.ts @@ -1,5 +1,6 @@ import { App, SuggestModal, request, MarkdownRenderer, Instruction, Platform } from 'obsidian'; import { KhojSetting } from 'src/settings'; +import { createNoteAndCloseModal } from 'src/utils'; export interface SearchResult { entry: string; @@ -10,6 +11,7 @@ export class KhojSearchModal extends SuggestModal { setting: KhojSetting; rerank: boolean = false; find_similar_notes: boolean; + query: string = ""; app: App; constructor(app: App, setting: KhojSetting, find_similar_notes: boolean = false) { @@ -31,6 +33,14 @@ export class KhojSearchModal extends SuggestModal { this.rerank = false }); + // Register Modal Keybindings to Create New Note with Query as Title + this.scope.register(['Shift'], 'Enter', async () => { + if (this.query != "") createNoteAndCloseModal(this.query, this); + }); + this.scope.register(['Ctrl', 'Shift'], 'Enter', async () => { + if (this.query != "") createNoteAndCloseModal(this.query, this, { newLeaf: true }); + }); + // Add Hints to Modal for available Keybindings const modalInstructions: Instruction[] = [ { @@ -86,6 +96,7 @@ export class KhojSearchModal extends SuggestModal { .filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path)) .map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; }); + this.query = query; return results; } diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index 5a84a191..c7cc2ef0 100644 --- a/src/interface/obsidian/src/utils.ts +++ 
b/src/interface/obsidian/src/utils.ts @@ -1,4 +1,4 @@ -import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault } from 'obsidian'; +import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian'; import { KhojSetting } from 'src/settings' export function getVaultAbsolutePath(vault: Vault): string { @@ -139,3 +139,35 @@ export async function updateKhojBackend(khojUrl: string, khojConfig: Object) { function getIndexDirectoryFromBackendConfig(filepath: string) { return filepath.split("/").slice(0, -1).join("/"); } + +export async function createNote(name: string, newLeaf = false): Promise { + try { + let pathPrefix: string + switch (app.vault.getConfig('newFileLocation')) { + case 'current': + pathPrefix = (app.workspace.getActiveFile()?.parent.path ?? '') + '/' + break + case 'folder': + pathPrefix = this.app.vault.getConfig('newFileFolderPath') + '/' + break + default: // 'root' + pathPrefix = '' + break + } + await app.workspace.openLinkText(`${pathPrefix}${name}.md`, '', newLeaf) + } catch (e) { + console.error('Khoj: Could not create note.\n' + (e as any).message); + throw e + } + } + +export async function createNoteAndCloseModal(query: string, modal: Modal, opt?: { newLeaf: boolean }): Promise { + try { + await createNote(query, opt?.newLeaf); + } + catch (e) { + new Notice((e as Error).message) + return + } + modal.close(); +} From a2ab68a7a2f0d7e0397051a54b5fd67a4052d907 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 30 Mar 2023 12:30:25 +0700 Subject: [PATCH 4/5] Include filename of markdown entries for search indexing Append originating filename to compiled string of each entry for better search quality by providing more context to model Update markdown_to_jsonl tests to ensure filename being added Resolves #142 --- src/khoj/processor/markdown/markdown_to_jsonl.py | 8 ++++++-- tests/test_markdown_to_jsonl.py | 10 ++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git 
a/src/khoj/processor/markdown/markdown_to_jsonl.py b/src/khoj/processor/markdown/markdown_to_jsonl.py index 045eea65..a1e4d0c1 100644 --- a/src/khoj/processor/markdown/markdown_to_jsonl.py +++ b/src/khoj/processor/markdown/markdown_to_jsonl.py @@ -1,8 +1,9 @@ # Standard Packages import glob -import re import logging +import re import time +from pathlib import Path from typing import List # Internal Packages @@ -124,7 +125,10 @@ class MarkdownToJsonl(TextToJsonl): "Convert each Markdown entries into a dictionary" entries = [] for parsed_entry in parsed_entries: - entries.append(Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{entry_to_file_map[parsed_entry]}")) + entry_filename = Path(entry_to_file_map[parsed_entry]) + # Append base filename to compiled entry for context to model + compiled_entry = f"{parsed_entry}\n{entry_filename.stem}" + entries.append(Entry(compiled=compiled_entry, raw=parsed_entry, file=f"{entry_filename}")) logger.debug(f"Converted {len(parsed_entries)} markdown entries to dictionaries") diff --git a/tests/test_markdown_to_jsonl.py b/tests/test_markdown_to_jsonl.py index 16f19ab1..dfb42fed 100644 --- a/tests/test_markdown_to_jsonl.py +++ b/tests/test_markdown_to_jsonl.py @@ -1,5 +1,6 @@ # Standard Packages import json +from pathlib import Path # Internal Packages from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl @@ -66,16 +67,17 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path): # Act # Extract Entries from specified Markdown files - entries, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=[markdownfile]) + entry_strings, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=[markdownfile]) + entries = MarkdownToJsonl.convert_markdown_entries_to_maps(entry_strings, entry_to_file_map) # Process Each Entry from All Notes Files - jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl( - MarkdownToJsonl.convert_markdown_entries_to_maps(entries, 
entry_to_file_map) - ) + jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(entries) jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] # Assert assert len(jsonl_data) == 2 + # Ensure entry compiled strings include the markdown files they originate from + assert all([markdownfile.stem in entry.compiled for entry in entries]) def test_get_markdown_files(tmp_path): From 5673bd5b9664207c6d83d030a6119d5863b5783c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 30 Mar 2023 12:38:45 +0700 Subject: [PATCH 5/5] Keep original formatting in compiled text entry strings - Explicitly split entry string by space during split by max_tokens - Prevent formatting of compiled entry from being lost - The formatting itself contains useful information No point in dropping the formatting unnecessarily, even if (say) the current search models don't account for it (yet) --- src/khoj/processor/text_to_jsonl.py | 2 +- tests/test_org_to_jsonl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/text_to_jsonl.py b/src/khoj/processor/text_to_jsonl.py index 570c22bb..22de2c01 100644 --- a/src/khoj/processor/text_to_jsonl.py +++ b/src/khoj/processor/text_to_jsonl.py @@ -31,7 +31,7 @@ class TextToJsonl(ABC): "Split entries if compiled entry length exceeds the max tokens supported by the ML model." 
chunked_entries: List[Entry] = [] for entry in entries: - compiled_entry_words = entry.compiled.split() + compiled_entry_words = [word for word in entry.compiled.split(" ") if word != ""] # Drop long words instead of having entry truncated to maintain quality of entry processed by models compiled_entry_words = [word for word in compiled_entry_words if len(word) <= max_word_length] for chunk_index in range(0, len(compiled_entry_words), max_tokens): diff --git a/tests/test_org_to_jsonl.py b/tests/test_org_to_jsonl.py index b8803772..aed4983f 100644 --- a/tests/test_org_to_jsonl.py +++ b/tests/test_org_to_jsonl.py @@ -44,7 +44,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path): # Arrange entry = f"""*** Heading \t\r - Body Line 1 + Body Line """ orgfile = create_file(tmp_path, entry)