mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 13:22:12 +00:00
Include filename of markdown entries for search indexing
Append originating filename to compiled string of each entry for better search quality by providing more context to model Update markdown_to_jsonl tests to ensure filename being added Resolves #142
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
# Standard Packages
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
|
||||
@@ -66,16 +67,17 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path):
|
||||
|
||||
# Act
|
||||
# Extract Entries from specified Markdown files
|
||||
entries, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=[markdownfile])
|
||||
entry_strings, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=[markdownfile])
|
||||
entries = MarkdownToJsonl.convert_markdown_entries_to_maps(entry_strings, entry_to_file_map)
|
||||
|
||||
# Process Each Entry from All Notes Files
|
||||
jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
|
||||
MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
|
||||
)
|
||||
jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(entries)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
|
||||
# Assert
|
||||
assert len(jsonl_data) == 2
|
||||
# Ensure entry compiled strings include the markdown files they originate from
|
||||
assert all([markdownfile.stem in entry.compiled for entry in entries])
|
||||
|
||||
|
||||
def test_get_markdown_files(tmp_path):
|
||||
|
||||
Reference in New Issue
Block a user