Set filename as top heading of md entries for better search context

Previously, the filename was appended to the end of the compiled entry.
This didn't provide appropriate structured context.

Test that the filename gets prepended as a heading to the compiled entry
This commit is contained in:
Debanjum Singh Solanky
2023-05-03 18:55:56 +08:00
parent 0e3fb59e09
commit 5de04621b5
2 changed files with 9 additions and 5 deletions

View File

@@ -129,8 +129,9 @@ class MarkdownToJsonl(TextToJsonl):
for parsed_entry in parsed_entries:
entry_filename = Path(entry_to_file_map[parsed_entry])
# Append base filename to compiled entry for context to model
compiled_entry = f"{parsed_entry}\n{entry_filename.stem}"
entries.append(Entry(compiled=compiled_entry, raw=parsed_entry, file=f"{entry_filename}"))
# Increment heading level for heading entries and make filename as its top level heading
prefix = f"# {entry_filename.stem}\n#" if heading else f"# {entry_filename.stem}\n"
compiled_entry = f"{prefix}{parsed_entry}"
logger.debug(f"Converted {len(parsed_entries)} markdown entries to dictionaries")