mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Skip indexing single bad markdown, plaintext file (#460)
This commit is contained in:
@@ -105,10 +105,14 @@ class MarkdownToJsonl(TextToJsonl):
|
||||
entry_to_file_map = []
|
||||
for markdown_file in markdown_files:
|
||||
with open(markdown_file, "r", encoding="utf8") as f:
|
||||
markdown_content = f.read()
|
||||
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
||||
markdown_content, markdown_file, entries, entry_to_file_map
|
||||
)
|
||||
try:
|
||||
markdown_content = f.read()
|
||||
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
||||
markdown_content, markdown_file, entries, entry_to_file_map
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to process file: {markdown_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return entries, dict(entry_to_file_map)
|
||||
|
||||
|
||||
@@ -100,7 +100,8 @@ class OrgToJsonl(TextToJsonl):
|
||||
entry_to_file_map += zip(org_file_entries, [org_file] * len(org_file_entries))
|
||||
entries.extend(org_file_entries)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file: {org_file} with error: {e}", exc_info=True)
|
||||
logger.warning(f"Unable to process file: {org_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return entries, dict(entry_to_file_map)
|
||||
|
||||
|
||||
@@ -91,8 +91,12 @@ class PlaintextToJsonl(TextToJsonl):
|
||||
|
||||
for plaintext_file in plaintext_files:
|
||||
with open(plaintext_file, "r") as f:
|
||||
plaintext_content = f.read()
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
try:
|
||||
plaintext_content = f.read()
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to process file: {plaintext_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return dict(entry_to_file_map)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user