mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Skip indexing single bad markdown, plaintext file (#460)
This commit is contained in:
@@ -105,10 +105,14 @@ class MarkdownToJsonl(TextToJsonl):
|
|||||||
entry_to_file_map = []
|
entry_to_file_map = []
|
||||||
for markdown_file in markdown_files:
|
for markdown_file in markdown_files:
|
||||||
with open(markdown_file, "r", encoding="utf8") as f:
|
with open(markdown_file, "r", encoding="utf8") as f:
|
||||||
|
try:
|
||||||
markdown_content = f.read()
|
markdown_content = f.read()
|
||||||
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
||||||
markdown_content, markdown_file, entries, entry_to_file_map
|
markdown_content, markdown_file, entries, entry_to_file_map
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Unable to process file: {markdown_file}. This file will not be indexed.")
|
||||||
|
logger.warning(e, exc_info=True)
|
||||||
|
|
||||||
return entries, dict(entry_to_file_map)
|
return entries, dict(entry_to_file_map)
|
||||||
|
|
||||||
|
|||||||
@@ -100,7 +100,8 @@ class OrgToJsonl(TextToJsonl):
|
|||||||
entry_to_file_map += zip(org_file_entries, [org_file] * len(org_file_entries))
|
entry_to_file_map += zip(org_file_entries, [org_file] * len(org_file_entries))
|
||||||
entries.extend(org_file_entries)
|
entries.extend(org_file_entries)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing file: {org_file} with error: {e}", exc_info=True)
|
logger.warning(f"Unable to process file: {org_file}. This file will not be indexed.")
|
||||||
|
logger.warning(e, exc_info=True)
|
||||||
|
|
||||||
return entries, dict(entry_to_file_map)
|
return entries, dict(entry_to_file_map)
|
||||||
|
|
||||||
|
|||||||
@@ -91,8 +91,12 @@ class PlaintextToJsonl(TextToJsonl):
|
|||||||
|
|
||||||
for plaintext_file in plaintext_files:
|
for plaintext_file in plaintext_files:
|
||||||
with open(plaintext_file, "r") as f:
|
with open(plaintext_file, "r") as f:
|
||||||
|
try:
|
||||||
plaintext_content = f.read()
|
plaintext_content = f.read()
|
||||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Unable to process file: {plaintext_file}. This file will not be indexed.")
|
||||||
|
logger.warning(e, exc_info=True)
|
||||||
|
|
||||||
return dict(entry_to_file_map)
|
return dict(entry_to_file_map)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user