mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Skip plaintext file indexing if there's a parsing issue and log the file
This commit is contained in:
@@ -91,10 +91,13 @@ class PlaintextToJsonl(TextToJsonl):
|
|||||||
|
|
||||||
for plaintext_file in plaintext_files:
|
for plaintext_file in plaintext_files:
|
||||||
with open(plaintext_file, "r") as f:
|
with open(plaintext_file, "r") as f:
|
||||||
|
try:
|
||||||
plaintext_content = f.read()
|
plaintext_content = f.read()
|
||||||
if plaintext_file.endswith(("html", "htm", "xml")):
|
if plaintext_file.endswith(("html", "htm", "xml")):
|
||||||
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
||||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file: {plaintext_file} - {e}", exc_info=True)
|
||||||
|
|
||||||
return dict(entry_to_file_map)
|
return dict(entry_to_file_map)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user