mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Skip plaintext file indexing if there's a parsing issue and log the file
This commit is contained in:
@@ -91,10 +91,13 @@ class PlaintextToJsonl(TextToJsonl):
|
||||
|
||||
for plaintext_file in plaintext_files:
|
||||
with open(plaintext_file, "r") as f:
|
||||
plaintext_content = f.read()
|
||||
if plaintext_file.endswith(("html", "htm", "xml")):
|
||||
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
try:
|
||||
plaintext_content = f.read()
|
||||
if plaintext_file.endswith(("html", "htm", "xml")):
|
||||
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file: {plaintext_file} - {e}", exc_info=True)
|
||||
|
||||
return dict(entry_to_file_map)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user