mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Read text files as utf-8, instead of default os locale
On Windows, the default locale isn't utf8. Khoj had regressed to reading files in OS specified locale encoding, e.g cp1252, cp949 etc. It now explicitly uses utf8 encoding to read text files for indexing Resolves #495, resolves #472
This commit is contained in:
@@ -1,26 +1,25 @@
|
||||
# System Packages
|
||||
import logging
|
||||
import locale
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
from khoj.utils.config import SearchModels
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.state import content_index, search_models
|
||||
from khoj.search_type import text_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
|
||||
from khoj.processor.github.github_to_jsonl import GithubToJsonl
|
||||
from khoj.utils.config import SearchModels
|
||||
from khoj.utils.fs_syncer import get_org_files
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||
|
||||
|
||||
# Test
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_text_search_setup_with_missing_file_raises_error(
|
||||
org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
|
||||
):
|
||||
def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig):
|
||||
# Arrange
|
||||
# Ensure file mentioned in org.input-files is missing
|
||||
single_new_file = Path(org_config_with_only_new_file.input_files[0])
|
||||
@@ -29,7 +28,7 @@ def test_text_search_setup_with_missing_file_raises_error(
|
||||
# Act
|
||||
# Generate notes embeddings during asymmetric setup
|
||||
with pytest.raises(FileNotFoundError):
|
||||
data = get_org_files(org_config_with_only_new_file)
|
||||
get_org_files(org_config_with_only_new_file)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@@ -48,6 +47,7 @@ def test_text_search_setup_with_empty_file_raises_error(
|
||||
def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels):
|
||||
# Arrange
|
||||
data = get_org_files(content_config.org)
|
||||
|
||||
# Act
|
||||
# Regenerate notes embeddings during asymmetric setup
|
||||
notes_model = text_search.setup(
|
||||
|
||||
Reference in New Issue
Block a user