diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index a09e33f5..a9656050 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -72,7 +72,7 @@ async def update( raise HTTPException(status_code=401, detail="Invalid API Key") state.config_lock.acquire() try: - logger.info(f"📬 Updating content index via API call by {client}") + logger.info(f"📬 Updating content index via API call by {client} client") org_files: Dict[str, str] = {} markdown_files: Dict[str, str] = {} pdf_files: Dict[str, str] = {} @@ -139,7 +139,8 @@ async def update( except Exception as e: logger.error( - f"🚨 Failed to {force} update {t} content index triggered via API call by {client}: {e}", exc_info=True + f"🚨 Failed to {force} update {t} content index triggered via API call by {client} client: {e}", + exc_info=True, ) finally: state.config_lock.release() @@ -154,7 +155,7 @@ async def update( host=host, ) - logger.info(f"📪 Content index updated via API call by {client}") + logger.info(f"📪 Content index updated via API call by {client} client") return Response(content="OK", status_code=200) diff --git a/src/khoj/utils/fs_syncer.py b/src/khoj/utils/fs_syncer.py index 5cf97add..1745b760 100644 --- a/src/khoj/utils/fs_syncer.py +++ b/src/khoj/utils/fs_syncer.py @@ -1,6 +1,5 @@ import logging import glob -import base64 from typing import Optional from bs4 import BeautifulSoup @@ -69,7 +68,7 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]: filename_to_content_map = {} for file in all_target_files: - with open(file, "r") as f: + with open(file, "r", encoding="utf8") as f: try: plaintext_content = f.read() if file.endswith(("html", "htm", "xml")): @@ -115,7 +114,7 @@ def get_org_files(config: TextContentConfig): filename_to_content_map = {} for file in all_org_files: - with open(file, "r") as f: + with open(file, "r", encoding="utf8") as f: try: filename_to_content_map[file] = f.read() except Exception as e: @@ -137,7 +136,7 @@ def get_markdown_files(config: TextContentConfig): logger.debug("At least one of markdown-files or markdown-file-filter is required to be specified") return {} - "Get Markdown files to process" + # Get markdown files to process absolute_markdown_files, filtered_markdown_files = set(), set() if markdown_files: absolute_markdown_files = {get_absolute_path(markdown_file) for markdown_file in markdown_files} @@ -164,7 +163,7 @@ def get_markdown_files(config: TextContentConfig): filename_to_content_map = {} for file in all_markdown_files: - with open(file, "r") as f: + with open(file, "r", encoding="utf8") as f: try: filename_to_content_map[file] = f.read() except Exception as e: diff --git a/tests/test_text_search.py b/tests/test_text_search.py index b1a9aa4d..60246a61 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -1,26 +1,25 @@ # System Packages import logging +import locale from pathlib import Path import os # External Packages import pytest -from khoj.utils.config import SearchModels # Internal Packages from khoj.utils.state import content_index, search_models from khoj.search_type import text_search -from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl from khoj.processor.github.github_to_jsonl import GithubToJsonl +from khoj.utils.config import SearchModels from khoj.utils.fs_syncer import get_org_files +from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig # Test # ---------------------------------------------------------------------------------------------------- -def test_text_search_setup_with_missing_file_raises_error( - org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig -): +def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig): # Arrange # Ensure file mentioned in org.input-files is missing single_new_file = Path(org_config_with_only_new_file.input_files[0]) @@ -29,7 +28,7 @@ def test_text_search_setup_with_missing_file_raises_error( # Act # Generate notes embeddings during asymmetric setup with pytest.raises(FileNotFoundError): - data = get_org_files(org_config_with_only_new_file) + get_org_files(org_config_with_only_new_file) # ---------------------------------------------------------------------------------------------------- @@ -48,6 +47,7 @@ def test_text_search_setup_with_empty_file_raises_error( def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels): # Arrange data = get_org_files(content_config.org) + # Act # Regenerate notes embeddings during asymmetric setup notes_model = text_search.setup(