Support multiple input-filters to configure content to index via khoj.yml

- Update existing code and tests to process input-filters as a list
  instead of a str
- Test that the `text_to_jsonl' get-files methods work with a combination
  of `input-files' and `input-filters'

Resolves #84
This commit is contained in:
Debanjum Singh Solanky
2022-09-12 10:39:39 +03:00
parent 940c8fac8c
commit a701ad08b9
11 changed files with 138 additions and 37 deletions

View File

@@ -6,7 +6,7 @@ from typing import List, Optional
from pydantic import BaseModel, validator
# Internal Packages
from src.utils.helpers import to_snake_case_from_dash
from src.utils.helpers import to_snake_case_from_dash, is_none_or_empty
class ConfigBase(BaseModel):
class Config:
@@ -15,27 +15,27 @@ class ConfigBase(BaseModel):
# Settings for a text content type section of the Khoj config file.
# NOTE: this is a flattened diff hunk: indentation is stripped and, where a
# line changed, the old version is immediately followed by its replacement.
class TextContentConfig(ConfigBase):
input_files: Optional[List[Path]]
# input_filter changes from a single string to a list of strings.
input_filter: Optional[str]
input_filter: Optional[List[str]]
compressed_jsonl: Path
embeddings_file: Path
index_heading_entries: Optional[bool] = False
# Validator for input_filter: raises ValueError when no filter is given and
# input_files is missing from `values` or is None; otherwise returns
# input_filter unchanged.
# NOTE(review): pydantic skips validators for fields that are not supplied
# unless always=True -- confirm this check fires when input_filter is omitted.
@validator('input_filter')
def input_filter_or_files_required(cls, input_filter, values, **kwargs):
# The old condition only caught None; the new one goes through
# is_none_or_empty (imported from src.utils.helpers, not shown here --
# presumably also true for an empty list; verify against the helper).
if input_filter is None and ('input_files' not in values or values["input_files"] is None):
if is_none_or_empty(input_filter) and ('input_files' not in values or values["input_files"] is None):
raise ValueError("Either input_filter or input_files required in all content-type.<text_search> section of Khoj config file")
return input_filter
# Settings for the image content type section of the Khoj config file.
# NOTE: this is a flattened diff hunk: indentation is stripped and, where a
# line changed, the old version is immediately followed by its replacement.
class ImageContentConfig(ConfigBase):
input_directories: Optional[List[Path]]
# input_filter changes from a single string to a list of strings.
input_filter: Optional[str]
input_filter: Optional[List[str]]
embeddings_file: Path
use_xmp_metadata: bool
batch_size: int
# Validator for input_filter: raises ValueError when no filter is given and
# input_directories is missing from `values` or is None; otherwise returns
# input_filter unchanged.
# NOTE(review): pydantic skips validators for fields that are not supplied
# unless always=True -- confirm this check fires when input_filter is omitted.
@validator('input_filter')
def input_filter_or_directories_required(cls, input_filter, values, **kwargs):
# The old condition only caught None; the new one goes through
# is_none_or_empty (imported from src.utils.helpers, not shown here --
# presumably also true for an empty list; verify against the helper).
if input_filter is None and ('input_directories' not in values or values["input_directories"] is None):
if is_none_or_empty(input_filter) and ('input_directories' not in values or values["input_directories"] is None):
raise ValueError("Either input_filter or input_directories required in all content-type.image section of Khoj config file")
return input_filter