mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-04 05:39:06 +00:00
Support multiple input-filters to configure content to index via khoj.yml
- Update existing code and tests to process input-filters as a list instead of a str - Test that the `text_to_jsonl` get-files methods work with a combination of `input-files` and `input-filters`. Resolves #84
This commit is contained in:
@@ -6,7 +6,7 @@ from typing import List, Optional
|
||||
from pydantic import BaseModel, validator
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.helpers import to_snake_case_from_dash
|
||||
from src.utils.helpers import to_snake_case_from_dash, is_none_or_empty
|
||||
|
||||
class ConfigBase(BaseModel):
|
||||
class Config:
|
||||
@@ -15,27 +15,27 @@ class ConfigBase(BaseModel):
|
||||
|
||||
class TextContentConfig(ConfigBase):
|
||||
input_files: Optional[List[Path]]
|
||||
input_filter: Optional[str]
|
||||
input_filter: Optional[List[str]]
|
||||
compressed_jsonl: Path
|
||||
embeddings_file: Path
|
||||
index_heading_entries: Optional[bool] = False
|
||||
|
||||
@validator('input_filter')
|
||||
def input_filter_or_files_required(cls, input_filter, values, **kwargs):
|
||||
if input_filter is None and ('input_files' not in values or values["input_files"] is None):
|
||||
if is_none_or_empty(input_filter) and ('input_files' not in values or values["input_files"] is None):
|
||||
raise ValueError("Either input_filter or input_files required in all content-type.<text_search> section of Khoj config file")
|
||||
return input_filter
|
||||
|
||||
class ImageContentConfig(ConfigBase):
|
||||
input_directories: Optional[List[Path]]
|
||||
input_filter: Optional[str]
|
||||
input_filter: Optional[List[str]]
|
||||
embeddings_file: Path
|
||||
use_xmp_metadata: bool
|
||||
batch_size: int
|
||||
|
||||
@validator('input_filter')
|
||||
def input_filter_or_directories_required(cls, input_filter, values, **kwargs):
|
||||
if input_filter is None and ('input_directories' not in values or values["input_directories"] is None):
|
||||
if is_none_or_empty(input_filter) and ('input_directories' not in values or values["input_directories"] is None):
|
||||
raise ValueError("Either input_filter or input_directories required in all content-type.image section of Khoj config file")
|
||||
return input_filter
|
||||
|
||||
|
||||
Reference in New Issue
Block a user