mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Allow indexing multiple image directories for image search
This commit is contained in:
@@ -15,7 +15,7 @@ content-type:
|
||||
embeddings-file: /data/embeddings/transaction_embeddings.pt
|
||||
|
||||
image:
|
||||
input-directory: "/data/images/"
|
||||
input-directories: ["/data/images/"]
|
||||
embeddings-file: "/data/embeddings/image_embeddings.pt"
|
||||
batch-size: 50
|
||||
use-xmp-metadata: true
|
||||
|
||||
@@ -30,10 +30,12 @@ def initialize_model(search_config: ImageSearchConfig):
|
||||
return encoder
|
||||
|
||||
|
||||
def extract_entries(image_directory, verbose=0):
|
||||
image_directory = resolve_absolute_path(image_directory, strict=True)
|
||||
image_names = list(image_directory.glob('*.jpg'))
|
||||
image_names.extend(list(image_directory.glob('*.jpeg')))
|
||||
def extract_entries(image_directories, verbose=0):
    """Collect the JPEG files found directly inside each given directory.

    Args:
        image_directories: Iterable of directory paths to scan. Each one is
            passed through ``resolve_absolute_path(..., strict=True)``.
        verbose: When greater than 0, print how many images each directory
            contributed.
    """
    image_names = []
    for image_directory in image_directories:
        image_directory = resolve_absolute_path(image_directory, strict=True)

        # Accumulate across directories: assigning to image_names here
        # (instead of extending it) would discard the images gathered from
        # every previously scanned directory.
        directory_images = [
            *image_directory.glob('*.jpg'),
            *image_directory.glob('*.jpeg'),
        ]
        image_names.extend(directory_images)

        # Report per directory so the count matches the path printed, and so
        # an empty `image_directories` never touches an unbound loop variable.
        if verbose > 0:
            print(f'Found {len(directory_images)} images in {image_directory}')

    # NOTE(review): the diff hunk ends here; the remainder of the function is
    # not visible. The caller assigns the result of extract_entries(), so
    # presumably image_names is returned below -- confirm against the full file.
|
||||
@@ -197,8 +199,8 @@ def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenera
|
||||
encoder = initialize_model(search_config)
|
||||
|
||||
# Extract Entries
|
||||
image_directory = resolve_absolute_path(config.input_directory, strict=True)
|
||||
image_names = extract_entries(image_directory, verbose)
|
||||
image_directories = [resolve_absolute_path(directory, strict=True) for directory in config.input_directories]
|
||||
image_names = extract_entries(image_directories, verbose)
|
||||
|
||||
# Compute or Load Embeddings
|
||||
embeddings_file = resolve_absolute_path(config.embeddings_file)
|
||||
|
||||
@@ -22,7 +22,7 @@ class TextContentConfig(ConfigBase):
|
||||
class ImageContentConfig(ConfigBase):
|
||||
use_xmp_metadata: Optional[bool]
|
||||
batch_size: Optional[int]
|
||||
input_directory: Optional[Path]
|
||||
input_directories: Optional[List[Path]]
|
||||
input_filter: Optional[str]
|
||||
embeddings_file: Optional[Path]
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ def model_dir(search_config):
|
||||
# Generate Image Embeddings from Test Images
|
||||
content_config = ContentConfig()
|
||||
content_config.image = ImageContentConfig(
|
||||
input_directory = 'tests/data/images',
|
||||
input_directories = ['tests/data/images'],
|
||||
embeddings_file = model_dir.joinpath('image_embeddings.pt'),
|
||||
batch_size = 10,
|
||||
use_xmp_metadata = False)
|
||||
@@ -70,7 +70,7 @@ def content_config(model_dir):
|
||||
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
|
||||
|
||||
content_config.image = ImageContentConfig(
|
||||
input_directory = 'tests/data/images',
|
||||
input_directories = ['tests/data/images'],
|
||||
embeddings_file = model_dir.joinpath('image_embeddings.pt'),
|
||||
batch_size = 10,
|
||||
use_xmp_metadata = False)
|
||||
|
||||
Reference in New Issue
Block a user