Allow indexing multiple image directories for image search

This commit is contained in:
Debanjum Singh Solanky
2022-07-20 02:54:03 +04:00
parent 68ee88cebc
commit 6c9ffdba57
4 changed files with 12 additions and 10 deletions

View File

@@ -15,7 +15,7 @@ content-type:
embeddings-file: /data/embeddings/transaction_embeddings.pt
image:
input-directory: "/data/images/"
input-directories: ["/data/images/"]
embeddings-file: "/data/embeddings/image_embeddings.pt"
batch-size: 50
use-xmp-metadata: true

View File

@@ -30,10 +30,12 @@ def initialize_model(search_config: ImageSearchConfig):
return encoder
def extract_entries(image_directory, verbose=0):
image_directory = resolve_absolute_path(image_directory, strict=True)
image_names = list(image_directory.glob('*.jpg'))
image_names.extend(list(image_directory.glob('*.jpeg')))
def extract_entries(image_directories, verbose=0):
    """Collect the .jpg and .jpeg files found in each of the given directories.

    Every directory is resolved with ``resolve_absolute_path(..., strict=True)``
    and its matches are appended to one shared list, so images from all
    directories are kept (not only those from the last directory).

    image_directories: iterable of directory paths to scan.
    verbose: when greater than 0, print how many images were found.
    """
    image_names = []
    for image_directory in image_directories:
        image_directory = resolve_absolute_path(image_directory, strict=True)
        # Accumulate with extend; rebinding image_names here would discard
        # the images already gathered from earlier directories.
        image_names.extend(image_directory.glob('*.jpg'))
        image_names.extend(image_directory.glob('*.jpeg'))
    if verbose > 0:
        # NOTE(review): reports the running total alongside the last directory
        # scanned; assumes at least one directory was given — confirm.
        print(f'Found {len(image_names)} images in {image_directory}')
@@ -197,8 +199,8 @@ def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenera
encoder = initialize_model(search_config)
# Extract Entries
image_directory = resolve_absolute_path(config.input_directory, strict=True)
image_names = extract_entries(image_directory, verbose)
image_directories = [resolve_absolute_path(directory, strict=True) for directory in config.input_directories]
image_names = extract_entries(image_directories, verbose)
# Compute or Load Embeddings
embeddings_file = resolve_absolute_path(config.embeddings_file)

View File

@@ -22,7 +22,7 @@ class TextContentConfig(ConfigBase):
class ImageContentConfig(ConfigBase):
use_xmp_metadata: Optional[bool]
batch_size: Optional[int]
input_directory: Optional[Path]
input_directories: Optional[List[Path]]
input_filter: Optional[str]
embeddings_file: Optional[Path]

View File

@@ -41,7 +41,7 @@ def model_dir(search_config):
# Generate Image Embeddings from Test Images
content_config = ContentConfig()
content_config.image = ImageContentConfig(
input_directory = 'tests/data/images',
input_directories = ['tests/data/images'],
embeddings_file = model_dir.joinpath('image_embeddings.pt'),
batch_size = 10,
use_xmp_metadata = False)
@@ -70,7 +70,7 @@ def content_config(model_dir):
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
content_config.image = ImageContentConfig(
input_directory = 'tests/data/images',
input_directories = ['tests/data/images'],
embeddings_file = model_dir.joinpath('image_embeddings.pt'),
batch_size = 10,
use_xmp_metadata = False)