From d68a9dc4454e0223379e6d1e57b39328e3be13f0 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 20 Jul 2022 03:51:27 +0400 Subject: [PATCH] Sort extracted images before computing their embeddings - Image order returned by glob is OS-dependent - This prevented sharing image embeddings across machines running different operating systems - A stable sort order for processed images allows sharing embeddings across machines. - Use case: A more powerful, always-on machine actually computes the image embeddings regularly. The client machine just loads these periodically to provide semantic search functionality --- src/search_type/image_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search_type/image_search.py b/src/search_type/image_search.py index 3cb2314d..7d1f86df 100644 --- a/src/search_type/image_search.py +++ b/src/search_type/image_search.py @@ -40,7 +40,7 @@ def extract_entries(image_directories, verbose=0): if verbose > 0: image_directory_names = ', '.join([str(image_directory) for image_directory in image_directories]) print(f'Found {len(image_names)} images in {image_directory_names}') - return image_names + return sorted(image_names) def compute_embeddings(image_names, encoder, embeddings_file, batch_size=50, use_xmp_metadata=False, regenerate=False, verbose=0):