mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Sort extracted images before computing their embeddings
- Image order returned by glob is OS-dependent.
- This prevented sharing image embeddings across machines running different operating systems.
- A stable sort order for processed images allows sharing embeddings across machines.
- Use case: a more powerful, always-on machine computes the image embeddings regularly. The client machine just loads these periodically to provide semantic search functionality.
This commit is contained in:
@@ -40,7 +40,7 @@ def extract_entries(image_directories, verbose=0):
|
|||||||
if verbose > 0:
|
if verbose > 0:
|
||||||
image_directory_names = ', '.join([str(image_directory) for image_directory in image_directories])
|
image_directory_names = ', '.join([str(image_directory) for image_directory in image_directories])
|
||||||
print(f'Found {len(image_names)} images in {image_directory_names}')
|
print(f'Found {len(image_names)} images in {image_directory_names}')
|
||||||
return image_names
|
return sorted(image_names)
|
||||||
|
|
||||||
|
|
||||||
def compute_embeddings(image_names, encoder, embeddings_file, batch_size=50, use_xmp_metadata=False, regenerate=False, verbose=0):
|
def compute_embeddings(image_names, encoder, embeddings_file, batch_size=50, use_xmp_metadata=False, regenerate=False, verbose=0):
|
||||||
|
|||||||
Reference in New Issue
Block a user