mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 13:22:12 +00:00
Ease access to image results for a given query via image_search
- Copy images to an accessible directory
- Return URL paths to them to ease access
- This is to be used in the web interface to render image results directly in the browser
- Return image, metadata scores for each image in the response as well. This should help get a better sense of image scores along both the XMP metadata and whole image axes
This commit is contained in:
@@ -82,13 +82,16 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
|
|||||||
if (t == SearchType.Image or t == None) and model.image_search:
|
if (t == SearchType.Image or t == None) and model.image_search:
|
||||||
# query transactions
|
# query transactions
|
||||||
hits = image_search.query(user_query, results_count, model.image_search)
|
hits = image_search.query(user_query, results_count, model.image_search)
|
||||||
|
output_directory = f'{os.getcwd()}/{web_directory}'
|
||||||
|
|
||||||
# collate and return results
|
# collate and return results
|
||||||
return image_search.collate_results(
|
return image_search.collate_results(
|
||||||
hits,
|
hits,
|
||||||
model.image_search.image_names,
|
image_names=model.image_search.image_names,
|
||||||
config.content_type.image.input_directory,
|
image_directory=config.content_type.image.input_directory,
|
||||||
results_count)
|
output_directory=output_directory,
|
||||||
|
static_files_url='/static',
|
||||||
|
count=results_count)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return {}
|
return {}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import pathlib
|
import pathlib
|
||||||
import copy
|
import copy
|
||||||
|
import shutil
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
from sentence_transformers import SentenceTransformer, util
|
from sentence_transformers import SentenceTransformer, util
|
||||||
@@ -118,7 +119,7 @@ def query(raw_query, count, model: ImageSearchModel):
|
|||||||
query_embedding = model.image_encoder.encode([query], convert_to_tensor=True, show_progress_bar=False)
|
query_embedding = model.image_encoder.encode([query], convert_to_tensor=True, show_progress_bar=False)
|
||||||
|
|
||||||
# Compute top_k ranked images based on cosine-similarity b/w query and all image embeddings.
|
# Compute top_k ranked images based on cosine-similarity b/w query and all image embeddings.
|
||||||
image_hits = {result['corpus_id']: result['score']
|
image_hits = {result['corpus_id']: {'image_score': result['score'], 'score': result['score']}
|
||||||
for result
|
for result
|
||||||
in util.semantic_search(query_embedding, model.image_embeddings, top_k=count)[0]}
|
in util.semantic_search(query_embedding, model.image_embeddings, top_k=count)[0]}
|
||||||
|
|
||||||
@@ -130,10 +131,22 @@ def query(raw_query, count, model: ImageSearchModel):
|
|||||||
|
|
||||||
# Sum metadata, image scores of the highest ranked images
|
# Sum metadata, image scores of the highest ranked images
|
||||||
for corpus_id, score in metadata_hits.items():
|
for corpus_id, score in metadata_hits.items():
|
||||||
image_hits[corpus_id] = image_hits.get(corpus_id, 0) + score
|
if 'corpus_id' in image_hits:
|
||||||
|
image_hits[corpus_id].update({
|
||||||
|
'metadata_score': score,
|
||||||
|
'score': image_hits[corpus_id].get('score', 0) + score,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
image_hits[corpus_id] = {'metadata_score': score, 'score': score}
|
||||||
|
|
||||||
# Reformat results in original form from sentence transformer semantic_search()
|
# Reformat results in original form from sentence transformer semantic_search()
|
||||||
hits = [{'corpus_id': corpus_id, 'score': score} for corpus_id, score in image_hits.items()]
|
hits = [
|
||||||
|
{
|
||||||
|
'corpus_id': corpus_id,
|
||||||
|
'score': scores['score'],
|
||||||
|
'image_score': scores.get('image_score', 0),
|
||||||
|
'metadata_score': scores.get('metadata_score', 0),
|
||||||
|
} for corpus_id, scores in image_hits.items()]
|
||||||
|
|
||||||
# Sort the images based on their combined metadata, image scores
|
# Sort the images based on their combined metadata, image scores
|
||||||
return sorted(hits, key=lambda hit: hit["score"], reverse=True)
|
return sorted(hits, key=lambda hit: hit["score"], reverse=True)
|
||||||
@@ -149,15 +162,29 @@ def render_results(hits, image_names, image_directory, count):
|
|||||||
img.show()
|
img.show()
|
||||||
|
|
||||||
|
|
||||||
def collate_results(hits, image_names, image_directory, count=5):
|
def collate_results(hits, image_names, image_directory, output_directory, static_files_url, count=5):
|
||||||
|
results = []
|
||||||
image_directory = resolve_absolute_path(image_directory, strict=True)
|
image_directory = resolve_absolute_path(image_directory, strict=True)
|
||||||
return [
|
|
||||||
{
|
for index, hit in enumerate(hits[:count]):
|
||||||
"Entry": image_directory.joinpath(image_names[hit['corpus_id']]),
|
source_image_name = image_names[hit['corpus_id']]
|
||||||
"Score": f"{hit['score']:.3f}"
|
source_path = image_directory.joinpath(source_image_name)
|
||||||
}
|
|
||||||
for hit
|
target_image_name = f"{index}{source_path.suffix}"
|
||||||
in hits[0:count]]
|
target_path = resolve_absolute_path(f"{output_directory}/{target_image_name}")
|
||||||
|
|
||||||
|
# Copy the image to the output directory
|
||||||
|
shutil.copy(source_path, target_path)
|
||||||
|
|
||||||
|
# Add the image metadata to the results
|
||||||
|
results += [{
|
||||||
|
"entry": f'{static_files_url}/{target_image_name}',
|
||||||
|
"score": f"{hit['score']:.3f}",
|
||||||
|
"image_score": f"{hit['image_score']:.3f}",
|
||||||
|
"metadata_score": f"{hit['metadata_score']:.3f}",
|
||||||
|
}]
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenerate: bool, verbose: bool=False) -> ImageSearchModel:
|
def setup(config: ImageContentConfig, search_config: ImageSearchConfig, regenerate: bool, verbose: bool=False) -> ImageSearchModel:
|
||||||
|
|||||||
Reference in New Issue
Block a user