Get XMP metadata from image using Pillow. Remove ExifTool dependency

- Pillow already supports reading XMP metadata from images
- Removes the need to maintain my fork of the unmaintained PyExifTool
- This also removes the dependency on the system ExifTool package for XMP metadata extraction
- Add a test to verify that XMP metadata is extracted from the test images
- Remove references to ExifTool from the documentation
2026-03-02 21:19:12 +00:00 · 2022-09-14 13:22:27 +03:00
parent 8f57a62675
commit bf1ae038cb
10 changed files with 35 additions and 350 deletions
--- a/tests/data/images/guineapig_grass.jpg
+++ b/tests/data/images/guineapig_grass.jpg
--- a/tests/data/images/horse_dog.jpg
+++ b/tests/data/images/horse_dog.jpg
--- a/tests/data/images/kitten_park.jpg
+++ b/tests/data/images/kitten_park.jpg
--- a/tests/data/markdown/main_readme.md
+++ b/tests/data/markdown/main_readme.md
@@ -96,12 +96,6 @@ docker-compose build --pull
        Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html)
        \[Required\]

-    3.  Install Exiftool \[Optional\]
-
-        ``` shell
-        sudo apt-get -y install libimage-exiftool-perl
-        ```
-
 2.  2\. Install Khoj

    ``` shell
@@ -149,5 +143,3 @@ conda activate khoj
    Documentation](https://www.sbert.net/examples/applications/image-search/README.html)
 -   Charles Cave for [OrgNode
    Parser](http://members.optusnet.com.au/~charles57/GTD/orgnode.html)
-   Sven Marnach for
-    [PyExifTool](https://github.com/smarnach/pyexiftool/blob/master/exiftool.py)
--- a/tests/test_image_search.py
+++ b/tests/test_image_search.py
@@ -25,6 +25,28 @@ def test_image_search_setup(content_config: ContentConfig, search_config: Search
    assert len(image_search_model.image_embeddings) == 3


+def test_image_metadata(content_config: ContentConfig):
+    """Verify the XMP description and subjects of each test image appear in the output of image_search.extract_metadata."""
+    # Arrange: (expected snippets, image file name) pairs; snippets look like a description followed by subject keywords
+    expected_metadata_image_name_pairs = [
+        (["Billi Ka Bacha.", "Cat", "Grass"], "kitten_park.jpg"),
+        (["Pasture.", "Horse", "Dog"], "horse_dog.jpg"),
+        (["Guinea Pig Eating Celery.", "Rodent", "Whiskers"], "guineapig_grass.jpg")]
+
+    test_image_paths = [
+        Path(content_config.image.input_directories[0] / image_name[1])
+        for image_name in expected_metadata_image_name_pairs
+    ]
+
+    for expected_metadata, test_image_path in zip(expected_metadata_image_name_pairs, test_image_paths):
+        # Act: extract the metadata for the image at this path
+        actual_metadata = image_search.extract_metadata(test_image_path)
+
+        # Assert: expected_metadata is the whole pair, so [0] selects its snippet list; each snippet must be found
+        for expected_snippet in expected_metadata[0]:
+            assert expected_snippet in actual_metadata
+
+
 # ----------------------------------------------------------------------------------------------------
 def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
    # Arrange