mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
- Added support for uploading .jpeg, .jpg, and .png files to Khoj from Web, Desktop app
- Updating indexer to generate raw text and entries using RapidOCR
- Details
* added support for indexing images via ocr
* fixed pyproject.toml
* Update src/khoj/processor/content/images/image_to_entries.py
Co-authored-by: Debanjum <debanjum@gmail.com>
* Update src/khoj/processor/content/images/image_to_entries.py
Co-authored-by: Debanjum <debanjum@gmail.com>
* removed redundant try except blocks
* updated desktop js file to support image formats
* added tests for jpg and png
* Fix processing for image to entries files
* Update unit tests with working image indexer
* Change png test from version verification to opencv-python verification
---------
Co-authored-by: Debanjum <debanjum@gmail.com>
Co-authored-by: sabaimran <narmiabas@gmail.com>
22 lines
723 B
Python
22 lines
723 B
Python
import os
|
|
|
|
from khoj.processor.content.images.image_to_entries import ImageToEntries
|
|
|
|
|
|
def test_png_to_jsonl():
    """Extract entries from the sample PNG and check the expected text is found."""
    image_path = "tests/data/images/testocr.png"

    # Map the file path to its raw bytes, the input shape passed to the extractor.
    with open(image_path, "rb") as image_file:
        image_files = {image_path: image_file.read()}

    result = ImageToEntries.extract_image_entries(image_files=image_files)

    # The extractor's return value is expected to hold two items; the first
    # element of the second item carries the extracted text in `.raw`.
    assert len(result) == 2
    assert "opencv-python" in result[1][0].raw
|
|
|
|
|
|
def test_jpg_to_jsonl():
    """Extract entries from the sample JPG and check the expected text is found."""
    image_path = "tests/data/images/nasdaq.jpg"

    # Map the file path to its raw bytes, the input shape passed to the extractor.
    with open(image_path, "rb") as image_file:
        image_files = {image_path: image_file.read()}

    result = ImageToEntries.extract_image_entries(image_files=image_files)

    # The extractor's return value is expected to hold two items; the first
    # element of the second item carries the extracted text in `.raw`.
    assert len(result) == 2
    assert "investments" in result[1][0].raw
|