mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 21:29:08 +00:00
Support Indexing Images via OCR (#823)
- Added support for uploading .jpeg, .jpg, and .png files to Khoj from Web, Desktop app
- Updating indexer to generate raw text and entries using RapidOCR
- Details
* added support for indexing images via ocr
* fixed pyproject.toml
* Update src/khoj/processor/content/images/image_to_entries.py
Co-authored-by: Debanjum <debanjum@gmail.com>
* Update src/khoj/processor/content/images/image_to_entries.py
Co-authored-by: Debanjum <debanjum@gmail.com>
* removed redudant try except blocks
* updated desktop js file to support image formats
* added tests for jpg and png
* Fix processing for image to entries files
* Update unit tests with working image indexer
* Change png test from version verificaition to open-cv verification
---------
Co-authored-by: Debanjum <debanjum@gmail.com>
Co-authored-by: sabaimran <narmiabas@gmail.com>
This commit is contained in:
@@ -19,7 +19,7 @@ const textFileTypes = [
|
||||
'org', 'md', 'markdown', 'txt', 'html', 'xml',
|
||||
// Other valid text file extensions from https://google.github.io/magika/model/config.json
|
||||
'appleplist', 'asm', 'asp', 'batch', 'c', 'cs', 'css', 'csv', 'eml', 'go', 'html', 'ini', 'internetshortcut', 'java', 'javascript', 'json', 'latex', 'lisp', 'makefile', 'markdown', 'mht', 'mum', 'pem', 'perl', 'php', 'powershell', 'python', 'rdf', 'rst', 'rtf', 'ruby', 'rust', 'scala', 'shell', 'smali', 'sql', 'svg', 'symlinktext', 'txt', 'vba', 'winregistry', 'xml', 'yaml']
|
||||
const binaryFileTypes = ['pdf']
|
||||
const binaryFileTypes = ['pdf', 'jpg', 'jpeg', 'png']
|
||||
const validFileTypes = textFileTypes.concat(binaryFileTypes);
|
||||
|
||||
const schema = {
|
||||
|
||||
Reference in New Issue
Block a user