Add Vision Support (#889)

# Summary of Changes
* New UI to show preview of image uploads
* ChatML message changes to support gpt-4o vision-based responses on images
* AWS S3 image uploads for persistent image context in conversations
* Database changes to add a `vision_enabled` option to the server admin panel when configuring models
* Render previously uploaded images in the chat history, and show uploaded images for pending messages
* Pass the uploaded_image_url through to subqueries
* Allow image to render upon first message from the homepage
* Add rendering support for images to shared chat as well
* Fix some UI/functionality bugs in the share page
* Convert user attached images for chat to webp format before upload
* Use a placeholder for the attached image in the data source and response mode actors
* Update all clients to call /api/chat as a POST instead of GET request
* Fix copying chat messages with images to clipboard

TL;DR: Add vision support for OpenAI models on Khoj via the web UI!

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Co-authored-by: Debanjum Singh Solanky <debanjum@gmail.com>
This commit is contained in:
Raghav Tirumale
2024-09-09 17:22:18 -05:00
committed by GitHub
parent b553bba1d8
commit 549686a7a4
33 changed files with 740 additions and 417 deletions

View File

@@ -49,7 +49,7 @@ def create_conversation(message_list, user, agent=None):
@pytest.mark.django_db(transaction=True)
def test_offline_chat_with_no_chat_history_or_retrieved_content(client_offline_chat):
# Act
response = client_offline_chat.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="Hello, my name is Testatron. Who are you?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -67,7 +67,7 @@ def test_chat_with_online_content(client_offline_chat):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
encoded_q = quote(q, safe="")
response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
response = client_offline_chat.post(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
@@ -89,7 +89,7 @@ def test_chat_with_online_webpage_content(client_offline_chat):
# Act
q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
encoded_q = quote(q, safe="")
response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
response = client_offline_chat.post(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
@@ -112,7 +112,7 @@ def test_answer_from_chat_history(client_offline_chat, default_user2):
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="What is my name?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="What is my name?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -139,7 +139,7 @@ def test_answer_from_currently_retrieved_content(client_offline_chat, default_us
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="Where was Xi Li born?"')
response = client_offline_chat.post(f'/api/chat?q="Where was Xi Li born?"')
response_message = response.content.decode("utf-8")
# Assert
@@ -163,7 +163,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="Where was I born?"')
response = client_offline_chat.post(f'/api/chat?q="Where was I born?"')
response_message = response.content.decode("utf-8")
# Assert
@@ -185,7 +185,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(client_offline
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="Where was I born?"')
response = client_offline_chat.post(f'/api/chat?q="Where was I born?"')
response_message = response.content.decode("utf-8")
# Assert
@@ -211,7 +211,7 @@ def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat, def
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="Where was I born?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="Where was I born?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -232,7 +232,7 @@ def test_answer_using_general_command(client_offline_chat, default_user2):
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f"/api/chat?q={query}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -250,7 +250,7 @@ def test_answer_from_retrieved_content_using_notes_command(client_offline_chat,
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f"/api/chat?q={query}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -269,8 +269,8 @@ def test_answer_using_file_filter(client_offline_chat, default_user2):
create_conversation(message_list, default_user2)
# Act
no_answer_response = client_offline_chat.get(f"/api/chat?q={no_answer_query}&stream=true").content.decode("utf-8")
answer_response = client_offline_chat.get(f"/api/chat?q={answer_query}&stream=true").content.decode("utf-8")
no_answer_response = client_offline_chat.post(f"/api/chat?q={no_answer_query}&stream=true").content.decode("utf-8")
answer_response = client_offline_chat.post(f"/api/chat?q={answer_query}&stream=true").content.decode("utf-8")
# Assert
assert "Fujiang" not in no_answer_response
@@ -287,7 +287,7 @@ def test_answer_not_known_using_notes_command(client_offline_chat, default_user2
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f"/api/chat?q={query}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -320,7 +320,7 @@ def test_summarize_one_file(client_offline_chat, default_user2: KhojUser):
json={"filename": summarization_file, "conversation_id": str(conversation.id)},
)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message != ""
@@ -352,7 +352,7 @@ def test_summarize_extra_text(client_offline_chat, default_user2: KhojUser):
json={"filename": summarization_file, "conversation_id": str(conversation.id)},
)
query = urllib.parse.quote("/summarize tell me about Xiu")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message != ""
@@ -380,7 +380,7 @@ def test_summarize_multiple_files(client_offline_chat, default_user2: KhojUser):
)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -393,7 +393,7 @@ def test_summarize_no_files(client_offline_chat, default_user2: KhojUser):
message_list = []
conversation = create_conversation(message_list, default_user2)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message == "No files selected for summarization. Please add files using the section on the left."
@@ -425,14 +425,14 @@ def test_summarize_different_conversation(client_offline_chat, default_user2: Kh
)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation2.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation2.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message == "No files selected for summarization. Please add files using the section on the left."
# now make sure that the file filter is still in conversation 1
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation1.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation1.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -452,7 +452,7 @@ def test_summarize_nonexistant_file(client_offline_chat, default_user2: KhojUser
json={"filename": "imaginary.markdown", "conversation_id": str(conversation.id)},
)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message == "No files selected for summarization. Please add files using the section on the left."
@@ -483,7 +483,7 @@ def test_summarize_diff_user_file(
json={"filename": summarization_file, "conversation_id": str(conversation.id)},
)
query = urllib.parse.quote("/summarize")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response_message == "No files selected for summarization. Please add files using the section on the left."
@@ -499,7 +499,7 @@ def test_answer_requires_current_date_awareness(client_offline_chat):
query = urllib.parse.quote("Where did I have lunch today?")
# Act
response = client_offline_chat.get(f"/api/chat?q={query}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
@@ -518,7 +518,7 @@ def test_answer_requires_current_date_awareness(client_offline_chat):
def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat):
"Chat director should be able to answer questions that require date aware aggregation across multiple notes"
# Act
response = client_offline_chat.get(f'/api/chat?q="How much did I spend on dining this year?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="How much did I spend on dining this year?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -559,7 +559,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(client
@pytest.mark.django_db(transaction=True)
def test_ask_for_clarification_if_not_enough_context_in_question(client_offline_chat, default_user2):
# Act
response = client_offline_chat.get(f'/api/chat?q="What is the name of Namitas older son"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="What is the name of Namitas older son"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -589,7 +589,7 @@ def test_answer_in_chat_history_beyond_lookback_window(client_offline_chat, defa
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="What is my name?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="What is my name?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -653,7 +653,7 @@ def test_answer_in_chat_history_by_conversation_id_with_agent(
# Act
query = urllib.parse.quote("/general What did I eat for breakfast?")
response = client_offline_chat.get(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response = client_offline_chat.post(f"/api/chat?q={query}&conversation_id={conversation.id}&stream=true")
response_message = response.content.decode("utf-8")
# Assert that agent only responds with the summary of spending
@@ -673,7 +673,7 @@ def test_answer_chat_history_very_long(client_offline_chat, default_user2):
create_conversation(message_list, default_user2)
# Act
response = client_offline_chat.get(f'/api/chat?q="What is my name?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="What is my name?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert
@@ -687,7 +687,7 @@ def test_answer_chat_history_very_long(client_offline_chat, default_user2):
def test_answer_requires_multiple_independent_searches(client_offline_chat):
"Chat director should be able to answer by doing multiple independent searches for required information"
# Act
response = client_offline_chat.get(f'/api/chat?q="Is Xi older than Namita?"&stream=true')
response = client_offline_chat.post(f'/api/chat?q="Is Xi older than Namita?"&stream=true')
response_message = response.content.decode("utf-8")
# Assert