mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-04 05:39:06 +00:00
Add Vision Support (#889)
# Summary of Changes

* New UI to show a preview of image uploads
* ChatML message changes to support gpt-4o vision-based responses on images
* AWS S3 image uploads for persistent image context in conversations
* Database changes to add a `vision_enabled` option to the server admin panel when configuring models
* Render previously uploaded images in the chat history, and show uploaded images for pending messages
* Pass the uploaded_image_url through to subqueries
* Allow the image to render upon the first message from the homepage
* Add rendering support for images to shared chats as well
* Fix some UI/functionality bugs in the share page
* Convert user-attached images for chat to webp format before upload
* Use a placeholder for the attached image in the data source and response mode actors
* Update all clients to call /api/chat as a POST instead of a GET request
* Fix copying chat messages with images to the clipboard

TL;DR: Add vision support for OpenAI models on Khoj via the web UI!

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Co-authored-by: Debanjum Singh Solanky <debanjum@gmail.com>
This commit is contained in:
@@ -267,6 +267,7 @@ export default function ChatHistory(props: ChatHistoryProps) {
|
||||
created: message.timestamp,
|
||||
by: "you",
|
||||
automationId: "",
|
||||
uploadedImageData: message.uploadedImageData,
|
||||
}}
|
||||
customClassName="fullHistory"
|
||||
borderLeftColor={`${data?.agent.color}-500`}
|
||||
@@ -309,6 +310,7 @@ export default function ChatHistory(props: ChatHistoryProps) {
|
||||
created: new Date().getTime().toString(),
|
||||
by: "you",
|
||||
automationId: "",
|
||||
uploadedImageData: props.pendingMessage,
|
||||
}}
|
||||
customClassName="fullHistory"
|
||||
borderLeftColor={`${data?.agent.color}-500`}
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
Microphone,
|
||||
Notebook,
|
||||
Paperclip,
|
||||
X,
|
||||
Question,
|
||||
Robot,
|
||||
Shapes,
|
||||
@@ -55,6 +56,7 @@ export interface ChatOptions {
|
||||
|
||||
interface ChatInputProps {
|
||||
sendMessage: (message: string) => void;
|
||||
sendImage: (image: string) => void;
|
||||
sendDisabled: boolean;
|
||||
setUploadedFiles?: (files: string[]) => void;
|
||||
conversationId?: string | null;
|
||||
@@ -75,6 +77,9 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
const [showLoginPrompt, setShowLoginPrompt] = useState(false);
|
||||
|
||||
const [recording, setRecording] = useState(false);
|
||||
const [imageUploaded, setImageUploaded] = useState(false);
|
||||
const [imagePath, setImagePath] = useState<string | null>(null);
|
||||
const [imageData, setImageData] = useState<string | null>(null);
|
||||
const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder | null>(null);
|
||||
|
||||
const [progressValue, setProgressValue] = useState(0);
|
||||
@@ -97,7 +102,30 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
}
|
||||
}, [uploading]);
|
||||
|
||||
/**
 * Whenever the attached image changes, read it into a base64 data URL.
 *
 * `imagePath` holds the URL of the attached image (or null when nothing is
 * attached). The image is fetched, read with a FileReader, and the resulting
 * data URL is stored via `setImageData`. `setUploading` is held true for the
 * whole duration of the read and released once it settles.
 */
useEffect(() => {
    // No image attached: drop any stale data and leave the uploading flag alone.
    if (!imagePath) {
        setImageData(null);
        return;
    }

    // Set by the cleanup when the image is replaced/removed or the component unmounts.
    let cancelled = false;
    // True until the read below has settled (success or failure).
    let inFlight = true;

    async function fetchImageData(path: string): Promise<void> {
        try {
            const response = await fetch(path);
            if (!response.ok) {
                throw new Error(`Failed to load attached image (status ${response.status})`);
            }
            const blob = await response.blob();

            // Wrap the callback-based FileReader so completion and errors can be awaited.
            const base64data = await new Promise<string>((resolve, reject) => {
                const reader = new FileReader();
                reader.onload = () => {
                    if (typeof reader.result === "string") {
                        resolve(reader.result);
                    } else {
                        reject(new Error("Attached image could not be read as a data URL"));
                    }
                };
                reader.onerror = () => reject(reader.error);
                reader.readAsDataURL(blob);
            });

            // Ignore the result if this image was replaced or removed in the meantime.
            if (!cancelled) {
                setImageData(base64data);
            }
        } catch (error) {
            if (!cancelled) {
                console.error("Unable to read attached image", error);
            }
        } finally {
            inFlight = false;
            // Only release the flag for the current image; a newer run manages its own.
            if (!cancelled) {
                setUploading(false);
            }
        }
    }

    // Clear the previous image's data so it cannot be sent in place of the new one.
    setImageData(null);
    setUploading(true);
    void fetchImageData(imagePath);

    return () => {
        cancelled = true;
        // The read was abandoned mid-way: do not leave the uploading flag stuck on.
        if (inFlight) {
            setUploading(false);
        }
    };
}, [imagePath]);
|
||||
|
||||
function onSendMessage() {
|
||||
if (imageUploaded) {
|
||||
setImageUploaded(false);
|
||||
setImagePath(null);
|
||||
props.sendImage(imageData || "");
|
||||
}
|
||||
if (!message.trim()) return;
|
||||
|
||||
if (!props.isLoggedIn) {
|
||||
@@ -142,6 +170,17 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
setShowLoginPrompt(true);
|
||||
return;
|
||||
}
|
||||
// check for image file
|
||||
const image_endings = ["jpg", "jpeg", "png"];
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
const file = files[i];
|
||||
const file_extension = file.name.split(".").pop();
|
||||
if (image_endings.includes(file_extension || "")) {
|
||||
setImageUploaded(true);
|
||||
setImagePath(URL.createObjectURL(file));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
uploadDataForIndexing(
|
||||
files,
|
||||
@@ -287,6 +326,11 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
setIsDragAndDropping(false);
|
||||
}
|
||||
|
||||
/**
 * Discard the currently attached image: forget its path and mark that no
 * image is attached any more.
 */
function removeImageUpload() {
    // The two resets are independent state updates.
    setImagePath(null);
    setImageUploaded(false);
}
|
||||
|
||||
return (
|
||||
<>
|
||||
{showLoginPrompt && loginRedirectMessage && (
|
||||
@@ -397,11 +441,24 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
</div>
|
||||
)}
|
||||
<div
|
||||
className={`${styles.actualInputArea} items-center justify-between dark:bg-neutral-700`}
|
||||
className={`${styles.actualInputArea} items-center justify-between dark:bg-neutral-700 relative`}
|
||||
onDragOver={handleDragOver}
|
||||
onDragLeave={handleDragLeave}
|
||||
onDrop={handleDragAndDropFiles}
|
||||
>
|
||||
{imageUploaded && (
|
||||
<div className="absolute bottom-[80px] left-0 right-0 dark:bg-neutral-700 bg-white pt-5 pb-5 w-full rounded-lg border dark:border-none grid grid-cols-2">
|
||||
<div className="pl-4 pr-4">
|
||||
<img src={imagePath || ""} alt="img" className="w-auto max-h-[100px]" />
|
||||
</div>
|
||||
<div className="pl-4 pr-4">
|
||||
<X
|
||||
className="w-6 h-6 float-right dark:hover:bg-[hsl(var(--background))] hover:bg-neutral-100 rounded-sm"
|
||||
onClick={removeImageUpload}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<input
|
||||
type="file"
|
||||
multiple={true}
|
||||
@@ -427,6 +484,8 @@ export default function ChatInputArea(props: ChatInputProps) {
|
||||
value={message}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" && !e.shiftKey) {
|
||||
setImageUploaded(false);
|
||||
setImagePath(null);
|
||||
e.preventDefault();
|
||||
onSendMessage();
|
||||
}
|
||||
|
||||
@@ -53,6 +53,11 @@ div.chatMessageContainer h3 img {
|
||||
width: 24px;
|
||||
}
|
||||
|
||||
div.you img {
|
||||
height: 16rem;
|
||||
width: auto;
|
||||
}
|
||||
|
||||
div.you {
|
||||
color: hsla(var(--secondary-foreground));
|
||||
}
|
||||
|
||||
@@ -111,6 +111,7 @@ export interface SingleChatMessage {
|
||||
rawQuery?: string;
|
||||
intent?: Intent;
|
||||
agent?: AgentData;
|
||||
uploadedImageData?: string;
|
||||
}
|
||||
|
||||
export interface StreamMessage {
|
||||
@@ -122,6 +123,7 @@ export interface StreamMessage {
|
||||
rawQuery: string;
|
||||
timestamp: string;
|
||||
agent?: AgentData;
|
||||
uploadedImageData?: string;
|
||||
}
|
||||
|
||||
export interface ChatHistoryData {
|
||||
@@ -203,6 +205,7 @@ interface ChatMessageProps {
|
||||
borderLeftColor?: string;
|
||||
isLastMessage?: boolean;
|
||||
agent?: AgentData;
|
||||
uploadedImageData?: string;
|
||||
}
|
||||
|
||||
interface TrainOfThoughtProps {
|
||||
@@ -273,6 +276,7 @@ export function TrainOfThought(props: TrainOfThoughtProps) {
|
||||
export default function ChatMessage(props: ChatMessageProps) {
|
||||
const [copySuccess, setCopySuccess] = useState<boolean>(false);
|
||||
const [isHovering, setIsHovering] = useState<boolean>(false);
|
||||
const [textRendered, setTextRendered] = useState<string>("");
|
||||
const [markdownRendered, setMarkdownRendered] = useState<string>("");
|
||||
const [isPlaying, setIsPlaying] = useState<boolean>(false);
|
||||
const [interrupted, setInterrupted] = useState<boolean>(false);
|
||||
@@ -322,6 +326,10 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
.replace(/\\\[/g, "LEFTBRACKET")
|
||||
.replace(/\\\]/g, "RIGHTBRACKET");
|
||||
|
||||
if (props.chatMessage.uploadedImageData) {
|
||||
message = `\n\n${message}`;
|
||||
}
|
||||
|
||||
if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image") {
|
||||
message = ``;
|
||||
} else if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image2") {
|
||||
@@ -340,6 +348,9 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
message += `\n\n**Inferred Query**\n\n${props.chatMessage.intent["inferred-queries"][0]}`;
|
||||
}
|
||||
|
||||
setTextRendered(message);
|
||||
|
||||
// Render the markdown
|
||||
let markdownRendered = md.render(message);
|
||||
|
||||
// Replace placeholders with LaTeX delimiters
|
||||
@@ -542,7 +553,6 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
className={constructClasses(props.chatMessage)}
|
||||
onMouseLeave={(event) => setIsHovering(false)}
|
||||
onMouseEnter={(event) => setIsHovering(true)}
|
||||
onClick={props.chatMessage.by === "khoj" ? (event) => undefined : undefined}
|
||||
>
|
||||
<div className={chatMessageWrapperClasses(props.chatMessage)}>
|
||||
<div
|
||||
@@ -595,7 +605,7 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
title="Copy"
|
||||
className={`${styles.copyButton}`}
|
||||
onClick={() => {
|
||||
navigator.clipboard.writeText(props.chatMessage.message);
|
||||
navigator.clipboard.writeText(textRendered);
|
||||
setCopySuccess(true);
|
||||
}}
|
||||
>
|
||||
|
||||
Reference in New Issue
Block a user