Add Vision Support (#889)

# Summary of Changes
* New UI to show preview of image uploads
* ChatML message changes to support gpt-4o vision based responses on images
* AWS S3 image uploads for persistent image context in conversations
* Database changes to have `vision_enabled` option in server admin panel while configuring models
* Render previously uploaded images in the chat history, show uploaded images for pending msgs
* Pass the uploaded_image_url through to subqueries
* Allow image to render upon first message from the homepage
* Add rendering support for images to shared chat as well
* Fix some UI/functionality bugs in the share page
* Convert user attached images for chat to webp format before upload
* Use placeholder to attached image for data source, response mode actors
* Update all clients to call /api/chat as a POST instead of GET request
* Fix copying chat messages with images to clipboard

TLDR; Add vision support for openai models on Khoj via the web UI!

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Co-authored-by: Debanjum Singh Solanky <debanjum@gmail.com>
This commit is contained in:
Raghav Tirumale
2024-09-09 17:22:18 -05:00
committed by GitHub
parent b553bba1d8
commit 549686a7a4
33 changed files with 740 additions and 417 deletions

View File

@@ -40,9 +40,9 @@ export default function RootLayout({
content="default-src 'self' https://assets.khoj.dev;
media-src * blob:;
script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval';
connect-src 'self' https://ipapi.co/json ws://localhost:42110;
connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110;
style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com;
img-src 'self' data: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com;
img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com;
font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com;
child-src 'none';
object-src 'none';"

View File

@@ -27,12 +27,14 @@ interface ChatBodyDataProps {
setUploadedFiles: (files: string[]) => void;
isMobileWidth?: boolean;
isLoggedIn: boolean;
setImage64: (image64: string) => void;
}
function ChatBodyData(props: ChatBodyDataProps) {
const searchParams = useSearchParams();
const conversationId = searchParams.get("conversationId");
const [message, setMessage] = useState("");
const [image, setImage] = useState<string | null>(null);
const [processingMessage, setProcessingMessage] = useState(false);
const [agentMetadata, setAgentMetadata] = useState<AgentData | null>(null);
@@ -40,6 +42,19 @@ function ChatBodyData(props: ChatBodyDataProps) {
const onConversationIdChange = props.onConversationIdChange;
useEffect(() => {
if (image) {
props.setImage64(encodeURIComponent(image));
}
}, [image, props.setImage64]);
useEffect(() => {
const storedImage = localStorage.getItem("image");
if (storedImage) {
setImage(storedImage);
props.setImage64(encodeURIComponent(storedImage));
localStorage.removeItem("image");
}
const storedMessage = localStorage.getItem("message");
if (storedMessage) {
setProcessingMessage(true);
@@ -95,6 +110,7 @@ function ChatBodyData(props: ChatBodyDataProps) {
agentColor={agentMetadata?.color}
isLoggedIn={props.isLoggedIn}
sendMessage={(message) => setMessage(message)}
sendImage={(image) => setImage(image)}
sendDisabled={processingMessage}
chatOptionsData={props.chatOptionsData}
conversationId={conversationId}
@@ -116,6 +132,7 @@ export default function Chat() {
const [queryToProcess, setQueryToProcess] = useState<string>("");
const [processQuerySignal, setProcessQuerySignal] = useState(false);
const [uploadedFiles, setUploadedFiles] = useState<string[]>([]);
const [image64, setImage64] = useState<string>("");
const locationData = useIPLocationData();
const authenticatedData = useAuthenticatedData();
const isMobileWidth = useIsMobileWidth();
@@ -148,6 +165,7 @@ export default function Chat() {
completed: false,
timestamp: new Date().toISOString(),
rawQuery: queryToProcess || "",
uploadedImageData: decodeURIComponent(image64),
};
setMessages((prevMessages) => [...prevMessages, newStreamMessage]);
setProcessQuerySignal(true);
@@ -178,6 +196,7 @@ export default function Chat() {
if (done) {
setQueryToProcess("");
setProcessQuerySignal(false);
setImage64("");
break;
}
@@ -218,7 +237,14 @@ export default function Chat() {
chatAPI += `&region=${locationData.region}&country=${locationData.country}&city=${locationData.city}&timezone=${locationData.timezone}`;
}
const response = await fetch(chatAPI);
const response = await fetch(chatAPI, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: image64 ? JSON.stringify({ image: image64 }) : undefined,
});
try {
await readChatStream(response);
} catch (err) {
@@ -282,6 +308,7 @@ export default function Chat() {
setUploadedFiles={setUploadedFiles}
isMobileWidth={isMobileWidth}
onConversationIdChange={handleConversationIdChange}
setImage64={setImage64}
/>
</Suspense>
</div>