Add Vision Support (#889)

# Summary of Changes
* New UI to show preview of image uploads
* ChatML message changes to support gpt-4o vision based responses on images
* AWS S3 image uploads for persistent image context in conversations
* Database changes to have `vision_enabled` option in server admin panel while configuring models
* Render previously uploaded images in the chat history, show uploaded images for pending msgs
* Pass the uploaded_image_url through to subqueries
* Allow image to render upon first message from the homepage
* Add rendering support for images to shared chat as well
* Fix some UI/functionality bugs in the share page
* Convert user attached images for chat to webp format before upload
* Use placeholder to attached image for data source, response mode actors
* Update all clients to call /api/chat as a POST instead of GET request
* Fix copying chat messages with images to clipboard

TLDR; Add vision support for openai models on Khoj via the web UI!

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Co-authored-by: Debanjum Singh Solanky <debanjum@gmail.com>
This commit is contained in:
Raghav Tirumale
2024-09-09 17:22:18 -05:00
committed by GitHub
parent b553bba1d8
commit 549686a7a4
33 changed files with 740 additions and 417 deletions

View File

@@ -20,9 +20,9 @@ export default function RootLayout({
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval';
connect-src 'self' https://ipapi.co/json ws://localhost:42110;
connect-src 'self' blob: https://ipapi.co/json ws://localhost:42110;
style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com;
img-src 'self' data: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com;
img-src 'self' data: blob: https://*.khoj.dev https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com;
font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com;
child-src 'none';
object-src 'none';"

View File

@@ -28,16 +28,23 @@ interface ChatBodyDataProps {
isLoggedIn: boolean;
conversationId?: string;
setQueryToProcess: (query: string) => void;
setImage64: (image64: string) => void;
}
function ChatBodyData(props: ChatBodyDataProps) {
const [message, setMessage] = useState("");
const [image, setImage] = useState<string | null>(null);
const [processingMessage, setProcessingMessage] = useState(false);
const [agentMetadata, setAgentMetadata] = useState<AgentData | null>(null);
const setQueryToProcess = props.setQueryToProcess;
const streamedMessages = props.streamedMessages;
useEffect(() => {
if (image) {
props.setImage64(encodeURIComponent(image));
}
}, [image, props.setImage64]);
useEffect(() => {
if (message) {
setProcessingMessage(true);
@@ -74,11 +81,12 @@ function ChatBodyData(props: ChatBodyDataProps) {
/>
</div>
<div
className={`${styles.inputBox} shadow-md bg-background align-middle items-center justify-center px-3`}
className={`${styles.inputBox} p-1 md:px-2 shadow-md bg-background align-middle items-center justify-center dark:bg-neutral-700 dark:border-0 dark:shadow-sm rounded-t-2xl rounded-b-none md:rounded-xl`}
>
<ChatInputArea
isLoggedIn={props.isLoggedIn}
sendMessage={(message) => setMessage(message)}
sendImage={(image) => setImage(image)}
sendDisabled={processingMessage}
chatOptionsData={props.chatOptionsData}
conversationId={props.conversationId}
@@ -101,6 +109,7 @@ export default function SharedChat() {
const [processQuerySignal, setProcessQuerySignal] = useState(false);
const [uploadedFiles, setUploadedFiles] = useState<string[]>([]);
const [paramSlug, setParamSlug] = useState<string | undefined>(undefined);
const [image64, setImage64] = useState<string>("");
const locationData = useIPLocationData();
const authenticatedData = useAuthenticatedData();
@@ -156,6 +165,7 @@ export default function SharedChat() {
completed: false,
timestamp: new Date().toISOString(),
rawQuery: queryToProcess || "",
uploadedImageData: decodeURIComponent(image64),
};
setMessages((prevMessages) => [...prevMessages, newStreamMessage]);
setProcessQuerySignal(true);
@@ -182,6 +192,7 @@ export default function SharedChat() {
if (done) {
setQueryToProcess("");
setProcessQuerySignal(false);
setImage64("");
break;
}
@@ -216,33 +227,21 @@ export default function SharedChat() {
chatAPI += `&region=${locationData.region}&country=${locationData.country}&city=${locationData.city}&timezone=${locationData.timezone}`;
}
const response = await fetch(chatAPI);
const response = await fetch(chatAPI, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: image64 ? JSON.stringify({ image: image64 }) : undefined,
});
try {
await readChatStream(response);
} catch (err) {
console.log(err);
} catch (error) {
console.error(error);
}
}
useEffect(() => {
(async () => {
if (conversationId) {
// Add a new object to the state
const newStreamMessage: StreamMessage = {
rawResponse: "",
trainOfThought: [],
context: [],
onlineContext: {},
completed: false,
timestamp: new Date().toISOString(),
rawQuery: queryToProcess || "",
};
setProcessQuerySignal(true);
setMessages((prevMessages) => [...prevMessages, newStreamMessage]);
}
})();
}, [conversationId, queryToProcess]);
if (isLoading) {
return <Loading />;
}
@@ -275,6 +274,7 @@ export default function SharedChat() {
setTitle={setTitle}
setUploadedFiles={setUploadedFiles}
isMobileWidth={isMobileWidth}
setImage64={setImage64}
/>
</Suspense>
</div>

View File

@@ -12,15 +12,11 @@ div.main {
div.inputBox {
border: 1px solid var(--border-color);
border-radius: 16px;
margin-bottom: 20px;
gap: 12px;
padding-left: 20px;
padding-right: 20px;
align-content: center;
}
input.inputBox {
border: none;
}
@@ -31,7 +27,7 @@ input.inputBox:focus {
}
div.inputBox:focus {
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
box-shadow: 0px 8px 16px 0px rgba(0, 0, 0, 0.2);
}
div.chatBodyFull {
@@ -94,7 +90,6 @@ div.agentIndicator {
padding: 10px;
}
@media (max-width: 768px) {
div.chatBody {
grid-template-columns: 0fr 1fr;
@@ -124,5 +119,4 @@ div.agentIndicator {
gap: 0;
grid-template-columns: 1fr;
}
}