/** One screenshot step of the train of thought, shown as a single frame in the video player. */
interface TrainOfThoughtFrame {
    /** Thought text with the screenshot JSON action removed. */
    text: string;
    /** Screenshot image taken from the `image` field of the JSON action. */
    image?: string;
    /** Index of the originating thought in the input array (not a wall-clock time). */
    timestamp: number;
}

/**
 * A run of consecutive thoughts of the same kind.
 * `frames` is populated for `video` groups, `textEntries` for `text` groups.
 */
interface TrainOfThoughtGroup {
    type: "video" | "text";
    frames?: TrainOfThoughtFrame[];
    textEntries?: TrainOfThoughtObject[];
}

interface TrainOfThoughtComponentProps {
    trainOfThought: string[] | TrainOfThoughtObject[];
    lastMessage: boolean;
    agentColor: string;
    keyId: string;
    completed?: boolean;
}

/**
 * Matches a single-line JSON object that looks like a screenshot action.
 * Hoisted so it is built once; it has no `g` flag, so sharing it is stateless.
 */
const SCREENSHOT_ACTION_PATTERN =
    /\{.*("action": "screenshot"|"type": "screenshot"|"image": "data:image\/.*").*\}/;

/**
 * Try to interpret one thought as a screenshot frame.
 *
 * @param data - Raw thought text.
 * @param index - Position of the thought in the input array; stored as `timestamp`.
 * @returns The frame, or `undefined` when the thought carries no usable screenshot
 *          (no JSON action, malformed JSON, or an `image` that is not a non-empty string).
 */
function parseScreenshotFrame(data: string, index: number): TrainOfThoughtFrame | undefined {
    const jsonMatch = data.match(SCREENSHOT_ACTION_PATTERN);
    if (!jsonMatch) return undefined;

    let payload: unknown;
    try {
        payload = JSON.parse(jsonMatch[0]);
    } catch (e) {
        console.error("Failed to parse screenshot data", e);
        return undefined;
    }
    if (typeof payload !== "object" || payload === null) return undefined;

    // JSON.parse output is untrusted: only accept the fields when they are strings.
    const { image, text: caption } = payload as { image?: unknown; text?: unknown };
    if (typeof image !== "string" || image.length === 0) return undefined;

    // Strip the JSON action from the text. Prefer the full ":\n**Action**: {...}" form;
    // if that prefix is absent, still remove the raw JSON so the (potentially huge)
    // base64 payload never ends up in the displayed text.
    let text = data.replace(`:\n**Action**: ${jsonMatch[0]}`, "");
    if (text === data) {
        text = data.replace(jsonMatch[0], "");
    }
    if (typeof caption === "string" && caption.length > 0) {
        text += `\n\n${caption}`;
    }

    return { text, image, timestamp: index };
}

/**
 * Split a train of thought into alternating groups, preserving input order:
 * consecutive screenshot thoughts become one `video` group of frames, and
 * consecutive other thoughts become one `text` group.
 *
 * @param trainOfThought - Thoughts to group; `undefined` yields an empty result.
 * @returns Groups in input order. A thought whose `data` is not a string, or whose
 *          screenshot JSON cannot be used, is kept as a text entry.
 */
function extractTrainOfThoughtGroups(trainOfThought?: TrainOfThoughtObject[]): TrainOfThoughtGroup[] {
    if (!trainOfThought) return [];

    const groups: TrainOfThoughtGroup[] = [];
    let pendingFrames: TrainOfThoughtFrame[] = [];
    let pendingTextEntries: TrainOfThoughtObject[] = [];

    // Close the currently open video run, if any.
    const flushFrames = (): void => {
        if (pendingFrames.length > 0) {
            groups.push({ type: "video", frames: pendingFrames });
            pendingFrames = [];
        }
    };

    // Close the currently open text run, if any.
    const flushTextEntries = (): void => {
        if (pendingTextEntries.length > 0) {
            groups.push({ type: "text", textEntries: pendingTextEntries });
            pendingTextEntries = [];
        }
    };

    trainOfThought.forEach((thought, index) => {
        // `data` comes from the API at runtime, so verify it is a string before matching.
        const frame =
            typeof thought.data === "string" ? parseScreenshotFrame(thought.data, index) : undefined;

        if (frame) {
            flushTextEntries();
            pendingFrames.push(frame);
        } else {
            flushFrames();
            pendingTextEntries.push(thought);
        }
    });

    // At most one of the two runs is still open here; flush whichever it is.
    flushFrames();
    flushTextEntries();

    return groups;
}
TrainOfThoughtObject[]; + } + + const groups = extractTrainOfThoughtGroups(trainOfThoughtObjects); + setTrainOfThoughtGroups(groups); + }, [props.trainOfThought]); + return (
{!collapsed && ( - {props.trainOfThought.map((train, index) => ( - + {trainOfThoughtGroups.map((group, groupIndex) => ( +
+ {group.type === 'video' && group.frames && group.frames.length > 0 && ( + + )} + {group.type === 'text' && group.textEntries && group.textEntries.map((entry, entryIndex) => { + const lastIndex = trainOfThoughtGroups.length - 1; + const isLastGroup = groupIndex === lastIndex; + const isLastEntry = entryIndex === group.textEntries!.length - 1; + const isPrimaryEntry = isLastGroup && isLastEntry && props.lastMessage && !props.completed; + + return ( + + ); + })} +
))}
)} @@ -401,9 +533,7 @@ export default function ChatHistory(props: ChatHistoryProps) { {chatMessage.trainOfThought && chatMessage.by === "khoj" && ( train.data, - )} + trainOfThought={chatMessage.trainOfThought} lastMessage={false} agentColor={data?.agent?.color || "orange"} key={`${index}trainOfThought`} @@ -462,12 +592,12 @@ export default function ChatHistory(props: ChatHistoryProps) { conversationId={props.conversationId} turnId={messageTurnId} /> - {message.trainOfThought && ( + {message.trainOfThought && message.trainOfThought.length > 0 && ( t.length).join('-')}`} keyId={`${index}trainOfThought`} completed={message.completed} /> diff --git a/src/interface/web/app/components/chatMessage/chatMessage.tsx b/src/interface/web/app/components/chatMessage/chatMessage.tsx index 2e4c600d..f34cf11d 100644 --- a/src/interface/web/app/components/chatMessage/chatMessage.tsx +++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx @@ -144,7 +144,7 @@ interface Intent { "inferred-queries": string[]; } -interface TrainOfThoughtObject { +export interface TrainOfThoughtObject { type: string; data: string; } diff --git a/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.module.css b/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.module.css new file mode 100644 index 00000000..cfa9312d --- /dev/null +++ b/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.module.css @@ -0,0 +1,170 @@ +.videoPlayer { + border: 1px solid hsl(var(--border)); + border-radius: 8px; + background-color: hsl(var(--background)); + margin: 16px 0; + overflow: hidden; +} + +.screen { + position: relative; + background-color: hsl(var(--muted)); + min-height: 300px; + display: flex; + align-items: center; + justify-content: center; +} + +.screenImage { + max-width: 100%; + max-height: 400px; + object-fit: contain; + border-radius: 4px; +} + +.textOverlay { + position: absolute; + bottom: 0; + left: 0; + 
right: 0; + background: linear-gradient(transparent, rgba(0, 0, 0, 0.8)); + padding: 20px 16px 12px; + color: white; +} + +.thoughtText { + font-size: 14px; + line-height: 1.4; + max-height: 100px; + overflow-y: auto; +} + +.controls { + padding: 12px 16px; + background-color: hsl(var(--card)); + border-top: 1px solid hsl(var(--border)); +} + +.timeline { + position: relative; + margin-bottom: 12px; +} + +.timelineSlider { + width: 100%; + height: 4px; + background-color: hsl(var(--muted)); + border-radius: 2px; + outline: none; + cursor: pointer; + -webkit-appearance: none; + appearance: none; +} + +.timelineSlider::-webkit-slider-thumb { + -webkit-appearance: none; + appearance: none; + width: 16px; + height: 16px; + border-radius: 50%; + background-color: hsl(var(--primary)); + cursor: pointer; + border: 2px solid white; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2); +} + +.timelineSlider::-moz-range-thumb { + width: 16px; + height: 16px; + border-radius: 50%; + background-color: hsl(var(--primary)); + cursor: pointer; + border: 2px solid white; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2); +} + +.frameMarkers { + position: absolute; + top: -2px; + left: 0; + right: 0; + height: 8px; + display: flex; + justify-content: space-between; + pointer-events: none; +} + +.frameMarker { + width: 6px; + height: 8px; + border-radius: 1px; + cursor: pointer; + pointer-events: auto; + transition: all 0.2s ease; +} + +.frameMarker.hasImage { + background-color: hsl(var(--primary)); +} + +.frameMarker.textOnly { + background-color: hsl(var(--muted-foreground)); +} + +.frameMarker.active { + background-color: hsl(var(--accent)) !important; + transform: scaleY(1.5); +} + +.frameMarker:hover { + transform: scaleY(1.2); +} + +.controlButtons { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 8px; +} + +.controlButton { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border: 1px solid hsl(var(--border)); + 
border-radius: 4px; + background-color: hsl(var(--background)); + color: hsl(var(--foreground)); + cursor: pointer; + transition: all 0.2s ease; +} + +.controlButton:hover:not(:disabled) { + background-color: hsl(var(--muted)); +} + +.controlButton:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.controlButton.active { + background-color: hsl(var(--primary)); + color: hsl(var(--primary-foreground)); +} + +.frameInfo { + display: flex; + justify-content: center; + font-size: 12px; + color: hsl(var(--muted-foreground)); +} + +/* Dark mode adjustments */ +@media (prefers-color-scheme: dark) { + .textOverlay { + background: linear-gradient(transparent, rgba(0, 0, 0, 0.9)); + } +} diff --git a/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.tsx b/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.tsx new file mode 100644 index 00000000..4eeb6ed1 --- /dev/null +++ b/src/interface/web/components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer.tsx @@ -0,0 +1,186 @@ +"use client"; + +import React, { useState, useRef, useEffect } from "react"; +import { Play, Pause, FastForward, Rewind } from "@phosphor-icons/react"; +import styles from "./trainOfThoughtVideoPlayer.module.css"; + +interface TrainOfThoughtFrame { + text: string; + image?: string; + timestamp: number; +} + +interface TrainOfThoughtVideoPlayerProps { + frames: TrainOfThoughtFrame[]; + autoPlay?: boolean; + playbackSpeed?: number; +} + +export default function TrainOfThoughtVideoPlayer({ + frames, + autoPlay = true, + playbackSpeed = 1000, // ms per frame +}: TrainOfThoughtVideoPlayerProps) { + const [currentFrameIndex, setCurrentFrameIndex] = useState(0); + const [isPlaying, setIsPlaying] = useState(autoPlay); + const [isAutoTracking, setIsAutoTracking] = useState(true); + const intervalRef = useRef(null); + + // Auto-advance to latest frame when new frames are added + useEffect(() => { + if (isAutoTracking && frames.length > 0) { + 
setCurrentFrameIndex(frames.length - 1); + } + }, [frames.length, isAutoTracking]); + + // Handle playback + useEffect(() => { + if (isPlaying && frames.length > 1) { + intervalRef.current = setInterval(() => { + setCurrentFrameIndex((prev) => { + const next = prev + 1; + if (next >= frames.length) { + setIsPlaying(false); + return prev; + } + return next; + }); + }, playbackSpeed); + } else { + if (intervalRef.current) { + clearInterval(intervalRef.current); + intervalRef.current = null; + } + } + + return () => { + if (intervalRef.current) { + clearInterval(intervalRef.current); + } + }; + }, [isPlaying, frames.length, playbackSpeed]); + + const currentFrame = frames[currentFrameIndex]; + + const handleSeek = (index: number) => { + setCurrentFrameIndex(index); + setIsAutoTracking(false); + setIsPlaying(false); + }; + + const handlePlay = () => { + setIsPlaying(!isPlaying); + setIsAutoTracking(false); + }; + + const handlePrevious = () => { + if (currentFrameIndex > 0) { + setCurrentFrameIndex(currentFrameIndex - 1); + setIsAutoTracking(false); + setIsPlaying(false); + } + }; + + const handleNext = () => { + if (currentFrameIndex < frames.length - 1) { + setCurrentFrameIndex(currentFrameIndex + 1); + setIsAutoTracking(false); + setIsPlaying(false); + } + }; + + const handleAutoTrack = () => { + setIsAutoTracking(true); + setCurrentFrameIndex(frames.length - 1); + setIsPlaying(false); + }; + + if (!frames.length) { + return null; + } + + return ( +
+
+ {currentFrame?.image && ( + {`Train + )} +
+
{currentFrame?.text}
+
+
+ +
+
+ handleSeek(parseInt(e.target.value))} + className={styles.timelineSlider} + /> +
+ {frames.map((frame, index) => ( +
handleSeek(index)} + title={`Frame ${index + 1}: ${frame.text.slice(0, 50)}...`} + /> + ))} +
+
+ +
+ + + + + + + +
+ +
+ + {currentFrameIndex + 1} / {frames.length} + +
+
+
+ ); +}