Render operator train of thought as video on the web app for easier viewing

- You can seek through the train of thought video of computer operation or
  follow it in live mode.
- Interleaves video with normal text thoughts.
- Video is available for past interactions and for the currently streaming message.
This commit is contained in:
Debanjum
2025-05-31 04:31:23 -07:00
parent 6821bd38ed
commit 830a1af69e
4 changed files with 503 additions and 17 deletions

View File

@@ -8,7 +8,9 @@ import ChatMessage, {
ChatHistoryData,
StreamMessage,
TrainOfThought,
TrainOfThoughtObject,
} from "../chatMessage/chatMessage";
import TrainOfThoughtVideoPlayer from "../../../components/trainOfThoughtVideoPlayer/trainOfThoughtVideoPlayer";
import { ScrollArea } from "@/components/ui/scroll-area";
@@ -41,17 +43,108 @@ interface ChatHistoryProps {
setIsOwner?: (isOwner: boolean) => void;
}
// One captured step of the operator's activity: the thought text, an
// optional screenshot (data: URI), and its position within the train.
interface TrainOfThoughtFrame {
    text: string;
    // Screenshot as a data: URI, when this step produced one.
    image?: string;
    // Index of the thought within the train of thought sequence.
    timestamp: number;
}
// A contiguous run of train-of-thought entries rendered either as a video
// (screenshot frames) or as plain text entries. Exactly one of `frames` /
// `textEntries` is populated, matching `type`.
interface TrainOfThoughtGroup {
    type: 'video' | 'text';
    // Populated when type === 'video'.
    frames?: TrainOfThoughtFrame[];
    // Populated when type === 'text'.
    textEntries?: TrainOfThoughtObject[];
}
interface TrainOfThoughtComponentProps {
trainOfThought: string[];
trainOfThought: string[] | TrainOfThoughtObject[];
lastMessage: boolean;
agentColor: string;
keyId: string;
completed?: boolean;
}
/**
 * Partition a train of thought into alternating 'video' and 'text' groups.
 *
 * Thoughts embedding a screenshot JSON payload (an `image` data: URI) become
 * video frames; consecutive frames are collected into one 'video' group.
 * All other thoughts are collected into 'text' groups, preserving order.
 * Thoughts whose embedded JSON fails to parse are kept as plain text.
 */
function extractTrainOfThoughtGroups(trainOfThought?: TrainOfThoughtObject[]): TrainOfThoughtGroup[] {
    if (!trainOfThought) return [];

    const groups: TrainOfThoughtGroup[] = [];
    let pendingFrames: TrainOfThoughtFrame[] = [];
    let pendingText: TrainOfThoughtObject[] = [];

    // Flush buffered text entries into the output as a 'text' group.
    const flushText = () => {
        if (pendingText.length > 0) {
            groups.push({ type: 'text', textEntries: pendingText });
            pendingText = [];
        }
    };
    // Flush buffered screenshot frames into the output as a 'video' group.
    const flushVideo = () => {
        if (pendingFrames.length > 0) {
            groups.push({ type: 'video', frames: pendingFrames });
            pendingFrames = [];
        }
    };

    for (const [index, thought] of trainOfThought.entries()) {
        let text = thought.data;
        let capturedFrame = false;

        // Try to extract an embedded screenshot payload from the thought.
        try {
            const jsonMatch = text.match(
                /\{.*(\"action\": \"screenshot\"|\"type\": \"screenshot\"|\"image\": \"data:image\/.*\").*\}/,
            );
            if (jsonMatch) {
                const jsonMessage = JSON.parse(jsonMatch[0]);
                if (jsonMessage.image) {
                    capturedFrame = true;
                    // Strip the raw JSON action from the display text and
                    // append any human-readable text from the payload.
                    text = text.replace(`:\n**Action**: ${jsonMatch[0]}`, "");
                    if (jsonMessage.text) {
                        text += `\n\n${jsonMessage.text}`;
                    }
                    // A frame starts/extends a video group, so close any
                    // open text group first.
                    flushText();
                    pendingFrames.push({
                        text: text,
                        image: jsonMessage.image,
                        timestamp: index,
                    });
                }
            }
        } catch (e) {
            console.error("Failed to parse screenshot data", e);
        }

        if (!capturedFrame) {
            // A plain thought closes any open video group.
            flushVideo();
            pendingText.push(thought);
        }
    }

    // Emit whatever is still buffered (video before text, as accumulated).
    flushVideo();
    flushText();
    return groups;
}
function TrainOfThoughtComponent(props: TrainOfThoughtComponentProps) {
const lastIndex = props.trainOfThought.length - 1;
const [collapsed, setCollapsed] = useState(props.completed);
const [trainOfThoughtGroups, setTrainOfThoughtGroups] = useState<TrainOfThoughtGroup[]>([]);
const variants = {
open: {
@@ -72,6 +165,29 @@ function TrainOfThoughtComponent(props: TrainOfThoughtComponentProps) {
}
}, [props.completed]);
useEffect(() => {
// Handle empty array case
if (!props.trainOfThought || props.trainOfThought.length === 0) {
setTrainOfThoughtGroups([]);
return;
}
// Convert string array to TrainOfThoughtObject array if needed
let trainOfThoughtObjects: TrainOfThoughtObject[];
if (typeof props.trainOfThought[0] === 'string') {
trainOfThoughtObjects = (props.trainOfThought as string[]).map((data, index) => ({
type: 'text',
data: data
}));
} else {
trainOfThoughtObjects = props.trainOfThought as TrainOfThoughtObject[];
}
const groups = extractTrainOfThoughtGroups(trainOfThoughtObjects);
setTrainOfThoughtGroups(groups);
}, [props.trainOfThought]);
return (
<div
className={`${!collapsed ? styles.trainOfThought + " border" : ""} rounded-lg`}
@@ -101,15 +217,31 @@ function TrainOfThoughtComponent(props: TrainOfThoughtComponentProps) {
<AnimatePresence initial={false}>
{!collapsed && (
<motion.div initial="closed" animate="open" exit="closed" variants={variants}>
{props.trainOfThought.map((train, index) => (
<TrainOfThought
key={`train-${index}`}
message={train}
primary={
index === lastIndex && props.lastMessage && !props.completed
}
agentColor={props.agentColor}
/>
{trainOfThoughtGroups.map((group, groupIndex) => (
<div key={`train-group-${groupIndex}`}>
{group.type === 'video' && group.frames && group.frames.length > 0 && (
<TrainOfThoughtVideoPlayer
frames={group.frames}
autoPlay={false}
playbackSpeed={1500}
/>
)}
{group.type === 'text' && group.textEntries && group.textEntries.map((entry, entryIndex) => {
const lastIndex = trainOfThoughtGroups.length - 1;
const isLastGroup = groupIndex === lastIndex;
const isLastEntry = entryIndex === group.textEntries!.length - 1;
const isPrimaryEntry = isLastGroup && isLastEntry && props.lastMessage && !props.completed;
return (
<TrainOfThought
key={`train-text-${groupIndex}-${entryIndex}-${entry.data.length}`}
message={entry.data}
primary={isPrimaryEntry}
agentColor={props.agentColor}
/>
);
})}
</div>
))}
</motion.div>
)}
@@ -401,9 +533,7 @@ export default function ChatHistory(props: ChatHistoryProps) {
<React.Fragment key={`chatMessage-${index}`}>
{chatMessage.trainOfThought && chatMessage.by === "khoj" && (
<TrainOfThoughtComponent
trainOfThought={chatMessage.trainOfThought?.map(
(train) => train.data,
)}
trainOfThought={chatMessage.trainOfThought}
lastMessage={false}
agentColor={data?.agent?.color || "orange"}
key={`${index}trainOfThought`}
@@ -462,12 +592,12 @@ export default function ChatHistory(props: ChatHistoryProps) {
conversationId={props.conversationId}
turnId={messageTurnId}
/>
{message.trainOfThought && (
{message.trainOfThought && message.trainOfThought.length > 0 && (
<TrainOfThoughtComponent
trainOfThought={message.trainOfThought}
lastMessage={index === incompleteIncomingMessageIndex}
agentColor={data?.agent?.color || "orange"}
key={`${index}trainOfThought`}
key={`${index}trainOfThought-${message.trainOfThought.length}-${message.trainOfThought.map(t => t.length).join('-')}`}
keyId={`${index}trainOfThought`}
completed={message.completed}
/>

View File

@@ -144,7 +144,7 @@ interface Intent {
"inferred-queries": string[];
}
interface TrainOfThoughtObject {
export interface TrainOfThoughtObject {
type: string;
data: string;
}

View File

@@ -0,0 +1,170 @@
/*
 * Styles for TrainOfThoughtVideoPlayer: a faux video player that steps
 * through screenshot frames of the operator's train of thought.
 */

/* Outer container for the whole player. */
.videoPlayer {
    border: 1px solid hsl(var(--border));
    border-radius: 8px;
    background-color: hsl(var(--background));
    margin: 16px 0;
    overflow: hidden;
}

/* The "screen" area: centers the current screenshot frame. */
.screen {
    position: relative;
    background-color: hsl(var(--muted));
    min-height: 300px;
    display: flex;
    align-items: center;
    justify-content: center;
}

/* Screenshot image, scaled to fit without cropping. */
.screenImage {
    max-width: 100%;
    max-height: 400px;
    object-fit: contain;
    border-radius: 4px;
}

/* Caption strip over the bottom of the screen for the thought text. */
.textOverlay {
    position: absolute;
    bottom: 0;
    left: 0;
    right: 0;
    background: linear-gradient(transparent, rgba(0, 0, 0, 0.8));
    padding: 20px 16px 12px;
    color: white;
}

/* Scrollable thought text inside the overlay. */
.thoughtText {
    font-size: 14px;
    line-height: 1.4;
    max-height: 100px;
    overflow-y: auto;
}

/* Control bar under the screen. */
.controls {
    padding: 12px 16px;
    background-color: hsl(var(--card));
    border-top: 1px solid hsl(var(--border));
}

/* Seek bar container; frame markers are absolutely positioned over it. */
.timeline {
    position: relative;
    margin-bottom: 12px;
}

/* Native range input restyled as the seek bar. */
.timelineSlider {
    width: 100%;
    height: 4px;
    background-color: hsl(var(--muted));
    border-radius: 2px;
    outline: none;
    cursor: pointer;
    -webkit-appearance: none;
    appearance: none;
}

/* Slider thumb (WebKit/Blink). */
.timelineSlider::-webkit-slider-thumb {
    -webkit-appearance: none;
    appearance: none;
    width: 16px;
    height: 16px;
    border-radius: 50%;
    background-color: hsl(var(--primary));
    cursor: pointer;
    border: 2px solid white;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}

/* Slider thumb (Firefox). */
.timelineSlider::-moz-range-thumb {
    width: 16px;
    height: 16px;
    border-radius: 50%;
    background-color: hsl(var(--primary));
    cursor: pointer;
    border: 2px solid white;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}

/* Row of per-frame tick marks laid over the seek bar. Container ignores
   pointer events so the slider stays draggable between marks. */
.frameMarkers {
    position: absolute;
    top: -2px;
    left: 0;
    right: 0;
    height: 8px;
    display: flex;
    justify-content: space-between;
    pointer-events: none;
}

/* A single frame tick; individually clickable to seek to that frame. */
.frameMarker {
    width: 6px;
    height: 8px;
    border-radius: 1px;
    cursor: pointer;
    pointer-events: auto;
    transition: all 0.2s ease;
}

/* Tick color for frames that carry a screenshot. */
.frameMarker.hasImage {
    background-color: hsl(var(--primary));
}

/* Tick color for text-only frames. */
.frameMarker.textOnly {
    background-color: hsl(var(--muted-foreground));
}

/* Tick for the frame currently shown on screen. */
.frameMarker.active {
    background-color: hsl(var(--accent)) !important;
    transform: scaleY(1.5);
}

.frameMarker:hover {
    transform: scaleY(1.2);
}

/* Transport row: rewind / play-pause / fast-forward / live buttons. */
.controlButtons {
    display: flex;
    align-items: center;
    gap: 8px;
    margin-bottom: 8px;
}

/* An individual transport button. */
.controlButton {
    display: flex;
    align-items: center;
    justify-content: center;
    width: 32px;
    height: 32px;
    border: 1px solid hsl(var(--border));
    border-radius: 4px;
    background-color: hsl(var(--background));
    color: hsl(var(--foreground));
    cursor: pointer;
    transition: all 0.2s ease;
}

.controlButton:hover:not(:disabled) {
    background-color: hsl(var(--muted));
}

.controlButton:disabled {
    opacity: 0.5;
    cursor: not-allowed;
}

/* Toggled-on state, e.g. the "Live" button while auto-tracking. */
.controlButton.active {
    background-color: hsl(var(--primary));
    color: hsl(var(--primary-foreground));
}

/* "current / total" frame counter under the transport row. */
.frameInfo {
    display: flex;
    justify-content: center;
    font-size: 12px;
    color: hsl(var(--muted-foreground));
}

/* Dark mode adjustments */
@media (prefers-color-scheme: dark) {
    .textOverlay {
        background: linear-gradient(transparent, rgba(0, 0, 0, 0.9));
    }
}

View File

@@ -0,0 +1,186 @@
"use client";
import React, { useState, useRef, useEffect } from "react";
import { Play, Pause, FastForward, Rewind } from "@phosphor-icons/react";
import styles from "./trainOfThoughtVideoPlayer.module.css";
// One step of the train of thought to display: the thought text, an
// optional screenshot (data: URI), and its position in the sequence.
// NOTE(review): duplicated from chatHistory — presumably kept local to avoid
// a circular import; consider sharing a single declaration.
interface TrainOfThoughtFrame {
    text: string;
    // Screenshot as a data: URI, when this step produced one.
    image?: string;
    // Index of the thought within the train of thought sequence.
    timestamp: number;
}
// Props for TrainOfThoughtVideoPlayer.
interface TrainOfThoughtVideoPlayerProps {
    // Ordered frames to render; may grow while a message streams in.
    frames: TrainOfThoughtFrame[];
    // Start playback immediately on mount (default: true).
    autoPlay?: boolean;
    // Milliseconds each frame is shown during playback (default: 1000).
    playbackSpeed?: number;
}
/**
 * Video-style player for the operator's train of thought.
 *
 * Renders the current screenshot frame with its thought text overlaid, plus a
 * timeline slider, per-frame markers, transport buttons, and a "Live" button
 * that auto-tracks the newest frame while a message streams in.
 *
 * Fixes over the initial version:
 * - Playback no longer calls setIsPlaying() inside a state-updater function
 *   (updaters must be pure; a separate effect pauses at the last frame).
 * - Timer ref uses ReturnType<typeof setInterval> instead of NodeJS.Timeout,
 *   which is the wrong type in browser bundles.
 * - parseInt() is given an explicit radix.
 * - Pressing Play while on the last frame restarts from the first frame
 *   (previously a silent no-op).
 */
export default function TrainOfThoughtVideoPlayer({
    frames,
    autoPlay = true,
    playbackSpeed = 1000, // ms per frame
}: TrainOfThoughtVideoPlayerProps) {
    const [currentFrameIndex, setCurrentFrameIndex] = useState(0);
    const [isPlaying, setIsPlaying] = useState(autoPlay);
    const [isAutoTracking, setIsAutoTracking] = useState(true);
    // Platform-agnostic timer handle type (works in both DOM and Node typings).
    const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);

    // Auto-advance to latest frame when new frames are added (live mode).
    useEffect(() => {
        if (isAutoTracking && frames.length > 0) {
            setCurrentFrameIndex(frames.length - 1);
        }
    }, [frames.length, isAutoTracking]);

    // Drive playback with an interval. The state updater stays pure: it only
    // clamps to the last frame; stopping playback is handled in the effect
    // below, so this is safe under React StrictMode double-invocation.
    useEffect(() => {
        if (isPlaying && frames.length > 1) {
            intervalRef.current = setInterval(() => {
                setCurrentFrameIndex((prev) => Math.min(prev + 1, frames.length - 1));
            }, playbackSpeed);
        } else if (intervalRef.current) {
            clearInterval(intervalRef.current);
            intervalRef.current = null;
        }
        return () => {
            if (intervalRef.current) {
                clearInterval(intervalRef.current);
                intervalRef.current = null;
            }
        };
    }, [isPlaying, frames.length, playbackSpeed]);

    // Pause automatically once the playhead reaches the final frame.
    useEffect(() => {
        if (isPlaying && currentFrameIndex >= frames.length - 1) {
            setIsPlaying(false);
        }
    }, [isPlaying, currentFrameIndex, frames.length]);

    const currentFrame = frames[currentFrameIndex];

    // Manual seek: jump to a frame and leave live/playback modes.
    const handleSeek = (index: number) => {
        setCurrentFrameIndex(index);
        setIsAutoTracking(false);
        setIsPlaying(false);
    };

    // Toggle playback. Pressing Play at the end restarts from the beginning.
    const handlePlay = () => {
        if (!isPlaying && currentFrameIndex >= frames.length - 1) {
            setCurrentFrameIndex(0);
        }
        setIsPlaying(!isPlaying);
        setIsAutoTracking(false);
    };

    // Step one frame back, leaving live/playback modes.
    const handlePrevious = () => {
        if (currentFrameIndex > 0) {
            setCurrentFrameIndex(currentFrameIndex - 1);
            setIsAutoTracking(false);
            setIsPlaying(false);
        }
    };

    // Step one frame forward, leaving live/playback modes.
    const handleNext = () => {
        if (currentFrameIndex < frames.length - 1) {
            setCurrentFrameIndex(currentFrameIndex + 1);
            setIsAutoTracking(false);
            setIsPlaying(false);
        }
    };

    // Jump to the newest frame and resume auto-tracking of incoming frames.
    const handleAutoTrack = () => {
        setIsAutoTracking(true);
        setCurrentFrameIndex(frames.length - 1);
        setIsPlaying(false);
    };

    // Nothing to show until at least one frame exists.
    if (!frames.length) {
        return null;
    }

    return (
        <div className={styles.videoPlayer}>
            <div className={styles.screen}>
                {currentFrame?.image && (
                    <img
                        src={currentFrame.image}
                        alt={`Train of thought frame ${currentFrameIndex + 1}`}
                        className={styles.screenImage}
                    />
                )}
                <div className={styles.textOverlay}>
                    <div className={styles.thoughtText}>{currentFrame?.text}</div>
                </div>
            </div>
            <div className={styles.controls}>
                <div className={styles.timeline}>
                    <input
                        type="range"
                        min={0}
                        max={Math.max(0, frames.length - 1)}
                        value={currentFrameIndex}
                        onChange={(e) => handleSeek(parseInt(e.target.value, 10))}
                        className={styles.timelineSlider}
                    />
                    <div className={styles.frameMarkers}>
                        {frames.map((frame, index) => (
                            <div
                                key={index}
                                className={`${styles.frameMarker} ${
                                    frame.image ? styles.hasImage : styles.textOnly
                                } ${index === currentFrameIndex ? styles.active : ""}`}
                                onClick={() => handleSeek(index)}
                                title={`Frame ${index + 1}: ${frame.text.slice(0, 50)}...`}
                            />
                        ))}
                    </div>
                </div>
                <div className={styles.controlButtons}>
                    <button
                        onClick={handlePrevious}
                        disabled={currentFrameIndex === 0}
                        title="Previous frame"
                        className={styles.controlButton}
                    >
                        <Rewind size={16} />
                    </button>
                    <button
                        onClick={handlePlay}
                        disabled={frames.length <= 1}
                        title={isPlaying ? "Pause" : "Play"}
                        className={styles.controlButton}
                    >
                        {isPlaying ? <Pause size={16} /> : <Play size={16} />}
                    </button>
                    <button
                        onClick={handleNext}
                        disabled={currentFrameIndex === frames.length - 1}
                        title="Next frame"
                        className={styles.controlButton}
                    >
                        <FastForward size={16} />
                    </button>
                    <button
                        onClick={handleAutoTrack}
                        className={`${styles.controlButton} ${isAutoTracking ? styles.active : ""}`}
                        title="Auto-track latest"
                    >
                        Live
                    </button>
                </div>
                <div className={styles.frameInfo}>
                    <span>
                        {currentFrameIndex + 1} / {frames.length}
                    </span>
                </div>
            </div>
        </div>
    );
}