/** * Subprocess Manager * * Manages background subprocesses for metrics and event collection using Bun.spawn. * Provides crash recovery, graceful shutdown, and IPC message routing. */ import { Subprocess } from 'bun'; import { saveHostMetric, logContainerEvent, type ContainerEventAction } from './db'; import { sendEventNotification, sendEnvironmentNotification } from './notifications'; import { containerEventEmitter } from './event-collector'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; import { existsSync } from 'node:fs'; // Get the directory of this file (works in both Vite and Bun) const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); // Determine subprocess script paths // In development: src/lib/server/subprocesses/*.ts (via __dirname) // In production: /app/subprocesses/*.js (bundled by scripts/build-subprocesses.ts) function getSubprocessPath(name: string): string { // Production path (Docker container) - bundled JS files const prodPath = `/app/subprocesses/${name}.js`; if (existsSync(prodPath)) { return prodPath; } // Development path (relative to this file) - raw TS files return path.join(__dirname, 'subprocesses', `${name}.ts`); } // IPC Message Types (Subprocess → Main) export interface MetricMessage { type: 'metric'; envId: number; cpu: number; memPercent: number; memUsed: number; memTotal: number; } export interface DiskWarningMessage { type: 'disk_warning'; envId: number; envName: string; message: string; diskPercent?: number; } export interface ContainerEventMessage { type: 'container_event'; event: { environmentId: number; containerId: string; containerName: string | null; image: string | null; action: ContainerEventAction; actorAttributes: Record | null; timestamp: string; }; notification?: { action: ContainerEventAction; title: string; message: string; notificationType: 'success' | 'error' | 'warning' | 'info'; image?: string; }; } export interface EnvStatusMessage { type: 'env_status'; envId: number; envName: string; online: boolean; error?: string; } export interface ReadyMessage { type: 'ready'; } export interface ErrorMessage { type: 'error'; message: string; } export type SubprocessMessage = | MetricMessage | DiskWarningMessage | ContainerEventMessage | EnvStatusMessage | ReadyMessage | ErrorMessage; // IPC Message Types (Main → Subprocess) export interface RefreshEnvironmentsCommand { type: 'refresh_environments'; } export interface ShutdownCommand { type: 'shutdown'; } export type MainProcessCommand = RefreshEnvironmentsCommand | ShutdownCommand; // Subprocess configuration interface SubprocessConfig { name: string; scriptPath: string; restartDelayMs: number; maxRestarts: number; } // Subprocess state interface SubprocessState { process: Subprocess<'ignore', 'inherit', 'inherit'> | null; restartCount: number; lastRestartTime: number; isShuttingDown: boolean; } class SubprocessManager { private metricsState: SubprocessState = { process: null, restartCount: 0, lastRestartTime: 0, isShuttingDown: false }; private eventsState: SubprocessState = { process: null, restartCount: 0, lastRestartTime: 0, isShuttingDown: false }; private readonly metricsConfig: SubprocessConfig = { name: 'metrics-subprocess', scriptPath: getSubprocessPath('metrics-subprocess'), restartDelayMs: 5000, maxRestarts: 10 }; private readonly eventsConfig: SubprocessConfig = { name: 'event-subprocess', scriptPath: getSubprocessPath('event-subprocess'), restartDelayMs: 5000, maxRestarts: 10 }; /** * Start all subprocesses */ async start(): Promise { console.log('[SubprocessManager] Starting background subprocesses...'); await this.startMetricsSubprocess(); await this.startEventsSubprocess(); console.log('[SubprocessManager] All subprocesses started'); } /** * Stop all subprocesses gracefully */ async stop(): Promise { console.log('[SubprocessManager] Stopping background subprocesses...'); this.metricsState.isShuttingDown = true; this.eventsState.isShuttingDown = true; // Send shutdown commands this.sendToMetrics({ type: 'shutdown' }); this.sendToEvents({ type: 'shutdown' }); // Wait a bit for graceful shutdown await new Promise((resolve) => setTimeout(resolve, 1000)); // Force kill if still running if (this.metricsState.process) { this.metricsState.process.kill(); this.metricsState.process = null; } if (this.eventsState.process) { this.eventsState.process.kill(); this.eventsState.process = null; } console.log('[SubprocessManager] All subprocesses stopped'); } /** * Notify subprocesses to refresh their environment list */ refreshEnvironments(): void { this.sendToMetrics({ type: 'refresh_environments' }); this.sendToEvents({ type: 'refresh_environments' }); } /** * Start the metrics collection subprocess */ private async startMetricsSubprocess(): Promise { if (this.metricsState.isShuttingDown) return; try { console.log(`[SubprocessManager] Starting ${this.metricsConfig.name}...`); const proc = Bun.spawn(['bun', 'run', this.metricsConfig.scriptPath], { stdio: ['inherit', 'inherit', 'inherit'], env: { ...process.env, SKIP_MIGRATIONS: '1' }, ipc: (message) => this.handleMetricsMessage(message as SubprocessMessage), onExit: (proc, exitCode, signalCode) => { this.handleMetricsExit(exitCode, signalCode); } }); this.metricsState.process = proc; this.metricsState.restartCount = 0; console.log(`[SubprocessManager] ${this.metricsConfig.name} started (PID: ${proc.pid})`); } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Failed to start ${this.metricsConfig.name}: ${msg}`); this.scheduleMetricsRestart(); } } /** * Start the event collection subprocess */ private async startEventsSubprocess(): Promise { if (this.eventsState.isShuttingDown) return; try { console.log(`[SubprocessManager] Starting ${this.eventsConfig.name}...`); const proc = Bun.spawn(['bun', 'run', this.eventsConfig.scriptPath], { stdio: ['inherit', 'inherit', 'inherit'], env: { ...process.env, SKIP_MIGRATIONS: '1' }, ipc: (message) => this.handleEventsMessage(message as SubprocessMessage), onExit: (proc, exitCode, signalCode) => { this.handleEventsExit(exitCode, signalCode); } }); this.eventsState.process = proc; this.eventsState.restartCount = 0; console.log(`[SubprocessManager] ${this.eventsConfig.name} started (PID: ${proc.pid})`); } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Failed to start ${this.eventsConfig.name}: ${msg}`); this.scheduleEventsRestart(); } } /** * Handle IPC messages from metrics subprocess */ private async handleMetricsMessage(message: SubprocessMessage): Promise { try { switch (message.type) { case 'ready': console.log(`[SubprocessManager] ${this.metricsConfig.name} is ready`); break; case 'metric': // Save metric to database await saveHostMetric( message.cpu, message.memPercent, message.memUsed, message.memTotal, message.envId ); break; case 'disk_warning': // Send disk warning notification await sendEventNotification( 'disk_space_warning', { title: message.diskPercent ? 'Disk space warning' : 'High Docker disk usage', message: message.message, type: 'warning' }, message.envId ); break; case 'error': console.error(`[SubprocessManager] ${this.metricsConfig.name} error:`, message.message); break; } } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Error handling metrics message: ${msg}`); } } /** * Handle IPC messages from events subprocess */ private async handleEventsMessage(message: SubprocessMessage): Promise { try { switch (message.type) { case 'ready': console.log(`[SubprocessManager] ${this.eventsConfig.name} is ready`); break; case 'container_event': // Save event to database const savedEvent = await logContainerEvent(message.event); // Broadcast to SSE clients containerEventEmitter.emit('event', savedEvent); // Send notification if provided if (message.notification) { const { action, title, message: notifMessage, notificationType, image } = message.notification; sendEnvironmentNotification(message.event.environmentId, action, { title, message: notifMessage, type: notificationType }, image).catch((err) => { console.error('[SubprocessManager] Failed to send notification:', err); }); } break; case 'env_status': // Broadcast to dashboard via containerEventEmitter containerEventEmitter.emit('env_status', { envId: message.envId, envName: message.envName, online: message.online, error: message.error }); // Send environment status notification if (message.online) { await sendEventNotification( 'environment_online', { title: 'Environment online', message: `Environment "${message.envName}" is now reachable`, type: 'success' }, message.envId ).catch((err) => { console.error('[SubprocessManager] Failed to send online notification:', err); }); } else { await sendEventNotification( 'environment_offline', { title: 'Environment offline', message: `Environment "${message.envName}" is unreachable${message.error ? `: ${message.error}` : ''}`, type: 'error' }, message.envId ).catch((err) => { console.error('[SubprocessManager] Failed to send offline notification:', err); }); } break; case 'error': console.error(`[SubprocessManager] ${this.eventsConfig.name} error:`, message.message); break; } } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Error handling events message: ${msg}`); } } /** * Handle metrics subprocess exit */ private handleMetricsExit(exitCode: number | null, signalCode: string | null): void { if (this.metricsState.isShuttingDown) { console.log(`[SubprocessManager] ${this.metricsConfig.name} stopped`); return; } console.error( `[SubprocessManager] ${this.metricsConfig.name} exited unexpectedly (code: ${exitCode}, signal: ${signalCode})` ); this.metricsState.process = null; this.scheduleMetricsRestart(); } /** * Handle events subprocess exit */ private handleEventsExit(exitCode: number | null, signalCode: string | null): void { if (this.eventsState.isShuttingDown) { console.log(`[SubprocessManager] ${this.eventsConfig.name} stopped`); return; } console.error( `[SubprocessManager] ${this.eventsConfig.name} exited unexpectedly (code: ${exitCode}, signal: ${signalCode})` ); this.eventsState.process = null; this.scheduleEventsRestart(); } /** * Schedule metrics subprocess restart with backoff */ private scheduleMetricsRestart(): void { if (this.metricsState.isShuttingDown) return; if (this.metricsState.restartCount >= this.metricsConfig.maxRestarts) { console.error( `[SubprocessManager] ${this.metricsConfig.name} exceeded max restarts (${this.metricsConfig.maxRestarts}), giving up` ); return; } const delay = this.metricsConfig.restartDelayMs * Math.pow(2, this.metricsState.restartCount); this.metricsState.restartCount++; console.log( `[SubprocessManager] Restarting ${this.metricsConfig.name} in ${delay}ms (attempt ${this.metricsState.restartCount}/${this.metricsConfig.maxRestarts})` ); setTimeout(() => { this.startMetricsSubprocess(); }, delay); } /** * Schedule events subprocess restart with backoff */ private scheduleEventsRestart(): void { if (this.eventsState.isShuttingDown) return; if (this.eventsState.restartCount >= this.eventsConfig.maxRestarts) { console.error( `[SubprocessManager] ${this.eventsConfig.name} exceeded max restarts (${this.eventsConfig.maxRestarts}), giving up` ); return; } const delay = this.eventsConfig.restartDelayMs * Math.pow(2, this.eventsState.restartCount); this.eventsState.restartCount++; console.log( `[SubprocessManager] Restarting ${this.eventsConfig.name} in ${delay}ms (attempt ${this.eventsState.restartCount}/${this.eventsConfig.maxRestarts})` ); setTimeout(() => { this.startEventsSubprocess(); }, delay); } /** * Send command to metrics subprocess */ private sendToMetrics(command: MainProcessCommand): void { if (this.metricsState.process) { try { this.metricsState.process.send(command); } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Failed to send to metrics subprocess: ${msg}`); } } } /** * Send command to events subprocess */ private sendToEvents(command: MainProcessCommand): void { if (this.eventsState.process) { try { this.eventsState.process.send(command); } catch (error) { const msg = error instanceof Error ? error.message : String(error); console.error(`[SubprocessManager] Failed to send to events subprocess: ${msg}`); } } } /** * Get metrics subprocess PID (for HMR cleanup) */ getMetricsPid(): number | null { return this.metricsState.process?.pid ?? null; } /** * Get events subprocess PID (for HMR cleanup) */ getEventsPid(): number | null { return this.eventsState.process?.pid ?? null; } } // Singleton instance let manager: SubprocessManager | null = null; // Store PIDs globally to survive HMR reloads // Using globalThis to persist across module reloads in dev mode const GLOBAL_KEY = '__dockhand_subprocess_pids__'; interface SubprocessPids { metrics: number | null; events: number | null; } function getStoredPids(): SubprocessPids { return (globalThis as any)[GLOBAL_KEY] || { metrics: null, events: null }; } function setStoredPids(pids: SubprocessPids): void { (globalThis as any)[GLOBAL_KEY] = pids; } /** * Kill any orphaned processes from previous HMR reloads */ function killOrphanedProcesses(): void { const pids = getStoredPids(); if (pids.metrics) { try { process.kill(pids.metrics, 'SIGTERM'); console.log(`[SubprocessManager] Killed orphaned metrics process (PID: ${pids.metrics})`); } catch { // Process already dead, ignore } } if (pids.events) { try { process.kill(pids.events, 'SIGTERM'); console.log(`[SubprocessManager] Killed orphaned events process (PID: ${pids.events})`); } catch { // Process already dead, ignore } } setStoredPids({ metrics: null, events: null }); } /** * Start background subprocesses */ export async function startSubprocesses(): Promise { // Kill any orphaned processes from HMR reloads killOrphanedProcesses(); if (manager) { console.warn('[SubprocessManager] Subprocesses already started'); return; } manager = new SubprocessManager(); await manager.start(); // Store PIDs for HMR cleanup setStoredPids({ metrics: manager.getMetricsPid(), events: manager.getEventsPid() }); } /** * Stop background subprocesses */ export async function stopSubprocesses(): Promise { if (manager) { await manager.stop(); manager = null; } setStoredPids({ metrics: null, events: null }); } /** * Notify subprocesses to refresh environments */ export function refreshSubprocessEnvironments(): void { if (manager) { manager.refreshEnvironments(); } }