mirror of
https://github.com/khoaliber/dockhand.git
synced 2026-03-04 13:19:57 +00:00
Initial commit
This commit is contained in:
593
lib/server/subprocess-manager.ts
Normal file
593
lib/server/subprocess-manager.ts
Normal file
@@ -0,0 +1,593 @@
|
||||
/**
|
||||
* Subprocess Manager
|
||||
*
|
||||
* Manages background subprocesses for metrics and event collection using Bun.spawn.
|
||||
* Provides crash recovery, graceful shutdown, and IPC message routing.
|
||||
*/
|
||||
|
||||
import { Subprocess } from 'bun';
|
||||
import { saveHostMetric, logContainerEvent, type ContainerEventAction } from './db';
|
||||
import { sendEventNotification, sendEnvironmentNotification } from './notifications';
|
||||
import { containerEventEmitter } from './event-collector';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { existsSync } from 'node:fs';
|
||||
|
||||
// Location of this module on disk (works in both Vite and Bun)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Resolve the script file to launch for a named subprocess.
 *
 * - Production (Docker container): `/app/subprocesses/<name>.js`, bundled by
 *   scripts/build-subprocesses.ts. This file wins whenever it exists.
 * - Development: `subprocesses/<name>.ts` next to this module (raw TS).
 *
 * @param name - Subprocess base name without extension.
 * @returns Path of the script to run.
 */
function getSubprocessPath(name: string): string {
	const bundledScript = `/app/subprocesses/${name}.js`;
	const devScript = path.join(__dirname, 'subprocesses', `${name}.ts`);

	return existsSync(bundledScript) ? bundledScript : devScript;
}
|
||||
|
||||
// IPC Message Types (Subprocess → Main)
|
||||
/**
 * Host metrics sample sent by a subprocess for one environment.
 *
 * The manager persists these values through `saveHostMetric`.
 */
export interface MetricMessage {
	/** Discriminant of the IPC message union. */
	type: 'metric';
	/** ID of the environment the sample belongs to. */
	envId: number;
	/** CPU reading (scale is defined by the sender; not visible in this file). */
	cpu: number;
	/** Memory usage as a percentage (presumably 0-100; confirm against the sender). */
	memPercent: number;
	/** Memory in use (unit is defined by the sender). */
	memUsed: number;
	/** Total memory (presumably the same unit as `memUsed`). */
	memTotal: number;
}
|
||||
|
||||
/**
 * Disk usage warning raised by a subprocess for one environment.
 *
 * The manager forwards it as a `disk_space_warning` notification.
 */
export interface DiskWarningMessage {
	/** Discriminant of the IPC message union. */
	type: 'disk_warning';
	/** ID of the affected environment. */
	envId: number;
	/** Display name of the affected environment. */
	envName: string;
	/** Human-readable warning text used as the notification body. */
	message: string;
	/**
	 * Optional disk usage percentage. The manager picks the notification title
	 * depending on whether this value is supplied.
	 */
	diskPercent?: number;
}
|
||||
|
||||
/**
 * Container event reported by the events subprocess.
 *
 * The manager stores `event` via `logContainerEvent`, emits the saved record on
 * `containerEventEmitter`, and, when `notification` is present, passes it to
 * `sendEnvironmentNotification`.
 */
export interface ContainerEventMessage {
	/** Discriminant of the IPC message union. */
	type: 'container_event';
	/** The event record to persist. */
	event: {
		/** ID of the environment the container belongs to. */
		environmentId: number;
		/** ID of the container. */
		containerId: string;
		/** Container name, or `null` when not available. */
		containerName: string | null;
		/** Image reference, or `null` when not available. */
		image: string | null;
		/** What happened to the container. */
		action: ContainerEventAction;
		/** Extra attributes attached to the event, or `null`. */
		actorAttributes: Record<string, string> | null;
		/** Event time as a string (exact format is defined by the sender). */
		timestamp: string;
	};
	/** Optional notification to dispatch alongside the event. */
	notification?: {
		/** Action the notification is about. */
		action: ContainerEventAction;
		/** Notification title. */
		title: string;
		/** Notification body. */
		message: string;
		/** Severity of the notification. */
		notificationType: 'success' | 'error' | 'warning' | 'info';
		/** Optional image reference passed through to the notifier. */
		image?: string;
	};
}
|
||||
|
||||
/**
 * Reachability change of an environment, reported by the events subprocess.
 *
 * The manager re-emits it as an `env_status` event and sends an
 * `environment_online` / `environment_offline` notification.
 */
export interface EnvStatusMessage {
	/** Discriminant of the IPC message union. */
	type: 'env_status';
	/** ID of the environment. */
	envId: number;
	/** Display name of the environment (used in the notification text). */
	envName: string;
	/** `true` when the environment is reachable, `false` otherwise. */
	online: boolean;
	/** Optional error detail; appended to the offline notification when set. */
	error?: string;
}
|
||||
|
||||
/** Sent by a subprocess to announce that it is ready; the manager only logs it. */
export interface ReadyMessage {
	/** Discriminant of the IPC message union. */
	type: 'ready';
}
|
||||
|
||||
/** Error reported by a subprocess; the manager writes it to the error log. */
export interface ErrorMessage {
	/** Discriminant of the IPC message union. */
	type: 'error';
	/** Error text supplied by the subprocess. */
	message: string;
}
|
||||
|
||||
/**
 * Every message a subprocess may send to the main process.
 * Discriminated by the `type` field of each member.
 */
export type SubprocessMessage =
	| MetricMessage
	| DiskWarningMessage
	| ContainerEventMessage
	| EnvStatusMessage
	| ReadyMessage
	| ErrorMessage;
|
||||
|
||||
// IPC Message Types (Main → Subprocess)
|
||||
/** Command asking a subprocess to refresh its environment list. */
export interface RefreshEnvironmentsCommand {
	/** Discriminant of the command union. */
	type: 'refresh_environments';
}
|
||||
|
||||
/** Command asking a subprocess to shut down; sent by the manager before it force-kills. */
export interface ShutdownCommand {
	/** Discriminant of the command union. */
	type: 'shutdown';
}
|
||||
|
||||
/** Every command the main process may send to a subprocess. */
export type MainProcessCommand =
	| RefreshEnvironmentsCommand
	| ShutdownCommand;
|
||||
|
||||
/** Static settings describing how one subprocess is launched and restarted. */
interface SubprocessConfig {
	/** Name used in log messages. */
	name: string;
	/** Path of the script that is passed to `bun run`. */
	scriptPath: string;
	/** Base restart delay in milliseconds; doubled for each consecutive restart attempt. */
	restartDelayMs: number;
	/** Number of restart attempts after which the manager gives up. */
	maxRestarts: number;
}
|
||||
|
||||
/** Mutable runtime bookkeeping for one managed subprocess. */
interface SubprocessState {
	/** Handle of the child process, or `null` when none is tracked. */
	process: Subprocess<'ignore', 'inherit', 'inherit'> | null;
	/** Number of restart attempts scheduled so far; drives the backoff delay. */
	restartCount: number;
	/** Millisecond timestamp associated with the most recent (re)start; starts at 0. */
	lastRestartTime: number;
	/** Set once shutdown has begun; suppresses further starts and restarts. */
	isShuttingDown: boolean;
}
|
||||
|
||||
/** Identifies which of the two managed subprocesses a helper operates on. */
type ManagedKind = 'metrics' | 'events';

/**
 * Supervises the metrics and events subprocesses.
 *
 * Responsibilities:
 * - spawn each child with `Bun.spawn` and an IPC channel,
 * - route IPC messages to the database, the notifier and the SSE emitter,
 * - restart a crashed child with exponential backoff, giving up after
 *   `maxRestarts` consecutive attempts,
 * - shut both children down (graceful command first, then kill).
 */
class SubprocessManager {
	/** Both managed kinds, in start order. */
	private static readonly KINDS: readonly ManagedKind[] = ['metrics', 'events'];

	/**
	 * A child that stayed up at least this long before exiting is treated as
	 * having been healthy, so its next restart begins the backoff from scratch.
	 * Shorter runs count as consecutive failures.
	 */
	private static readonly STABLE_UPTIME_MS = 60000;

	private metricsState: SubprocessState = {
		process: null,
		restartCount: 0,
		lastRestartTime: 0,
		isShuttingDown: false
	};

	private eventsState: SubprocessState = {
		process: null,
		restartCount: 0,
		lastRestartTime: 0,
		isShuttingDown: false
	};

	private readonly metricsConfig: SubprocessConfig = {
		name: 'metrics-subprocess',
		scriptPath: getSubprocessPath('metrics-subprocess'),
		restartDelayMs: 5000,
		maxRestarts: 10
	};

	private readonly eventsConfig: SubprocessConfig = {
		name: 'event-subprocess',
		scriptPath: getSubprocessPath('event-subprocess'),
		restartDelayMs: 5000,
		maxRestarts: 10
	};

	/** Pending restart timers, kept so `stop()` can cancel them. */
	private readonly restartTimers: Record<ManagedKind, ReturnType<typeof setTimeout> | null> = {
		metrics: null,
		events: null
	};

	/**
	 * Start all subprocesses (metrics first, then events).
	 * Spawn failures are logged and retried in the background; this never rejects.
	 */
	async start(): Promise<void> {
		console.log('[SubprocessManager] Starting background subprocesses...');

		await this.startSubprocess('metrics');
		await this.startSubprocess('events');

		console.log('[SubprocessManager] All subprocesses started');
	}

	/**
	 * Stop all subprocesses gracefully.
	 *
	 * Sends a `shutdown` command to each child, waits one second, then kills
	 * whatever is still tracked. Pending restarts are cancelled.
	 */
	async stop(): Promise<void> {
		console.log('[SubprocessManager] Stopping background subprocesses...');

		for (const kind of SubprocessManager.KINDS) {
			this.target(kind).state.isShuttingDown = true;

			// A restart that is still waiting on its backoff delay must not fire
			// (or keep the event loop alive) once shutdown has begun.
			const timer = this.restartTimers[kind];
			if (timer !== null) {
				clearTimeout(timer);
				this.restartTimers[kind] = null;
			}
		}

		// Send shutdown commands
		for (const kind of SubprocessManager.KINDS) {
			this.sendTo(kind, { type: 'shutdown' });
		}

		// Wait a bit for graceful shutdown
		await new Promise<void>((resolve) => setTimeout(resolve, 1000));

		// Force kill if still running (children that exited already were
		// untracked by the exit handler)
		for (const kind of SubprocessManager.KINDS) {
			const { state } = this.target(kind);
			if (state.process) {
				state.process.kill();
				state.process = null;
			}
		}

		console.log('[SubprocessManager] All subprocesses stopped');
	}

	/**
	 * Notify subprocesses to refresh their environment list
	 */
	refreshEnvironments(): void {
		for (const kind of SubprocessManager.KINDS) {
			this.sendTo(kind, { type: 'refresh_environments' });
		}
	}

	/**
	 * Get metrics subprocess PID (for HMR cleanup)
	 */
	getMetricsPid(): number | null {
		return this.metricsState.process?.pid ?? null;
	}

	/**
	 * Get events subprocess PID (for HMR cleanup)
	 */
	getEventsPid(): number | null {
		return this.eventsState.process?.pid ?? null;
	}

	/** Look up the config/state pair that belongs to `kind`. */
	private target(kind: ManagedKind): { config: SubprocessConfig; state: SubprocessState } {
		return kind === 'metrics'
			? { config: this.metricsConfig, state: this.metricsState }
			: { config: this.eventsConfig, state: this.eventsState };
	}

	/** Render an unknown thrown value as a log-friendly string. */
	private static describeError(error: unknown): string {
		return error instanceof Error ? error.message : String(error);
	}

	/**
	 * Spawn the subprocess for `kind` and wire up IPC and exit handling.
	 * A failed spawn is logged and a restart is scheduled.
	 */
	private async startSubprocess(kind: ManagedKind): Promise<void> {
		const { config, state } = this.target(kind);
		if (state.isShuttingDown) return;

		try {
			console.log(`[SubprocessManager] Starting ${config.name}...`);

			// Recorded before spawning so the exit handler can measure uptime even
			// if it runs before Bun.spawn returns.
			state.lastRestartTime = Date.now();
			let exitedDuringSpawn = false;

			const proc = Bun.spawn(['bun', 'run', config.scriptPath], {
				stdio: ['inherit', 'inherit', 'inherit'],
				env: { ...process.env, SKIP_MIGRATIONS: '1' },
				ipc: (message) =>
					kind === 'metrics'
						? this.handleMetricsMessage(message as SubprocessMessage)
						: this.handleEventsMessage(message as SubprocessMessage),
				onExit: (_proc, exitCode, signalCode) => {
					exitedDuringSpawn = true;
					this.handleExit(kind, exitCode, signalCode);
				}
			});

			// Per Bun's documentation onExit may fire before Bun.spawn returns; in
			// that case the exit handler already ran and the dead handle must not
			// be tracked.
			state.process = exitedDuringSpawn ? null : proc;

			// NOTE: restartCount is deliberately NOT reset here. A child that
			// spawns fine but crashes right away must keep climbing the backoff
			// ladder; the reset happens in handleExit once uptime proves stability.

			console.log(`[SubprocessManager] ${config.name} started (PID: ${proc.pid})`);
		} catch (error) {
			const msg = SubprocessManager.describeError(error);
			console.error(`[SubprocessManager] Failed to start ${config.name}: ${msg}`);
			this.scheduleRestart(kind);
		}
	}

	/**
	 * Handle IPC messages from metrics subprocess
	 *
	 * Errors raised while processing a message are logged, never rethrown.
	 */
	private async handleMetricsMessage(message: SubprocessMessage): Promise<void> {
		try {
			switch (message.type) {
				case 'ready':
					console.log(`[SubprocessManager] ${this.metricsConfig.name} is ready`);
					break;

				case 'metric':
					// Save metric to database
					await saveHostMetric(
						message.cpu,
						message.memPercent,
						message.memUsed,
						message.memTotal,
						message.envId
					);
					break;

				case 'disk_warning':
					// Send disk warning notification. Presence (not truthiness) of
					// diskPercent selects the title, so a reported 0 still counts.
					await sendEventNotification(
						'disk_space_warning',
						{
							title: message.diskPercent != null ? 'Disk space warning' : 'High Docker disk usage',
							message: message.message,
							type: 'warning'
						},
						message.envId
					);
					break;

				case 'error':
					console.error(`[SubprocessManager] ${this.metricsConfig.name} error:`, message.message);
					break;
			}
		} catch (error) {
			const msg = SubprocessManager.describeError(error);
			console.error(`[SubprocessManager] Error handling metrics message: ${msg}`);
		}
	}

	/**
	 * Handle IPC messages from events subprocess
	 *
	 * Errors raised while processing a message are logged, never rethrown.
	 */
	private async handleEventsMessage(message: SubprocessMessage): Promise<void> {
		try {
			switch (message.type) {
				case 'ready':
					console.log(`[SubprocessManager] ${this.eventsConfig.name} is ready`);
					break;

				case 'container_event': {
					// Save event to database
					const savedEvent = await logContainerEvent(message.event);

					// Broadcast to SSE clients
					containerEventEmitter.emit('event', savedEvent);

					// Send notification if provided (fire-and-forget: a slow or
					// failing notifier must not delay event processing)
					if (message.notification) {
						const { action, title, message: notifMessage, notificationType, image } = message.notification;
						void sendEnvironmentNotification(
							message.event.environmentId,
							action,
							{
								title,
								message: notifMessage,
								type: notificationType
							},
							image
						).catch((err) => {
							console.error('[SubprocessManager] Failed to send notification:', err);
						});
					}
					break;
				}

				case 'env_status':
					// Broadcast to dashboard via containerEventEmitter
					containerEventEmitter.emit('env_status', {
						envId: message.envId,
						envName: message.envName,
						online: message.online,
						error: message.error
					});

					// Send environment status notification
					if (message.online) {
						await sendEventNotification(
							'environment_online',
							{
								title: 'Environment online',
								message: `Environment "${message.envName}" is now reachable`,
								type: 'success'
							},
							message.envId
						).catch((err) => {
							console.error('[SubprocessManager] Failed to send online notification:', err);
						});
					} else {
						await sendEventNotification(
							'environment_offline',
							{
								title: 'Environment offline',
								message: `Environment "${message.envName}" is unreachable${message.error ? `: ${message.error}` : ''}`,
								type: 'error'
							},
							message.envId
						).catch((err) => {
							console.error('[SubprocessManager] Failed to send offline notification:', err);
						});
					}
					break;

				case 'error':
					console.error(`[SubprocessManager] ${this.eventsConfig.name} error:`, message.message);
					break;
			}
		} catch (error) {
			const msg = SubprocessManager.describeError(error);
			console.error(`[SubprocessManager] Error handling events message: ${msg}`);
		}
	}

	/**
	 * Handle the exit of the subprocess for `kind`.
	 *
	 * During shutdown the exit is expected and only logged. Otherwise the child
	 * is untracked and a restart is scheduled.
	 */
	private handleExit(kind: ManagedKind, exitCode: number | null, signalCode: number | string | null): void {
		const { config, state } = this.target(kind);

		if (state.isShuttingDown) {
			console.log(`[SubprocessManager] ${config.name} stopped`);
			// Untrack the handle so stop() does not signal a child that is gone.
			state.process = null;
			return;
		}

		console.error(
			`[SubprocessManager] ${config.name} exited unexpectedly (code: ${exitCode}, signal: ${signalCode})`
		);

		state.process = null;

		// Only a run that lasted long enough clears the consecutive-failure
		// counter; a crash loop keeps escalating until maxRestarts is hit.
		const uptimeMs = Date.now() - state.lastRestartTime;
		if (uptimeMs >= SubprocessManager.STABLE_UPTIME_MS) {
			state.restartCount = 0;
		}

		this.scheduleRestart(kind);
	}

	/**
	 * Schedule a restart of the subprocess for `kind` with exponential backoff
	 * (`restartDelayMs * 2^restartCount`). Does nothing during shutdown and gives
	 * up once `maxRestarts` attempts have been used.
	 */
	private scheduleRestart(kind: ManagedKind): void {
		const { config, state } = this.target(kind);
		if (state.isShuttingDown) return;

		if (state.restartCount >= config.maxRestarts) {
			console.error(
				`[SubprocessManager] ${config.name} exceeded max restarts (${config.maxRestarts}), giving up`
			);
			return;
		}

		const delay = config.restartDelayMs * 2 ** state.restartCount;
		state.restartCount++;

		console.log(
			`[SubprocessManager] Restarting ${config.name} in ${delay}ms (attempt ${state.restartCount}/${config.maxRestarts})`
		);

		this.restartTimers[kind] = setTimeout(() => {
			this.restartTimers[kind] = null;
			// startSubprocess handles its own errors, so the promise never rejects.
			void this.startSubprocess(kind);
		}, delay);
	}

	/**
	 * Send a command to the subprocess for `kind`.
	 * A missing child is silently skipped; a failing send is logged.
	 */
	private sendTo(kind: ManagedKind, command: MainProcessCommand): void {
		const { state } = this.target(kind);
		if (!state.process) return;

		try {
			state.process.send(command);
		} catch (error) {
			const msg = SubprocessManager.describeError(error);
			console.error(`[SubprocessManager] Failed to send to ${kind} subprocess: ${msg}`);
		}
	}
}
|
||||
|
||||
/**
 * Module-level singleton: the manager created by `startSubprocesses`,
 * or `null` while no manager is running.
 */
let manager: SubprocessManager | null = null;
|
||||
|
||||
/**
 * Key on `globalThis` under which subprocess PIDs are kept.
 * Using globalThis lets the PIDs survive module reloads (HMR) in dev mode.
 */
const GLOBAL_KEY = '__dockhand_subprocess_pids__';

/** PIDs of the two managed subprocesses; `null` when none is recorded. */
interface SubprocessPids {
	metrics: number | null;
	events: number | null;
}

/** Typed view of `globalThis` exposing the optional PID slot (avoids `any`). */
type PidStore = typeof globalThis & { [GLOBAL_KEY]?: SubprocessPids };

/**
 * Read the PIDs stored on `globalThis`.
 *
 * @returns The stored record, or `{ metrics: null, events: null }` when nothing
 *          has been stored yet.
 */
function getStoredPids(): SubprocessPids {
	return (globalThis as PidStore)[GLOBAL_KEY] ?? { metrics: null, events: null };
}

/**
 * Store the given PIDs on `globalThis`, replacing any previous record.
 *
 * @param pids - PIDs to remember.
 */
function setStoredPids(pids: SubprocessPids): void {
	(globalThis as PidStore)[GLOBAL_KEY] = pids;
}
|
||||
|
||||
/**
 * Kill any orphaned processes from previous HMR reloads.
 *
 * Sends SIGTERM to every PID found in the global PID store, then clears the
 * store. Best-effort: failures never propagate. A PID that no longer exists
 * (`ESRCH`) is ignored silently; any other failure is logged so it does not go
 * unnoticed.
 *
 * NOTE(review): a stored PID could in principle have been reused by an
 * unrelated process since it was recorded - confirm this is acceptable for the
 * dev-only HMR scenario.
 */
function killOrphanedProcesses(): void {
	const pids = getStoredPids();
	const candidates: Array<[string, number | null]> = [
		['metrics', pids.metrics],
		['events', pids.events]
	];

	for (const [label, pid] of candidates) {
		// Truthiness check on purpose: skips null and also PID 0, which
		// process.kill would interpret as "the whole process group".
		if (!pid) continue;

		try {
			process.kill(pid, 'SIGTERM');
			console.log(`[SubprocessManager] Killed orphaned ${label} process (PID: ${pid})`);
		} catch (error) {
			const code = (error as { code?: unknown } | null)?.code;
			if (code !== 'ESRCH') {
				// Not simply "already dead" - surface it.
				const msg = error instanceof Error ? error.message : String(error);
				console.warn(`[SubprocessManager] Could not kill orphaned ${label} process (PID: ${pid}): ${msg}`);
			}
		}
	}

	setStoredPids({ metrics: null, events: null });
}
|
||||
|
||||
/**
 * Start background subprocesses.
 *
 * Idempotent: when a manager already exists in this module instance the call
 * only logs a warning. Otherwise any processes left behind by a previous HMR
 * reload are killed, a new manager is started, and its PIDs are stored for the
 * next HMR cleanup.
 */
export async function startSubprocesses(): Promise<void> {
	// Check for a live manager BEFORE the orphan cleanup: the stored PIDs belong
	// to that manager's children, so cleaning up first would terminate the
	// running subprocesses on every repeated call.
	if (manager) {
		console.warn('[SubprocessManager] Subprocesses already started');
		return;
	}

	// Kill any orphaned processes from HMR reloads
	killOrphanedProcesses();

	// Keep a local reference so a concurrent stopSubprocesses() (which nulls the
	// module-level variable) cannot cause a null dereference after the await.
	const started = new SubprocessManager();
	manager = started;
	await started.start();

	// Store PIDs for HMR cleanup
	setStoredPids({
		metrics: started.getMetricsPid(),
		events: started.getEventsPid()
	});
}
|
||||
|
||||
/**
 * Stop background subprocesses.
 *
 * Stops and discards the current manager when there is one, and always resets
 * the PIDs stored for HMR cleanup.
 */
export async function stopSubprocesses(): Promise<void> {
	const active = manager;

	if (active !== null) {
		await active.stop();
		manager = null;
	}

	setStoredPids({ metrics: null, events: null });
}
|
||||
|
||||
/**
 * Notify subprocesses to refresh environments.
 * Does nothing when no manager is running.
 */
export function refreshSubprocessEnvironments(): void {
	manager?.refreshEnvironments();
}
|
||||
Reference in New Issue
Block a user