Monitoring - RumenDamyanov/js-chess GitHub Wiki
Monitoring & Logging
Comprehensive monitoring, logging, and observability setup for the chess showcase application.
Overview
This guide covers monitoring and observability for:
- Application performance monitoring (APM)
- Error tracking and alerting
- User behavior analytics
- Infrastructure monitoring
- Log aggregation and analysis
- Real-time metrics and dashboards
- Uptime monitoring
Application Performance Monitoring
Performance Metrics Collection
// shared/monitoring/PerformanceTracker.js
/**
 * Reads NODE_ENV without assuming a `process` global exists.
 *
 * The literal `process.env.NODE_ENV` expression is kept so bundlers that
 * inline it at build time can still do so; the try/catch covers browsers
 * where nothing replaced it and `process` is undefined.
 *
 * @returns {string|undefined} The NODE_ENV value, if one can be read.
 */
function readNodeEnv() {
  try {
    return process.env.NODE_ENV;
  } catch {
    return undefined;
  }
}

/**
 * Collects browser performance metrics (navigation, paint, LCP, CLS, long
 * tasks), custom timers and chess-specific metrics, keeps them in memory and
 * POSTs each one to `/api/metrics`.
 *
 * Recording is enabled only when NODE_ENV is `production`.
 */
export class PerformanceTracker {
  constructor() {
    this.metrics = new Map();
    this.startTimes = new Map();
    this.observers = [];
    this.isEnabled = readNodeEnv() === 'production';
    this.initializeObservers();
  }

  /**
   * Registers one PerformanceObserver per supported entry type.
   * Does nothing when disabled or when not running in a browser.
   */
  initializeObservers() {
    if (!this.isEnabled || typeof window === 'undefined') return;
    if (!('PerformanceObserver' in window)) return;

    this.observeEntryType('navigation', (entry) => this.recordNavigationMetrics(entry));
    this.observeEntryType('paint', (entry) => this.recordPaintMetrics(entry));
    this.observeEntryType('largest-contentful-paint', (entry) => {
      this.recordMetric('largest_contentful_paint', entry.renderTime || entry.loadTime);
    });

    // CLS is a running sum of shifts that were not caused by user input.
    let clsValue = 0;
    this.observeEntryType('layout-shift', (entry) => {
      if (entry.hadRecentInput) return;
      clsValue += entry.value;
      this.recordMetric('cumulative_layout_shift', clsValue);
    });

    this.observeEntryType('longtask', (entry) => this.recordLongTask(entry));
  }

  /**
   * Creates and stores an observer for a single entry type.
   *
   * @param {string} entryType - Performance entry type to observe.
   * @param {(entry: object) => void} handleEntry - Called once per entry.
   */
  observeEntryType(entryType, handleEntry) {
    try {
      const observer = new PerformanceObserver((list) => {
        for (const entry of list.getEntries()) {
          handleEntry(entry);
        }
      });
      observer.observe({ entryTypes: [entryType] });
      this.observers.push(observer);
    } catch (error) {
      // Some engines throw for entry types they do not implement; one
      // unsupported type must not prevent the remaining observers.
    }
  }

  /**
   * Derives timing phases from a navigation timing entry.
   *
   * @param {object} entry - A PerformanceNavigationTiming-like entry.
   */
  recordNavigationMetrics(entry) {
    // Navigation entries have no `navigationStart`; their own `startTime`
    // is the origin for the absolute milestones below.
    const origin = entry.startTime || 0;
    const metrics = {
      dns_lookup: entry.domainLookupEnd - entry.domainLookupStart,
      tcp_connection: entry.connectEnd - entry.connectStart,
      tls_handshake: entry.secureConnectionStart > 0
        ? entry.connectEnd - entry.secureConnectionStart
        : 0,
      request_time: entry.responseStart - entry.requestStart,
      response_time: entry.responseEnd - entry.responseStart,
      dom_loading: entry.domContentLoadedEventStart - entry.responseEnd,
      dom_interactive: entry.domInteractive - origin,
      dom_complete: entry.domComplete - origin,
      page_load: entry.loadEventEnd - origin
    };
    Object.entries(metrics).forEach(([name, value]) => {
      this.recordMetric(`navigation.${name}`, value);
    });
  }

  /**
   * Records a paint entry under `paint.<name>` with hyphens as underscores.
   *
   * @param {object} entry - Paint entry with `name` and `startTime`.
   */
  recordPaintMetrics(entry) {
    // Global replace: "first-contentful-paint" contains two hyphens.
    this.recordMetric(`paint.${entry.name.replace(/-/g, '_')}`, entry.startTime);
  }

  /**
   * Records a long task and warns on the console when it exceeds 500 ms.
   *
   * @param {object} entry - Long task entry with `duration` and `startTime`.
   */
  recordLongTask(entry) {
    this.recordMetric('long_task', entry.duration);
    if (entry.duration > 500) {
      console.warn('Very long task detected:', {
        duration: entry.duration,
        startTime: entry.startTime
      });
    }
  }

  /**
   * Starts (or restarts) a named timer.
   *
   * @param {string} name - Timer name; also used as the metric name.
   */
  startTimer(name) {
    this.startTimes.set(name, performance.now());
  }

  /**
   * Stops a named timer and records its duration as a metric.
   *
   * @param {string} name - Name passed to startTimer.
   * @param {object} [metadata] - Extra fields merged into the metric.
   * @returns {number|null} Elapsed milliseconds, or null if never started.
   */
  endTimer(name, metadata = {}) {
    const startTime = this.startTimes.get(name);
    // Compare against undefined: a start time of 0 is a valid reading.
    if (startTime === undefined) return null;
    const duration = performance.now() - startTime;
    this.recordMetric(name, duration, metadata);
    this.startTimes.delete(name);
    return duration;
  }

  /**
   * Stores a metric locally and sends it to the monitoring endpoint.
   * No-op when the tracker is disabled.
   *
   * @param {string} name - Metric name.
   * @param {number} value - Metric value.
   * @param {object} [metadata] - Extra fields; these override the defaults.
   */
  recordMetric(name, value, metadata = {}) {
    if (!this.isEnabled) return;
    const metric = {
      name,
      value,
      timestamp: Date.now(),
      url: typeof window !== 'undefined' ? window.location.pathname : null,
      userAgent: typeof navigator !== 'undefined' ? navigator.userAgent : null,
      ...metadata
    };
    if (!this.metrics.has(name)) {
      this.metrics.set(name, []);
    }
    this.metrics.get(name).push(metric);
    this.sendMetric(metric);
  }

  /**
   * Records the duration and move rate of a finished game.
   *
   * @param {string} gameId - Game identifier.
   * @param {number} moveCount - Number of moves played.
   * @param {number} gameTime - Game duration in milliseconds.
   */
  recordChessMetrics(gameId, moveCount, gameTime) {
    this.recordMetric('chess.game_duration', gameTime, { gameId, moveCount });
    // A zero-length game would otherwise report Infinity or NaN.
    const movesPerMinute = gameTime > 0 ? (moveCount / gameTime) * 60000 : 0;
    this.recordMetric('chess.moves_per_minute', movesPerMinute, { gameId });
  }

  /**
   * Counts one occurrence of a user action as `user.<action>`.
   *
   * @param {string} action - Action name.
   * @param {object} [metadata] - Extra fields merged into the metric.
   */
  recordUserAction(action, metadata = {}) {
    this.recordMetric(`user.${action}`, 1, {
      ...metadata,
      timestamp: Date.now()
    });
  }

  /**
   * POSTs one metric to `/api/metrics`. Failures never propagate; they are
   * logged only in development.
   *
   * @param {object} metric - Metric payload.
   * @returns {Promise<void>}
   */
  async sendMetric(metric) {
    try {
      await fetch('/api/metrics', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(metric)
      });
    } catch (error) {
      if (readNodeEnv() === 'development') {
        console.error('Failed to send metric:', error);
      }
    }
  }

  /** @returns {Object<string, object[]>} Stored metrics keyed by name. */
  getMetrics() {
    return Object.fromEntries(this.metrics);
  }

  /** Drops all locally stored metrics. */
  clearMetrics() {
    this.metrics.clear();
  }

  /** Disconnects every registered observer. */
  destroy() {
    this.observers.forEach((observer) => observer.disconnect());
    this.observers = [];
  }
}

export const performanceTracker = new PerformanceTracker();
Real User Monitoring (RUM)
// shared/monitoring/RumTracker.js
/** Endpoint that receives every RUM event. */
const RUM_ENDPOINT = '/api/rum';

/**
 * Real User Monitoring: records page views, DOM and chess interactions,
 * uncaught errors and slow resources for one browser session, and ships
 * them to `/api/rum`.
 *
 * NOTE(review): page views, chess interactions and errors are sent
 * immediately and are also included in the next batch, so the backend
 * receives them twice — confirm this is intended.
 */
export class RumTracker {
  constructor() {
    this.sessionId = this.generateSessionId();
    this.userId = null;
    this.pageViews = [];
    this.errors = [];
    this.interactions = [];
    this.initializeTracking();
  }

  /** Wires up all trackers; does nothing outside a browser. */
  initializeTracking() {
    if (typeof window === 'undefined') return;
    this.trackPageView();
    this.trackInteractions();
    this.trackErrors();
    this.trackResources();
    this.startBatching();
  }

  /** @returns {string} `session_<epoch ms>_<random base36 suffix>`. */
  generateSessionId() {
    // slice(2, 11) yields the same characters as the deprecated substr(2, 9).
    return `session_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** @param {string|null} userId - Identifier attached to later events. */
  setUserId(userId) {
    this.userId = userId;
  }

  /** Records and immediately sends a page view for the current document. */
  trackPageView() {
    const pageView = {
      sessionId: this.sessionId,
      userId: this.userId,
      url: window.location.href,
      title: document.title,
      referrer: document.referrer,
      timestamp: Date.now(),
      viewport: {
        width: window.innerWidth,
        height: window.innerHeight
      },
      screen: {
        width: screen.width,
        height: screen.height,
        pixelRatio: window.devicePixelRatio
      },
      connection: navigator.connection ? {
        effectiveType: navigator.connection.effectiveType,
        downlink: navigator.connection.downlink,
        rtt: navigator.connection.rtt
      } : null
    };
    this.pageViews.push(pageView);
    this.sendEvent('page_view', pageView);
  }

  /** Subscribes to click, submit and custom `chess-*` document events. */
  trackInteractions() {
    document.addEventListener('click', (event) => {
      this.recordInteraction('click', event);
    });
    document.addEventListener('submit', (event) => {
      this.recordInteraction('form_submit', event);
    });
    document.addEventListener('chess-move', (event) => {
      this.recordChessInteraction('move', event.detail);
    });
    document.addEventListener('chess-game-start', (event) => {
      this.recordChessInteraction('game_start', event.detail);
    });
    document.addEventListener('chess-game-end', (event) => {
      this.recordChessInteraction('game_end', event.detail);
    });
  }

  /**
   * Buffers a DOM interaction for the next batch.
   *
   * @param {string} type - Interaction type, e.g. `click`.
   * @param {object} event - DOM event; reads `target`, `clientX`, `clientY`.
   */
  recordInteraction(type, event) {
    // Type check instead of truthiness: x = 0 is a valid pointer position.
    const hasPointer = typeof event.clientX === 'number';
    const interaction = {
      sessionId: this.sessionId,
      userId: this.userId,
      type,
      timestamp: Date.now(),
      element: this.getElementInfo(event.target),
      coordinates: hasPointer ? { x: event.clientX, y: event.clientY } : null
    };
    this.interactions.push(interaction);
  }

  /**
   * Buffers and immediately sends a chess interaction.
   *
   * @param {string} type - Suffix appended to `chess_`.
   * @param {*} details - Payload taken from the custom event's `detail`.
   */
  recordChessInteraction(type, details) {
    const interaction = {
      sessionId: this.sessionId,
      userId: this.userId,
      type: `chess_${type}`,
      timestamp: Date.now(),
      details
    };
    this.interactions.push(interaction);
    this.sendEvent('chess_interaction', interaction);
  }

  /**
   * Summarises an event target.
   *
   * @param {object|null} element - Event target; may be null.
   * @returns {{tagName: *, className: *, id: *, textContent: *}}
   *   Fields are undefined when the target does not provide them;
   *   `textContent` is capped at 100 characters.
   */
  getElementInfo(element) {
    const className = element?.className;
    return {
      tagName: element?.tagName,
      // Fall back to `.baseVal` when className is an object rather than a
      // string, so the value serialises as text.
      className: typeof className === 'string' || className == null
        ? className
        : className.baseVal,
      id: element?.id,
      textContent: element?.textContent?.substring(0, 100)
    };
  }

  /** Subscribes to window `error` and `unhandledrejection` events. */
  trackErrors() {
    window.addEventListener('error', (event) => {
      this.recordError('javascript', {
        message: event.message,
        filename: event.filename,
        lineno: event.lineno,
        colno: event.colno,
        stack: event.error?.stack
      });
    });
    window.addEventListener('unhandledrejection', (event) => {
      this.recordError('promise', {
        reason: event.reason?.toString(),
        stack: event.reason?.stack
      });
    });
  }

  /**
   * Buffers and immediately sends an error record.
   *
   * @param {string} type - Error category.
   * @param {object} details - Fields merged into the record.
   */
  recordError(type, details) {
    const error = {
      sessionId: this.sessionId,
      userId: this.userId,
      type,
      timestamp: Date.now(),
      url: window.location.href,
      userAgent: navigator.userAgent,
      ...details
    };
    this.errors.push(error);
    this.sendEvent('error', error);
  }

  /** Observes resource timing entries when PerformanceObserver exists. */
  trackResources() {
    if (!('PerformanceObserver' in window)) return;
    const resourceObserver = new PerformanceObserver((list) => {
      for (const entry of list.getEntries()) {
        this.recordResourceTiming(entry);
      }
    });
    resourceObserver.observe({ entryTypes: ['resource'] });
  }

  /**
   * Reports resources that took longer than one second to load.
   *
   * @param {object} entry - Resource timing entry.
   */
  recordResourceTiming(entry) {
    if (entry.duration <= 1000) return;
    // Reporting a slow RUM request creates another RUM request, which can be
    // slow in turn; skip our own endpoint to break that loop.
    if (this.isOwnEndpoint(entry.name)) return;
    this.sendEvent('slow_resource', {
      sessionId: this.sessionId,
      name: entry.name,
      type: entry.initiatorType,
      size: entry.transferSize,
      duration: entry.duration,
      timestamp: Date.now()
    });
  }

  /**
   * @param {string} url - Absolute or relative resource URL.
   * @returns {boolean} True when the URL path is the RUM endpoint.
   */
  isOwnEndpoint(url) {
    try {
      return new URL(url, 'http://localhost').pathname === RUM_ENDPOINT;
    } catch {
      return false;
    }
  }

  /** Flushes buffered data every 30 seconds and once on page unload. */
  startBatching() {
    setInterval(() => {
      this.sendBatch();
    }, 30000);
    window.addEventListener('beforeunload', () => {
      // keepalive asks the browser to finish the request after unload.
      this.sendBatch({ keepalive: true });
    });
  }

  /**
   * Sends all buffered records as one `batch` event and clears the buffers.
   * Does nothing when every buffer is empty.
   *
   * @param {{keepalive?: boolean}} [options] - Forwarded to sendEvent.
   */
  sendBatch(options = {}) {
    const data = {
      sessionId: this.sessionId,
      userId: this.userId,
      timestamp: Date.now(),
      pageViews: [...this.pageViews],
      interactions: [...this.interactions],
      errors: [...this.errors]
    };
    const hasData =
      data.pageViews.length || data.interactions.length || data.errors.length;
    if (!hasData) return;
    this.sendEvent('batch', data, options);
    this.pageViews = [];
    this.interactions = [];
    this.errors = [];
  }

  /**
   * POSTs one event to the RUM endpoint. Best-effort: failures are dropped
   * on purpose so monitoring can never break the page.
   *
   * @param {string} type - Event type.
   * @param {object} data - Event payload.
   * @param {{keepalive?: boolean}} [options] - Set `keepalive` for
   *   requests issued while the page is unloading.
   * @returns {Promise<void>}
   */
  async sendEvent(type, data, { keepalive = false } = {}) {
    try {
      await fetch(RUM_ENDPOINT, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ type, data }),
        keepalive
      });
    } catch (error) {
      // Deliberately ignored: telemetry delivery is best-effort.
    }
  }
}

export const rumTracker = new RumTracker();
Error Tracking and Alerting
Error Monitoring System
// shared/monitoring/ErrorTracker.js
/**
 * Captures uncaught errors, unhandled rejections and failed `fetch` calls,
 * enriches them with page context, forwards them to `/api/errors` and
 * raises an alert via `/api/alerts` when a per-type threshold is reached.
 */
export class ErrorTracker {
  constructor() {
    this.errors = [];
    this.errorCounts = new Map();
    this.alertThresholds = {
      javascript: 5,
      network: 10,
      chess_engine: 3,
      websocket: 5
    };
    // Un-instrumented fetch, set by interceptFetch(); used for our own
    // reporting calls.
    this.nativeFetch = null;
    this.setupErrorHandling();
  }

  /** Installs global handlers and the fetch wrapper; browser only. */
  setupErrorHandling() {
    if (typeof window === 'undefined') return;
    window.addEventListener('error', (event) => {
      this.captureError({
        type: 'javascript',
        message: event.message,
        filename: event.filename,
        lineno: event.lineno,
        colno: event.colno,
        stack: event.error?.stack,
        timestamp: Date.now()
      });
    });
    window.addEventListener('unhandledrejection', (event) => {
      this.captureError({
        type: 'promise',
        message: event.reason?.toString(),
        stack: event.reason?.stack,
        timestamp: Date.now()
      });
    });
    this.interceptFetch();
  }

  /**
   * Replaces `window.fetch` with a wrapper that reports non-2xx responses
   * and rejected requests as `network` errors. The response or rejection
   * is still passed through to the caller.
   */
  interceptFetch() {
    if (typeof window.fetch !== 'function') return;
    // Bound so it can be invoked from this instance without losing the
    // receiver the original function was called on.
    const nativeFetch = window.fetch.bind(window);
    this.nativeFetch = nativeFetch;
    window.fetch = async (...args) => {
      try {
        const response = await nativeFetch(...args);
        if (!response.ok) {
          this.captureError({
            type: 'network',
            message: `HTTP ${response.status}: ${response.statusText}`,
            requestUrl: this.describeRequest(args[0]),
            status: response.status,
            timestamp: Date.now()
          });
        }
        return response;
      } catch (error) {
        this.captureError({
          type: 'network',
          message: error.message,
          requestUrl: this.describeRequest(args[0]),
          timestamp: Date.now()
        });
        throw error;
      }
    };
  }

  /**
   * @param {*} input - First argument passed to fetch.
   * @returns {string} The request URL as text.
   */
  describeRequest(input) {
    if (typeof input === 'string') return input;
    return input?.url ?? String(input);
  }

  /**
   * Enriches, stores, counts and reports an error.
   *
   * `url` is always the page URL; network errors carry the failed request
   * in `requestUrl` so the two do not overwrite each other.
   *
   * @param {object} error - Must include `type`; other fields are kept.
   */
  captureError(error) {
    const enrichedError = {
      ...error,
      id: this.generateErrorId(),
      url: window.location.href,
      userAgent: typeof navigator !== 'undefined' ? navigator.userAgent : null,
      userId: this.getCurrentUserId(),
      sessionId: this.getSessionId(),
      context: this.getErrorContext()
    };
    this.errors.push(enrichedError);
    this.updateErrorCounts(error.type);
    this.checkAlertThresholds(error.type);
    this.sendError(enrichedError);
    console.error('Error captured:', enrichedError);
  }

  /** @returns {string} `error_<epoch ms>_<random base36 suffix>`. */
  generateErrorId() {
    return `error_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** @returns {string} Stored `user-id`, or `anonymous`. */
  getCurrentUserId() {
    try {
      return localStorage.getItem('user-id') || 'anonymous';
    } catch {
      // Storage can be missing or blocked; error capture must still work.
      return 'anonymous';
    }
  }

  /** @returns {string} Stored `session-id`, or `unknown`. */
  getSessionId() {
    try {
      return sessionStorage.getItem('session-id') || 'unknown';
    } catch {
      return 'unknown';
    }
  }

  /** @returns {object} Route, game state, recent actions and perf data. */
  getErrorContext() {
    return {
      route: window.location.pathname,
      gameState: this.getGameState(),
      userActions: this.getRecentUserActions(),
      performanceMetrics: this.getBasicPerformanceMetrics()
    };
  }

  /**
   * @returns {object|null} Data attributes of the first `[data-game-id]`
   *   element, or null when there is none or the DOM is unavailable.
   */
  getGameState() {
    try {
      const gameElement = document.querySelector('[data-game-id]');
      return gameElement ? {
        gameId: gameElement.dataset.gameId,
        moveCount: gameElement.dataset.moveCount,
        currentPlayer: gameElement.dataset.currentPlayer
      } : null;
    } catch {
      return null;
    }
  }

  /** @returns {object[]} Up to the last five RUM interactions. */
  getRecentUserActions() {
    // `rumTracker` is not declared in this block; `typeof` is the only
    // check that cannot throw on an undeclared identifier.
    if (typeof rumTracker === 'undefined') return [];
    return rumTracker?.interactions?.slice(-5) || [];
  }

  /** @returns {{memory: object|null, timing: object|null}} */
  getBasicPerformanceMetrics() {
    if (typeof performance === 'undefined') {
      return { memory: null, timing: null };
    }
    return {
      memory: performance.memory ? {
        usedJSHeapSize: performance.memory.usedJSHeapSize,
        totalJSHeapSize: performance.memory.totalJSHeapSize
      } : null,
      timing: performance.timing ? {
        loadTime: performance.timing.loadEventEnd - performance.timing.navigationStart
      } : null
    };
  }

  /**
   * Increments the counter for this error type in the current minute.
   *
   * @param {string} errorType - Error category.
   */
  updateErrorCounts(errorType) {
    // Read the clock once so the bucket cannot straddle a minute boundary.
    const now = Date.now();
    const key = `${errorType}:${now - (now % 60000)}`;
    this.errorCounts.set(key, (this.errorCounts.get(key) || 0) + 1);
    this.cleanOldCounts();
  }

  /** Drops per-minute buckets older than ten minutes. */
  cleanOldCounts() {
    const tenMinutesAgo = Date.now() - (10 * 60 * 1000);
    for (const [key] of this.errorCounts) {
      const bucketStart = Number(key.slice(key.lastIndexOf(':') + 1));
      if (bucketStart < tenMinutesAgo) {
        this.errorCounts.delete(key);
      }
    }
  }

  /**
   * Triggers an alert when the retained count for a type reaches its
   * threshold. Types without a threshold are ignored.
   *
   * @param {string} errorType - Error category.
   */
  checkAlertThresholds(errorType) {
    const threshold = this.alertThresholds[errorType];
    if (!threshold) return;
    let recentCount = 0;
    for (const [key, count] of this.errorCounts) {
      if (key.startsWith(`${errorType}:`)) recentCount += count;
    }
    if (recentCount >= threshold) {
      this.triggerAlert(errorType, recentCount, threshold);
    }
  }

  /**
   * Sends a threshold alert; `critical` at twice the threshold.
   *
   * @param {string} errorType - Error category.
   * @param {number} count - Current count.
   * @param {number} threshold - Configured threshold.
   */
  triggerAlert(errorType, count, threshold) {
    const alert = {
      type: 'error_threshold_exceeded',
      errorType,
      count,
      threshold,
      timestamp: Date.now(),
      severity: count >= threshold * 2 ? 'critical' : 'warning'
    };
    this.sendAlert(alert);
    console.warn('Error threshold exceeded:', alert);
  }

  /**
   * POSTs JSON using the un-instrumented fetch. Going through the wrapped
   * `window.fetch` would report a failing `/api/errors` call as a new
   * error, which would be sent again, without end.
   *
   * @param {string} url - Endpoint path.
   * @param {object} payload - JSON-serialisable body.
   * @returns {Promise<void>}
   */
  async postJson(url, payload) {
    const send = this.nativeFetch ?? fetch;
    try {
      await send(url, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(payload)
      });
    } catch (e) {
      // Deliberately ignored: reporting is best-effort.
    }
  }

  /** @param {object} error - Enriched error to report. */
  async sendError(error) {
    await this.postJson('/api/errors', error);
  }

  /** @param {object} alert - Alert payload to report. */
  async sendAlert(alert) {
    await this.postJson('/api/alerts', alert);
  }

  /** @returns {object} Total, per-type counts and the last ten errors. */
  getErrorSummary() {
    const summary = {
      totalErrors: this.errors.length,
      errorsByType: {},
      recentErrors: this.errors.slice(-10)
    };
    this.errors.forEach((error) => {
      summary.errorsByType[error.type] = (summary.errorsByType[error.type] || 0) + 1;
    });
    return summary;
  }

  /** Clears stored errors and counters. */
  clearErrors() {
    this.errors = [];
    this.errorCounts.clear();
  }
}

export const errorTracker = new ErrorTracker();
Infrastructure Monitoring
System Metrics Collection
// backend/monitoring/SystemMonitor.js
const os = require('os');
const process = require('process');
const { EventEmitter } = require('events');
/**
 * Periodically samples OS, process and application metrics.
 *
 * Emits `metrics` with every sample and `alerts` with an array of alert
 * objects whenever a threshold is exceeded.
 */
class SystemMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.maxPoints=100] - Samples kept in memory.
   * @param {object} [options.thresholds] - Overrides for alert limits:
   *   `memoryPercent` (90), `loadPerCpu` (2) and `heapPercent` (85).
   */
  constructor(options = {}) {
    super();
    this.metrics = new Map();
    this.interval = null;
    this.isRunning = false;
    this.maxPoints = options.maxPoints ?? 100;
    this.thresholds = {
      memoryPercent: 90,
      loadPerCpu: 2,
      heapPercent: 85,
      ...options.thresholds
    };
  }

  /**
   * Starts sampling. Calling it while already running is a no-op.
   *
   * @param {number} [intervalMs=5000] - Sampling period in milliseconds.
   */
  start(intervalMs = 5000) {
    if (this.isRunning) return;
    this.isRunning = true;
    this.interval = setInterval(() => {
      this.collectMetrics();
    }, intervalMs);
    // Monitoring alone should not keep the process from exiting.
    this.interval.unref?.();
    console.log('System monitoring started');
  }

  /** Stops sampling. Calling it while stopped is a no-op. */
  stop() {
    if (!this.isRunning && !this.interval) return;
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
    this.isRunning = false;
    console.log('System monitoring stopped');
  }

  /** Takes one sample, stores it, emits it and evaluates alerts. */
  collectMetrics() {
    const timestamp = Date.now();
    const metrics = {
      timestamp,
      system: this.getSystemMetrics(),
      process: this.getProcessMetrics(),
      nodejs: this.getNodeJSMetrics(),
      custom: this.getCustomMetrics()
    };
    this.metrics.set(timestamp, metrics);
    this.emit('metrics', metrics);
    // Map iteration is in insertion order, so the first key is the oldest.
    while (this.metrics.size > this.maxPoints) {
      this.metrics.delete(this.metrics.keys().next().value);
    }
    this.checkAlerts(metrics);
  }

  /** @returns {object} Host information, load averages and memory usage. */
  getSystemMetrics() {
    const [load1, load5, load15] = os.loadavg();
    const totalMem = os.totalmem();
    const freeMem = os.freemem();
    const usedMem = totalMem - freeMem;
    return {
      hostname: os.hostname(),
      platform: os.platform(),
      arch: os.arch(),
      uptime: os.uptime(),
      loadAverage: {
        '1m': load1,
        '5m': load5,
        '15m': load15
      },
      memory: {
        total: totalMem,
        free: freeMem,
        used: usedMem,
        usagePercent: (usedMem / totalMem) * 100
      },
      cpus: os.cpus().length
    };
  }

  /** @returns {object} PID, uptime, memory usage and CPU usage. */
  getProcessMetrics() {
    const memUsage = process.memoryUsage();
    const cpuUsage = process.cpuUsage();
    return {
      pid: process.pid,
      uptime: process.uptime(),
      memory: {
        rss: memUsage.rss,
        heapTotal: memUsage.heapTotal,
        heapUsed: memUsage.heapUsed,
        external: memUsage.external,
        arrayBuffers: memUsage.arrayBuffers
      },
      cpu: {
        user: cpuUsage.user,
        system: cpuUsage.system
      }
    };
  }

  /** @returns {object} Node version information and NODE_ENV. */
  getNodeJSMetrics() {
    return {
      version: process.version,
      versions: process.versions,
      env: process.env.NODE_ENV
    };
  }

  /** @returns {object} Application counters read from optional globals. */
  getCustomMetrics() {
    return {
      activeGames: this.getActiveGameCount(),
      connectedUsers: this.getConnectedUserCount(),
      websocketConnections: this.getWebSocketConnectionCount(),
      gameEngineRequests: this.getGameEngineRequestCount()
    };
  }

  /** @returns {number} Count from `global.gameManager`, or 0. */
  getActiveGameCount() {
    return global.gameManager?.getActiveGameCount() || 0;
  }

  /** @returns {number} Count from `global.userManager`, or 0. */
  getConnectedUserCount() {
    return global.userManager?.getConnectedUserCount() || 0;
  }

  /** @returns {number} Count from `global.wsManager`, or 0. */
  getWebSocketConnectionCount() {
    return global.wsManager?.getConnectionCount() || 0;
  }

  /** @returns {number} Count from `global.chessEngine`, or 0. */
  getGameEngineRequestCount() {
    return global.chessEngine?.getRequestCount() || 0;
  }

  /**
   * Compares a sample against the thresholds and emits `alerts` when any
   * limit is exceeded.
   *
   * @param {object} metrics - A sample as built by collectMetrics.
   */
  checkAlerts(metrics) {
    const { memoryPercent, loadPerCpu, heapPercent } = this.thresholds;
    const alerts = [];

    if (metrics.system.memory.usagePercent > memoryPercent) {
      alerts.push({
        type: 'high_memory_usage',
        severity: 'critical',
        value: metrics.system.memory.usagePercent,
        threshold: memoryPercent
      });
    }

    const loadLimit = metrics.system.cpus * loadPerCpu;
    if (metrics.system.loadAverage['1m'] > loadLimit) {
      alerts.push({
        type: 'high_load_average',
        severity: 'warning',
        value: metrics.system.loadAverage['1m'],
        threshold: loadLimit
      });
    }

    const { heapUsed, heapTotal } = metrics.process.memory;
    // A zero heapTotal would otherwise give Infinity and a bogus alert.
    const heapUsagePercent = heapTotal > 0 ? (heapUsed / heapTotal) * 100 : 0;
    if (heapUsagePercent > heapPercent) {
      alerts.push({
        type: 'high_heap_usage',
        severity: 'warning',
        value: heapUsagePercent,
        threshold: heapPercent
      });
    }

    if (alerts.length > 0) {
      this.emit('alerts', alerts);
    }
  }

  /**
   * Summarises the samples taken within the given window.
   *
   * @param {number} [timeRangeMs=300000] - Look-back window in ms.
   * @returns {object|null} Count, averages and latest sample, or null
   *   when the window contains no samples.
   */
  getMetricsSummary(timeRangeMs = 300000) {
    const cutoff = Date.now() - timeRangeMs;
    const recentMetrics = [];
    for (const [timestamp, sample] of this.metrics) {
      if (timestamp >= cutoff) recentMetrics.push(sample);
    }
    if (recentMetrics.length === 0) return null;
    return {
      count: recentMetrics.length,
      timeRange: timeRangeMs,
      averages: this.calculateAverages(recentMetrics),
      latest: recentMetrics[recentMetrics.length - 1]
    };
  }

  /**
   * @param {object[]} metrics - Non-empty list of samples.
   * @returns {object} Arithmetic mean of selected fields.
   */
  calculateAverages(metrics) {
    const mean = (pick) =>
      metrics.reduce((sum, sample) => sum + pick(sample), 0) / metrics.length;
    return {
      memoryUsagePercent: mean((m) => m.system.memory.usagePercent),
      loadAverage1m: mean((m) => m.system.loadAverage['1m']),
      heapUsed: mean((m) => m.process.memory.heapUsed),
      activeGames: mean((m) => m.custom.activeGames),
      connectedUsers: mean((m) => m.custom.connectedUsers)
    };
  }
}
module.exports = SystemMonitor;
Docker Container Monitoring
# docker-compose.monitoring.yml
#
# Monitoring stack: Prometheus (metrics store), Grafana (dashboards),
# node-exporter (host metrics) and cAdvisor (container metrics).
# Images use the `latest` tag; pin explicit versions for reproducible
# deployments.
version: '3.8'

services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3001:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
    environment:
      # Set GRAFANA_ADMIN_PASSWORD in the shell or an .env file. The
      # fallback keeps the sample runnable but must not reach production.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Point the exporter at the host root mounted above.
      - '--path.rootfs=/rootfs'
      # Current name of the former `ignored-mount-points` flag.
      # `$$` is Compose escaping for a literal `$`.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    ports:
      - "8080:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro

volumes:
  prometheus_data:
  grafana_data:
Log Aggregation and Analysis
Structured Logging
// shared/logging/Logger.js
/**
 * Structured logger that fans each entry out to a set of transports.
 *
 * Levels in decreasing severity: error, warn, info, debug. The active
 * level comes from `LOG_LEVEL` and defaults to `info`.
 */
export class Logger {
  /**
   * @param {string} [context=''] - Label attached to every entry.
   * @param {object[]|null} [transports=null] - Transports to reuse. When
   *   omitted, the default transports are created from the environment.
   */
  constructor(context = '', transports = null) {
    this.context = context;
    this.logLevel = process.env.LOG_LEVEL || 'info';
    if (Array.isArray(transports)) {
      this.transports = transports;
    } else {
      this.transports = [];
      this.setupTransports();
    }
  }

  /** Creates the console, file (production) and remote transports. */
  setupTransports() {
    this.transports.push(new ConsoleTransport());
    if (process.env.NODE_ENV === 'production') {
      this.transports.push(new FileTransport());
    }
    if (process.env.LOG_ENDPOINT) {
      this.transports.push(new RemoteTransport(process.env.LOG_ENDPOINT));
    }
  }

  /**
   * Builds a log entry and passes it to every transport, provided the
   * level passes the configured threshold.
   *
   * @param {string} level - One of error, warn, info, debug.
   * @param {string} message - Human-readable message.
   * @param {object} [metadata] - Structured data stored with the entry.
   */
  log(level, message, metadata = {}) {
    // Filter first so suppressed entries cost nothing to build.
    if (!this.shouldLog(level)) return;
    const logEntry = {
      timestamp: new Date().toISOString(),
      level,
      message,
      context: this.context,
      metadata,
      requestId: this.getRequestId(),
      userId: this.getUserId(),
      sessionId: this.getSessionId()
    };
    this.transports.forEach((transport) => {
      transport.log(logEntry);
    });
  }

  /**
   * @param {string} level - Level of the candidate entry.
   * @returns {boolean} True when the entry should be emitted. An
   *   unrecognised configured level is treated as `info`; an unrecognised
   *   entry level is never emitted.
   */
  shouldLog(level) {
    const levels = { error: 0, warn: 1, info: 2, debug: 3 };
    const isKnown = (name) => Object.prototype.hasOwnProperty.call(levels, name);
    if (!isKnown(level)) return false;
    const configured = String(this.logLevel).toLowerCase();
    // A typo in LOG_LEVEL must not silence error logs.
    const threshold = isKnown(configured) ? levels[configured] : levels.info;
    return levels[level] <= threshold;
  }

  /** @returns {*} `globalThis.requestId`, or null. */
  getRequestId() {
    return globalThis.requestId || null;
  }

  /** @returns {*} `globalThis.userId`, or null. */
  getUserId() {
    return globalThis.userId || null;
  }

  /** @returns {*} `globalThis.sessionId`, or null. */
  getSessionId() {
    return globalThis.sessionId || null;
  }

  /**
   * @param {string} message - Human-readable message.
   * @param {Error|null} [error] - Error whose name, message and stack are
   *   copied into the metadata.
   * @param {object} [metadata] - Additional structured data.
   */
  error(message, error = null, metadata = {}) {
    this.log('error', message, {
      ...metadata,
      error: error ? {
        name: error.name,
        message: error.message,
        stack: error.stack
      } : null
    });
  }

  /** Logs at `warn` level. */
  warn(message, metadata = {}) {
    this.log('warn', message, metadata);
  }

  /** Logs at `info` level. */
  info(message, metadata = {}) {
    this.log('info', message, metadata);
  }

  /** Logs at `debug` level. */
  debug(message, metadata = {}) {
    this.log('debug', message, metadata);
  }

  /** Logs a chess game lifecycle event at `info` level. */
  logGameEvent(event, gameId, metadata = {}) {
    this.info(`Chess game event: ${event}`, {
      gameId,
      event,
      ...metadata
    });
  }

  /** Logs a move attempt and whether it was valid. */
  logMoveAttempt(gameId, move, isValid, metadata = {}) {
    this.info('Chess move attempt', {
      gameId,
      move,
      isValid,
      ...metadata
    });
  }

  /** Logs a completed AI request with its duration. */
  logAiRequest(gameId, requestType, duration, metadata = {}) {
    this.info('AI request completed', {
      gameId,
      requestType,
      duration,
      ...metadata
    });
  }

  /**
   * Creates a logger whose context is `<parent>:<additionalContext>` and
   * which shares this logger's transports and level.
   *
   * @param {string} additionalContext - Suffix for the child context.
   * @returns {Logger} The child logger.
   */
  child(additionalContext) {
    // Pass the transports in so no throw-away transport set is built.
    const childLogger = new Logger(
      `${this.context}:${additionalContext}`,
      this.transports
    );
    childLogger.logLevel = this.logLevel;
    return childLogger;
  }
}
/**
 * Transport that prints each entry through the console method named after
 * the entry's level.
 */
class ConsoleTransport {
  /**
   * Formats the entry as `<timestamp> <LEVEL> [<context>] <message>`,
   * followed by pretty-printed metadata on new lines when it has keys.
   *
   * @param {object} entry - Entry with timestamp, level, message, context
   *   and metadata.
   */
  log(entry) {
    const hasMetadata = Object.keys(entry.metadata).length > 0;
    let line = `${entry.timestamp} ${entry.level.toUpperCase()} `;
    if (entry.context) {
      line += `[${entry.context}]`;
    }
    line += ` ${entry.message}`;
    if (hasMetadata) {
      line += `\n${JSON.stringify(entry.metadata, null, 2)}`;
    }
    console[entry.level](line);
  }
}
/**
 * Transport that appends each entry as one JSON line to a per-day file
 * (`<LOG_DIR>/<YYYY-MM-DD>.log`, default directory `./logs`).
 */
class FileTransport {
  constructor() {
    this.fs = require('fs');
    this.path = require('path');
    this.logDir = process.env.LOG_DIR || './logs';
    this.ensureLogDir();
  }

  /** Creates the log directory, including parents, when it is missing. */
  ensureLogDir() {
    const alreadyThere = this.fs.existsSync(this.logDir);
    if (alreadyThere) return;
    this.fs.mkdirSync(this.logDir, { recursive: true });
  }

  /**
   * Synchronously appends the entry to the current day's file.
   *
   * @param {object} entry - JSON-serialisable log entry.
   */
  log(entry) {
    // The file name is the date part of the current ISO timestamp.
    const [day] = new Date().toISOString().split('T');
    const target = this.path.join(this.logDir, `${day}.log`);
    this.fs.appendFileSync(target, JSON.stringify(entry) + '\n');
  }
}
/**
 * Transport that buffers entries and POSTs them to an HTTP endpoint as
 * `{ logs: [...] }` every five seconds.
 */
class RemoteTransport {
  /** @param {string} endpoint - URL that receives the batches. */
  constructor(endpoint) {
    this.endpoint = endpoint;
    this.buffer = [];
    this.flushInterval = 5000; // 5 seconds
    // Upper bound on buffered entries so an unreachable endpoint cannot
    // grow memory without limit; the oldest entries are dropped first.
    this.maxBufferSize = 1000;
    this.flushTimer = null;
    this.startFlushing();
  }

  /** @param {object} entry - Entry queued for the next flush. */
  log(entry) {
    this.buffer.push(entry);
    this.trimBuffer();
  }

  /** Drops the oldest entries beyond `maxBufferSize`. */
  trimBuffer() {
    const overflow = this.buffer.length - this.maxBufferSize;
    if (overflow > 0) {
      this.buffer.splice(0, overflow);
    }
  }

  /** Starts the periodic flush timer. */
  startFlushing() {
    this.flushTimer = setInterval(() => {
      this.flush();
    }, this.flushInterval);
    // Where timers are objects with unref(), do not let the flush timer
    // keep the process alive on its own.
    if (this.flushTimer && typeof this.flushTimer.unref === 'function') {
      this.flushTimer.unref();
    }
  }

  /** Stops the periodic flush timer; buffered entries are kept. */
  stop() {
    if (this.flushTimer !== null) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }

  /**
   * Sends all buffered entries. When the request rejects or the endpoint
   * answers with a non-2xx status, the entries are put back in front of
   * anything logged in the meantime.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    if (this.buffer.length === 0) return;
    const logs = this.buffer;
    this.buffer = [];
    try {
      const response = await fetch(this.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({ logs })
      });
      // fetch resolves on HTTP errors; treat those as failed delivery.
      if (!response.ok) {
        throw new Error(`Log endpoint responded with HTTP ${response.status}`);
      }
    } catch (error) {
      // concat rather than unshift(...logs): spreading a large batch into
      // call arguments can exceed the engine's argument limit.
      this.buffer = logs.concat(this.buffer);
      this.trimBuffer();
      console.error('Failed to send logs to remote endpoint:', error);
    }
  }
}
export const logger = new Logger('chess-app');
Dashboard and Visualization
Grafana Dashboard Configuration
{
"dashboard": {
"id": null,
"title": "Chess Application Monitoring",
"tags": ["chess", "application"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Active Games",
"type": "stat",
"targets": [
{
"expr": "chess_active_games",
"legendFormat": "Active Games"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"thresholds": {
"steps": [
{"color": "green", "value": null},
{"color": "yellow", "value": 50},
{"color": "red", "value": 100}
]
}
}
}
},
{
"id": 2,
"title": "Response Time",
"type": "graph",
"targets": [
{
"expr": "rate(http_request_duration_seconds_sum[5m]) / rate(http_request_duration_seconds_count[5m])",
"legendFormat": "Average Response Time"
}
],
"yAxes": [
{
"label": "Response Time (s)",
"min": 0
}
]
},
{
"id": 3,
"title": "Error Rate",
"type": "graph",
"targets": [
{
"expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
"legendFormat": "Error Rate %"
}
],
"alert": {
"conditions": [
{
"query": {
"queryType": "",
"refId": "A"
},
"reducer": {
"type": "last",
"params": []
},
"evaluator": {
"params": [5],
"type": "gt"
}
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "10s",
"handler": 1,
"name": "High Error Rate",
"noDataState": "no_data",
"notifications": []
}
},
{
"id": 4,
"title": "System Resources",
"type": "graph",
"targets": [
{
"expr": "100 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100)",
"legendFormat": "Memory Usage %"
},
{
"expr": "100 - (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]) * 100)",
"legendFormat": "CPU Usage %"
}
]
},
{
"id": 5,
"title": "WebSocket Connections",
"type": "stat",
"targets": [
{
"expr": "chess_websocket_connections",
"legendFormat": "Active Connections"
}
]
},
{
"id": 6,
"title": "Game Engine Performance",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, rate(chess_engine_request_duration_bucket[5m]))",
"legendFormat": "95th percentile"
},
{
"expr": "histogram_quantile(0.50, rate(chess_engine_request_duration_bucket[5m]))",
"legendFormat": "50th percentile"
}
]
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"refresh": "5s"
}
}
Uptime Monitoring
Health Check System
// backend/monitoring/HealthChecker.js
/**
 * Runs named health checks with a per-check timeout and aggregates them
 * into an overall status: `healthy`, `degraded` (a non-critical check
 * failed) or `unhealthy` (a critical check failed).
 */
class HealthChecker {
  constructor() {
    this.checks = new Map();
    this.results = new Map();
    this.interval = null;
  }

  /**
   * Registers or replaces a check.
   *
   * @param {string} name - Unique check name.
   * @param {() => Promise<*>} checkFunction - Resolves when healthy and
   *   rejects or throws when not.
   * @param {object} [options]
   * @param {number} [options.timeout=5000] - Milliseconds before the
   *   check is reported as timed out.
   * @param {boolean} [options.critical=false] - Whether a failure makes
   *   the overall status `unhealthy`.
   * @param {number} [options.interval=30000] - Stored with the check.
   *   NOTE(review): not read anywhere in this class; all checks run on
   *   the shared 30 s timer set up by start().
   */
  addCheck(name, checkFunction, options = {}) {
    this.checks.set(name, {
      function: checkFunction,
      timeout: options.timeout || 5000,
      critical: options.critical || false,
      interval: options.interval || 30000
    });
  }

  /**
   * Runs every registered check concurrently and stores the results.
   *
   * @returns {Promise<object>} Overall status as built by getHealthStatus.
   */
  async runAllChecks() {
    // Snapshot the names once so results map back to the checks that were
    // actually started, without rebuilding the key list per result.
    const names = [...this.checks.keys()];
    const settled = await Promise.allSettled(
      names.map((name) => this.runSingleCheck(name, this.checks.get(name)))
    );
    const results = new Map();
    settled.forEach((outcome, index) => {
      results.set(names[index], outcome.status === 'fulfilled' ? outcome.value : {
        status: 'error',
        error: outcome.reason?.message ?? String(outcome.reason),
        timestamp: Date.now()
      });
    });
    this.results = results;
    return this.getHealthStatus();
  }

  /**
   * Runs one check, racing it against its timeout.
   *
   * @param {string} name - Check name (not used by the check itself).
   * @param {object} check - Entry created by addCheck.
   * @returns {Promise<object>} `{ status, duration, timestamp }` plus
   *   `result` when healthy or `error` when unhealthy. Never rejects.
   */
  async runSingleCheck(name, check) {
    const startTime = Date.now();
    let timer = null;
    try {
      const timeoutPromise = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Health check timeout')), check.timeout);
      });
      const result = await Promise.race([
        check.function(),
        timeoutPromise
      ]);
      return {
        status: 'healthy',
        duration: Date.now() - startTime,
        result,
        timestamp: Date.now()
      };
    } catch (error) {
      return {
        status: 'unhealthy',
        duration: Date.now() - startTime,
        error: error?.message ?? String(error),
        timestamp: Date.now()
      };
    } finally {
      // Otherwise every finished check leaves a pending timer behind until
      // its full timeout has elapsed.
      clearTimeout(timer);
    }
  }

  /**
   * @returns {object} `{ status, timestamp, checks }` computed from the
   *   most recent results.
   */
  getHealthStatus() {
    const overall = {
      status: 'healthy',
      timestamp: Date.now(),
      checks: Object.fromEntries(this.results)
    };
    let anyFailure = false;
    for (const [name, result] of this.results) {
      if (result.status === 'healthy') continue;
      anyFailure = true;
      if (this.checks.get(name)?.critical) {
        overall.status = 'unhealthy';
        return overall;
      }
    }
    if (anyFailure) {
      overall.status = 'degraded';
    }
    return overall;
  }

  /** Registers the built-in checks; existing names are replaced. */
  setupDefaultChecks() {
    this.addCheck('database', async () => {
      const result = await this.checkDatabase();
      return { connected: result };
    }, { critical: true });

    this.addCheck('redis', async () => {
      const result = await this.checkRedis();
      return { connected: result };
    }, { critical: false });

    this.addCheck('chess_engine', async () => {
      const result = await this.checkChessEngine();
      return { responding: result };
    }, { critical: true });

    this.addCheck('external_api', async () => {
      const result = await this.checkExternalAPI();
      return { available: result };
    }, { critical: false });

    this.addCheck('filesystem', async () => {
      const result = await this.checkFileSystem();
      return { writable: result };
    }, { critical: true });
  }

  /** Placeholder database check; always resolves true. */
  async checkDatabase() {
    try {
      // Example: await db.query('SELECT 1');
      return true;
    } catch (error) {
      throw new Error(`Database check failed: ${error.message}`);
    }
  }

  /** Placeholder Redis check; always resolves true. */
  async checkRedis() {
    try {
      // Example: await redis.ping();
      return true;
    } catch (error) {
      throw new Error(`Redis check failed: ${error.message}`);
    }
  }

  /** Placeholder chess engine check; always resolves true. */
  async checkChessEngine() {
    try {
      // Example: await chessEngine.validateMove('e2e4');
      return true;
    } catch (error) {
      throw new Error(`Chess engine check failed: ${error.message}`);
    }
  }

  /**
   * Requests the external health endpoint.
   *
   * @returns {Promise<boolean>} True when the endpoint answers with 2xx.
   * @throws {Error} When the request fails or the status is not 2xx, so
   *   the check is reported as unhealthy rather than healthy.
   */
  async checkExternalAPI() {
    try {
      const response = await fetch('https://api.example.com/health');
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      return true;
    } catch (error) {
      throw new Error(`External API check failed: ${error.message}`);
    }
  }

  /**
   * Writes and removes a probe file in the OS temp directory.
   *
   * @returns {Promise<boolean>} True when both operations succeed.
   * @throws {Error} When the file cannot be written or removed.
   */
  async checkFileSystem() {
    try {
      const fs = require('fs').promises;
      const os = require('os');
      const path = require('path');
      // Per-process, per-run name: portable across platforms and safe when
      // several instances probe at the same time.
      const testFile = path.join(
        os.tmpdir(),
        `health-check-${process.pid}-${Date.now()}`
      );
      await fs.writeFile(testFile, 'test');
      await fs.unlink(testFile);
      return true;
    } catch (error) {
      throw new Error(`File system check failed: ${error.message}`);
    }
  }

  /**
   * Registers the default checks, runs them once and then every 30
   * seconds. Calling it again while running is a no-op.
   */
  start() {
    if (this.interval) return;
    this.setupDefaultChecks();
    const runSafely = () => {
      this.runAllChecks().catch((error) => {
        console.error('Health check run failed:', error);
      });
    };
    runSafely();
    this.interval = setInterval(runSafely, 30000); // Every 30 seconds
  }

  /** Stops the periodic checks. */
  stop() {
    if (this.interval) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }
}
module.exports = HealthChecker;
Next Steps
- Troubleshooting - Monitoring-based troubleshooting guide
- Security - Security monitoring and incident response
- Performance - Performance optimization based on monitoring data