Production Deployment Guide¶

Complete guide for deploying TrojanHorse.js in production environments with security, scalability, and reliability best practices.

Production Readiness Checklist¶

✅ Security¶

API keys stored in encrypted vault or secure environment variables
HTTPS enforced for all communications
SSL certificate validation enabled
Audit logging configured
Access controls implemented
Rate limiting configured
Input validation enabled

✅ Performance¶

✅ Reliability¶

✅ Operational¶

Architecture Overview¶

%% Deployment topology, laid out top-to-bottom: load balancer -> app replicas ->
%% cache / external threat feeds / monitoring.
graph TB
    subgraph "Load Balancer"
        LB[Load Balancer]
    end

    subgraph "Application Tier"
        APP1[TrojanHorse App 1]
        APP2[TrojanHorse App 2]
        APP3[TrojanHorse App N]
    end

    %% NOTE(review): MEMCACHE is declared but has no edges below — confirm whether it is used.
    subgraph "Caching Layer"
        REDIS[Redis Cluster]
        MEMCACHE[Memcached]
    end

    subgraph "Monitoring"
        PROM[Prometheus]
        GRAF[Grafana]
        ALERT[AlertManager]
    end

    subgraph "External APIs"
        URL[URLhaus]
        AV[AlienVault]
        AB[AbuseIPDB]
        VT[VirusTotal]
    end

    %% Traffic fan-out from the load balancer to every replica
    LB --> APP1
    LB --> APP2
    LB --> APP3

    %% All replicas share the Redis cache
    APP1 --> REDIS
    APP2 --> REDIS
    APP3 --> REDIS

    %% Only APP1 is drawn talking to the threat feeds; presumably every replica does
    %% and the other edges are omitted to keep the diagram readable — confirm.
    APP1 --> URL
    APP1 --> AV
    APP1 --> AB
    APP1 --> VT

    %% Metrics edges between each replica and Prometheus
    APP1 --> PROM
    APP2 --> PROM
    APP3 --> PROM

    %% Prometheus feeds dashboards and alert routing
    PROM --> GRAF
    PROM --> ALERT

Docker Deployment¶

Production Dockerfile¶

# Multi-stage build for optimized production image.
# Stage "builder" compiles the sources; stage "production" ships only the
# compiled output plus runtime dependencies and runs as a non-root user.
FROM node:18-alpine AS builder

WORKDIR /app

# Copy package manifests first so the dependency layer is cached independently
# of source changes
COPY package*.json ./
COPY tsconfig.json ./

# Install ALL dependencies: `npm run build` needs the devDependencies
# (presumably the TypeScript toolchain, given tsconfig.json). Installing with
# --only=production here would leave the build step without its tooling.
RUN npm ci

# Copy source code
COPY src/ ./src/

# Build, then drop devDependencies so only runtime packages are copied into the
# production stage
RUN npm run build && \
    npm prune --omit=dev && \
    npm cache clean --force

# Production stage
FROM node:18-alpine AS production

# Security: Create non-root user (uid/gid 1001) with a matching primary group
RUN addgroup -g 1001 -S trojanhorse && \
    adduser -S trojanhorse -u 1001 -G trojanhorse

# Install security updates and dumb-init (PID 1 signal forwarding / zombie reaping)
RUN apk --no-cache upgrade && \
    apk --no-cache add dumb-init

# Set working directory
WORKDIR /app

# Copy built application
COPY --from=builder --chown=trojanhorse:trojanhorse /app/dist ./dist
COPY --from=builder --chown=trojanhorse:trojanhorse /app/node_modules ./node_modules
COPY --chown=trojanhorse:trojanhorse package*.json ./

# Security: Remove package manager.
# In the official node image npm is installed under /usr/local, not through
# apk, so `apk del npm` does not remove it; delete the files directly.
RUN rm -rf /usr/local/lib/node_modules/npm \
           /usr/local/bin/npm \
           /usr/local/bin/npx

# Runtime configuration
# NOTE(review): a 512 MB V8 old-space cap leaves no headroom under a 512M
# container memory limit (heap + native memory can exceed it) — confirm sizing.
ENV NODE_ENV=production
ENV NODE_OPTIONS="--max-old-space-size=512"

# Health check.
# The command must be a single logical line: a Dockerfile instruction ends at
# the first newline that is not escaped with a backslash. 127.0.0.1 is used
# instead of "localhost" so the probe cannot resolve to ::1 while the server
# listens on 0.0.0.0 only.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD node -e "require('http').get({host:'127.0.0.1',port:3000,path:'/health',timeout:2000},(res)=>process.exit(res.statusCode===200?0:1)).on('error',()=>process.exit(1)).on('timeout',()=>process.exit(1))"

# Switch to non-root user
USER trojanhorse

# Expose port
EXPOSE 3000

# Use dumb-init for proper signal handling
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "dist/server.js"]

Docker Compose for Production¶

# docker-compose.prod.yml
#
# Production stack: three app replicas behind Traefik (TLS via Let's Encrypt),
# Redis for caching, and Prometheus/Grafana for monitoring.
# NOTE(review): `deploy:` settings and the overlay network are Swarm features —
# confirm whether this file is deployed with `docker stack deploy`.
version: '3.8'

services:
  trojanhorse-app:
    build:
      context: .
      dockerfile: Dockerfile
      target: production
    deploy:
      replicas: 3
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
      resources:
        limits:
          cpus: '1.0'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 256M
    environment:
      - NODE_ENV=production
      - REDIS_URL=redis://redis:6379
      # API keys are taken from the shell / .env file at deploy time, never
      # hard-coded here
      - ALIENVAULT_API_KEY=${ALIENVAULT_API_KEY}
      - ABUSEIPDB_API_KEY=${ABUSEIPDB_API_KEY}
      - VIRUSTOTAL_API_KEY=${VIRUSTOTAL_API_KEY}
    depends_on:
      - redis
      - prometheus
    networks:
      - app-network
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.trojanhorse.rule=Host(`trojanhorse.company.com`)"
      # Serve only on the HTTPS entrypoint and obtain the certificate from the
      # "letsencrypt" resolver declared on the traefik service; without the
      # certresolver label Traefik falls back to its self-signed default cert.
      - "traefik.http.routers.trojanhorse.entrypoints=websecure"
      - "traefik.http.routers.trojanhorse.tls=true"
      - "traefik.http.routers.trojanhorse.tls.certresolver=letsencrypt"
      # Container port the app listens on (EXPOSE 3000 in the Dockerfile)
      - "traefik.http.services.trojanhorse.loadbalancer.server.port=3000"

  redis:
    image: redis:7-alpine
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 256M
    # AOF persistence; evict least-recently-used keys once 200 MB is reached
    command: redis-server --appendonly yes --maxmemory 200mb --maxmemory-policy allkeys-lru
    volumes:
      - redis-data:/data
    networks:
      - app-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Translates Redis INFO output into Prometheus metrics on :9121. Prometheus
  # cannot scrape the Redis port directly because Redis does not speak HTTP.
  redis-exporter:
    image: oliver006/redis_exporter:v1.58.0
    environment:
      - REDIS_ADDR=redis://redis:6379
    depends_on:
      - redis
    networks:
      - app-network

  traefik:
    image: traefik:v2.10
    command:
      - "--api.dashboard=true"
      - "--providers.docker=true"
      # Only route containers that opt in with traefik.enable=true; otherwise
      # Redis, Prometheus and Grafana would get auto-generated public routers.
      - "--providers.docker.exposedbydefault=false"
      - "--entrypoints.web.address=:80"
      - "--entrypoints.websecure.address=:443"
      # Enforce HTTPS: redirect everything arriving on :80 to :443
      - "--entrypoints.web.http.redirections.entrypoint.to=websecure"
      - "--entrypoints.web.http.redirections.entrypoint.scheme=https"
      - "--certificatesresolvers.letsencrypt.acme.email=admin@company.com"
      # ACME storage must be a FILE. It lives inside the mounted volume
      # directory; mounting a named volume at /acme.json itself would create a
      # directory at that path and certificate storage would fail.
      - "--certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json"
      - "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web"
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - traefik-data:/letsencrypt
    networks:
      - app-network

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    networks:
      - app-network

  grafana:
    image: grafana/grafana:latest
    ports:
      # Host port 3001 avoids clashing with the app's port 3000
      - "3001:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
    volumes:
      - grafana-data:/var/lib/grafana
      - ./monitoring/grafana:/etc/grafana/provisioning
    networks:
      - app-network

volumes:
  redis-data:
  traefik-data:
  prometheus-data:
  grafana-data:

networks:
  app-network:
    driver: overlay
    attachable: true

Kubernetes Deployment¶

Namespace and ConfigMap¶

# k8s/namespace.yaml
# Dedicated namespace; every other manifest in this guide sets
# `namespace: trojanhorse`.
apiVersion: v1
kind: Namespace
metadata:
  name: trojanhorse
  labels:
    name: trojanhorse
---
# k8s/configmap.yaml
# Non-secret runtime settings for the application. All values are quoted
# because ConfigMap data values must be strings.
apiVersion: v1
kind: ConfigMap
metadata:
  name: trojanhorse-config
  namespace: trojanhorse
data:
  NODE_ENV: "production"
  # NOTE(review): "redis-service" is not defined in the manifests shown here —
  # confirm a Service with that name exists in this namespace.
  REDIS_URL: "redis://redis-service:6379"
  LOG_LEVEL: "info"
  MAX_CONCURRENCY: "10"
  # presumably milliseconds (3600000 ms = 1 hour) — confirm against the cache code
  CACHE_TTL: "3600000"

Secrets Management¶

# k8s/secrets.yaml
# Template for the threat-feed API keys. Replace each placeholder with the
# base64-encoded key before applying; the angle-bracket values are
# placeholders, not real keys.
apiVersion: v1
kind: Secret
metadata:
  name: trojanhorse-secrets
  namespace: trojanhorse
type: Opaque
data:
  # Base64 encoded API keys
  ALIENVAULT_API_KEY: <base64-encoded-key>
  ABUSEIPDB_API_KEY: <base64-encoded-key>
  VIRUSTOTAL_API_KEY: <base64-encoded-key>

Deployment¶

# k8s/deployment.yaml
# Runs three replicas of the application with zero-downtime rolling updates,
# a read-only root filesystem and all Linux capabilities dropped.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: trojanhorse-app
  namespace: trojanhorse
  labels:
    app: trojanhorse
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Start one extra pod before stopping an old one; never go below 3 ready
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: trojanhorse
  template:
    metadata:
      labels:
        app: trojanhorse
    spec:
      securityContext:
        runAsNonRoot: true
        # uid/gid 1001 matches the "trojanhorse" user created in the Dockerfile
        runAsUser: 1001
        fsGroup: 1001
      containers:
      - name: trojanhorse
        image: trojanhorse/trojanhorse-js:1.0.0
        imagePullPolicy: Always
        ports:
        - containerPort: 3000
          name: http
        # Import every key of the ConfigMap (NODE_ENV, REDIS_URL, LOG_LEVEL,
        # MAX_CONCURRENCY, CACHE_TTL) as environment variables. Listing only
        # NODE_ENV and REDIS_URL individually left the other three settings
        # defined in the ConfigMap but never delivered to the container.
        envFrom:
        - configMapRef:
            name: trojanhorse-config
        # Secrets stay explicit so only the named keys are exposed
        env:
        - name: ALIENVAULT_API_KEY
          valueFrom:
            secretKeyRef:
              name: trojanhorse-secrets
              key: ALIENVAULT_API_KEY
        - name: ABUSEIPDB_API_KEY
          valueFrom:
            secretKeyRef:
              name: trojanhorse-secrets
              key: ABUSEIPDB_API_KEY
        - name: VIRUSTOTAL_API_KEY
          valueFrom:
            secretKeyRef:
              name: trojanhorse-secrets
              key: VIRUSTOTAL_API_KEY
        resources:
          requests:
            memory: "256Mi"
            cpu: "250m"
          limits:
            memory: "512Mi"
            cpu: "500m"
        # Liveness: restart the container after 3 consecutive /health failures
        livenessProbe:
          httpGet:
            path: /health
            port: 3000
          initialDelaySeconds: 30
          periodSeconds: 30
          timeoutSeconds: 10
          failureThreshold: 3
        # Readiness: remove the pod from Service endpoints while /ready fails
        readinessProbe:
          httpGet:
            path: /ready
            port: 3000
          initialDelaySeconds: 5
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          capabilities:
            drop:
            - ALL
        # The root filesystem is read-only, so the writable paths are mounted
        # explicitly as ephemeral volumes
        volumeMounts:
        - name: tmp-volume
          mountPath: /tmp
        - name: logs-volume
          mountPath: /app/logs
      volumes:
      - name: tmp-volume
        emptyDir: {}
      - name: logs-volume
        emptyDir: {}
      # NOTE(review): this ServiceAccount is not defined in the manifests shown
      # in this guide — confirm it is created elsewhere.
      serviceAccountName: trojanhorse-service-account
      automountServiceAccountToken: false

Service and Ingress¶

# k8s/service.yaml
# Cluster-internal Service: exposes the pods labelled `app: trojanhorse` on
# port 80 and forwards to container port 3000.
apiVersion: v1
kind: Service
metadata:
  name: trojanhorse-service
  namespace: trojanhorse
  labels:
    app: trojanhorse
spec:
  type: ClusterIP
  ports:
  - port: 80
    targetPort: 3000
    protocol: TCP
    name: http
  selector:
    app: trojanhorse
---
# k8s/ingress.yaml
# Public HTTPS entry point: terminates TLS for trojanhorse.company.com with a
# cert-manager issued certificate and forwards all paths to the Service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: trojanhorse-ingress
  namespace: trojanhorse
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    # Per-client-IP limit of 100 requests per minute. ingress-nginx only
    # recognises limit-rps / limit-rpm / limit-connections; the
    # "rate-limit" and "rate-limit-window" annotation names are not part of
    # its annotation set and would be ignored, leaving the ingress unthrottled.
    nginx.ingress.kubernetes.io/limit-rpm: "100"
spec:
  # Replaces the deprecated kubernetes.io/ingress.class annotation
  ingressClassName: nginx
  tls:
  - hosts:
    - trojanhorse.company.com
    secretName: trojanhorse-tls
  rules:
  - host: trojanhorse.company.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: trojanhorse-service
            port:
              number: 80

Horizontal Pod Autoscaler¶

# k8s/hpa.yaml
# Scales the trojanhorse-app Deployment between 3 and 10 replicas based on
# average CPU (70%) and memory (80%) utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: trojanhorse-hpa
  namespace: trojanhorse
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: trojanhorse-app
  minReplicas: 3
  maxReplicas: 10
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  behavior:
    scaleUp:
      # NOTE(review): a 300 s stabilization window delays every scale-up by up
      # to five minutes, which works against the aggressive policy below
      # (double the replica count every 15 s) — confirm this is intended.
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 100
        periodSeconds: 15
    scaleDown:
      # Scale down conservatively: at most 10% of replicas per minute, after
      # five minutes of consistently lower recommendations
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60

Monitoring and Observability¶

Prometheus Configuration¶

# monitoring/prometheus.yml
# Scrape configuration for the application and its Redis cache, plus the
# Alertmanager endpoint that receives fired alerts.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "trojanhorse_rules.yml"

scrape_configs:
  - job_name: 'trojanhorse'
    static_configs:
      - targets: ['trojanhorse-app:3000']
    metrics_path: '/metrics'
    # Overrides the 15s global interval for this job
    scrape_interval: 30s

  - job_name: 'redis'
    static_configs:
      # Redis does not serve Prometheus metrics on its own port (6379 speaks
      # the Redis protocol, not HTTP), so scrape a redis_exporter sidecar
      # instead. Requires an exporter (e.g. oliver006/redis_exporter, default
      # port 9121) reachable under the hostname "redis-exporter".
      - targets: ['redis-exporter:9121']

alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - alertmanager:9093

Alerting Rules¶

# monitoring/trojanhorse_rules.yml
# Alerting rules evaluated by Prometheus against the application's custom
# metrics (trojanhorse_* series).
groups:
- name: trojanhorse
  rules:
  # Fires when a series of 5xx responses exceeds 0.1 requests/second for 5 min.
  # NOTE(review): this is an absolute per-series rate, not a ratio of failed to
  # total requests — confirm whether a 10% error ratio was intended.
  - alert: HighErrorRate
    expr: rate(trojanhorse_requests_total{status=~"5.."}[5m]) > 0.1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High error rate detected"
      description: "Error rate is {{ $value }} for the last 5 minutes"

  # Fires when the 95th-percentile request duration stays above 1 s for 5 min.
  # NOTE(review): without `sum by (le)` the quantile is computed separately for
  # each method/route/status label combination — confirm that is desired.
  - alert: HighLatency
    expr: histogram_quantile(0.95, rate(trojanhorse_request_duration_seconds_bucket[5m])) > 1
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High latency detected"
      description: "95th percentile latency is {{ $value }}s"

  # Fires per feed (label `feed`) when its up-gauge has been 0 for 2 minutes
  - alert: FeedDown
    expr: trojanhorse_feed_up == 0
    for: 2m
    labels:
      severity: critical
    annotations:
      summary: "Threat feed is down"
      description: "Feed {{ $labels.feed }} has been down for more than 2 minutes"

  # Fires when the vault-locked gauge has been 1 for 10 minutes
  - alert: VaultLocked
    expr: trojanhorse_vault_locked == 1
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: "Vault has been locked"
      description: "API key vault has been locked for more than 10 minutes"

Application Metrics¶

// server/metrics.js
import promClient from 'prom-client';

// Registry that collects both the default process metrics and the custom
// trojanhorse_* metrics defined below.
const { Registry, Histogram, Counter, Gauge } = promClient;
const register = new Registry();

// Default metrics (runtime/process series supplied by prom-client)
promClient.collectDefaultMetrics({ register });

// --- Custom metrics -------------------------------------------------------

// Request latency, bucketed in seconds
const requestDuration = new Histogram({
  labelNames: ['method', 'route', 'status'],
  buckets: [0.1, 0.5, 1, 2, 5],
  help: 'Duration of HTTP requests in seconds',
  name: 'trojanhorse_request_duration_seconds'
});

// Request count, labelled the same way as the latency histogram
const requestsTotal = new Counter({
  labelNames: ['method', 'route', 'status'],
  help: 'Total number of HTTP requests',
  name: 'trojanhorse_requests_total'
});

// One series per threat feed
const feedStatus = new Gauge({
  labelNames: ['feed'],
  help: 'Status of threat feeds (1=up, 0=down)',
  name: 'trojanhorse_feed_up'
});

// Detections, split by originating feed and severity
const threatsDetected = new Counter({
  labelNames: ['feed', 'severity'],
  help: 'Total number of threats detected',
  name: 'trojanhorse_threats_detected_total'
});

const vaultStatus = new Gauge({
  help: 'Vault lock status (1=locked, 0=unlocked)',
  name: 'trojanhorse_vault_locked'
});

const cacheHitRate = new Gauge({
  help: 'Cache hit rate percentage',
  name: 'trojanhorse_cache_hit_rate'
});

// Register metrics, in declaration order
const customMetrics = [
  requestDuration,
  requestsTotal,
  feedStatus,
  threatsDetected,
  vaultStatus,
  cacheHitRate
];
for (const metric of customMetrics) {
  register.registerMetric(metric);
}

export {
  register,
  requestDuration,
  requestsTotal,
  feedStatus,
  threatsDetected,
  vaultStatus,
  cacheHitRate
};

Health Check Implementation¶

// server/health.js
import { TrojanHorse } from '../src/index.js';

/**
 * Builds liveness (/health) and readiness (/ready) reports for the service.
 *
 * Health reports are cached for `cacheTimeout` milliseconds so frequent probes
 * do not hammer the underlying TrojanHorse instance.
 */
class HealthChecker {
  /**
   * @param {object} trojan - object exposing an async `getHealth()` that
   *   resolves to `{ overall, feeds }` (the only members read here).
   */
  constructor(trojan) {
    this.trojan = trojan;
    this.healthCache = null;
    this.lastCheck = 0;
    this.cacheTimeout = 30000; // 30 seconds
  }

  /**
   * Measure how long a `setImmediate` callback takes to run.
   *
   * @returns {Promise<{status: string, lag: number}>} lag in whole
   *   milliseconds; status is 'warning' at 100 ms or more.
   */
  _measureEventLoopLag() {
    return new Promise((resolve) => {
      const start = process.hrtime();
      setImmediate(() => {
        const [seconds, nanoseconds] = process.hrtime(start);
        const lagMs = seconds * 1000 + nanoseconds * 1e-6;
        resolve({
          status: lagMs < 100 ? 'healthy' : 'warning', // 100ms threshold
          lag: Math.round(lagMs)
        });
      });
    });
  }

  /**
   * Produce the full health report.
   *
   * @returns {Promise<object>} report with `status` ('healthy' | 'warning' |
   *   'unhealthy'), `timestamp`, `uptime`, `version`, `checks`, and `error`
   *   when a check threw. Never rejects.
   */
  async getHealth() {
    const now = Date.now();

    // Return cached result if recent
    if (this.healthCache && (now - this.lastCheck) < this.cacheTimeout) {
      return this.healthCache;
    }

    const health = {
      status: 'healthy',
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      version: process.env.npm_package_version,
      checks: {}
    };

    try {
      // Check TrojanHorse instance
      const trojanHealth = await this.trojan.getHealth();
      health.checks.trojanhorse = {
        status: trojanHealth.overall,
        feeds: trojanHealth.feeds
      };

      // Check memory usage
      const memUsage = process.memoryUsage();
      health.checks.memory = {
        status: memUsage.heapUsed < 400 * 1024 * 1024 ? 'healthy' : 'warning', // 400MB threshold
        heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
        heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024),
        rss: Math.round(memUsage.rss / 1024 / 1024)
      };

      // Check event loop lag. This must be awaited: if the result were written
      // from a bare setImmediate callback it would land only after the overall
      // status below had been computed and the report returned.
      health.checks.eventLoop = await this._measureEventLoopLag();

      // Overall status: worst individual check wins
      const statuses = Object.values(health.checks).map(check => check.status);
      if (statuses.includes('unhealthy')) {
        health.status = 'unhealthy';
      } else if (statuses.includes('warning')) {
        health.status = 'warning';
      }

    } catch (error) {
      health.status = 'unhealthy';
      health.error = error.message;
    }

    // Cache result (failures are cached too, for the same 30 s window)
    this.healthCache = health;
    this.lastCheck = now;

    return health;
  }

  /**
   * Lightweight readiness probe: ready unless the TrojanHorse instance reports
   * itself 'unhealthy' or its health call throws. Never rejects.
   *
   * @returns {Promise<{status: string, timestamp: string, error?: string}>}
   */
  async getReadiness() {
    try {
      // Simple readiness check
      const health = await this.trojan.getHealth();
      return {
        status: health.overall === 'unhealthy' ? 'not_ready' : 'ready',
        timestamp: new Date().toISOString()
      };
    } catch (error) {
      return {
        status: 'not_ready',
        error: error.message,
        timestamp: new Date().toISOString()
      };
    }
  }
}

export default HealthChecker;

Configuration Management¶

Environment-Based Configuration¶

// config/production.js
/**
 * Read an integer setting from the environment.
 *
 * Parsing is always base 10, so values such as "0x10" or "010" are not
 * silently read as hex/octal. As with the inline `parseInt(...) || default`
 * this replaces, an unset, non-numeric or zero value yields `fallback`.
 *
 * @param {string} name - environment variable name
 * @param {number} fallback - value used when the variable is unusable
 * @returns {number}
 */
const envInt = (name, fallback) =>
  Number.parseInt(process.env[name], 10) || fallback;

/**
 * Production configuration. Every tunable can be overridden through an
 * environment variable; the literals below are the defaults.
 */
const productionConfig = {
  server: {
    port: process.env.PORT || 3000,
    host: process.env.HOST || '0.0.0.0'
  },

  trojanhorse: {
    sources: ['urlhaus', 'alienvault', 'abuseipdb', 'virustotal'],
    strategy: process.env.THREAT_STRATEGY || 'balanced',

    // API keys come only from the environment; undefined when a key is unset
    apiKeys: {
      alienVault: process.env.ALIENVAULT_API_KEY,
      abuseipdb: process.env.ABUSEIPDB_API_KEY,
      virustotal: process.env.VIRUSTOTAL_API_KEY
    },

    security: {
      enforceHttps: true,
      autoLock: true,
      lockTimeout: envInt('VAULT_LOCK_TIMEOUT', 600000)
    },

    performance: {
      maxConcurrency: envInt('MAX_CONCURRENCY', 15),
      requestTimeout: envInt('REQUEST_TIMEOUT', 45000),
      retryAttempts: 3,
      circuitBreaker: {
        enabled: true,
        failureThreshold: 5,
        resetTimeout: 60000
      }
    },

    caching: {
      enabled: true,
      type: 'redis',
      redis: {
        url: process.env.REDIS_URL || 'redis://localhost:6379'
      },
      ttl: envInt('CACHE_TTL', 3600000)
    },

    logging: {
      level: process.env.LOG_LEVEL || 'info',
      transports: [
        {
          type: 'file',
          filename: './logs/app.log',
          maxSize: '10MB',
          maxFiles: 5
        },
        {
          type: 'console',
          colorize: false
        }
      ]
    }
  }
};

export default productionConfig;

Secrets Management with HashiCorp Vault¶

// config/vault.js
import vault from 'node-vault';

/**
 * Thin wrapper around a node-vault client for reading and rotating the
 * secrets stored at a single path.
 */
class SecretsManager {
  /**
   * @param {object} options
   * @param {string} options.endpoint - Vault server URL
   * @param {string} options.token - Vault access token
   * @param {string} options.secretPath - path of the secret to manage
   */
  constructor(options) {
    const clientSettings = {
      apiVersion: 'v1',
      endpoint: options.endpoint,
      token: options.token
    };
    this.client = vault(clientSettings);
    this.secretPath = options.secretPath;
  }

  /**
   * Fetch the secret stored at `secretPath`.
   *
   * @returns {Promise<object>} the `data.data` member of the read response
   * @throws rethrows any read/shape error after logging it
   */
  async getSecrets() {
    try {
      const response = await this.client.read(this.secretPath);
      const envelope = response.data;
      return envelope.data; // Vault v2 KV engine
    } catch (failure) {
      console.error('Failed to retrieve secrets:', failure.message);
      throw failure;
    }
  }

  /**
   * Replace one entry of the secret and write the whole secret back.
   *
   * @param {string} key - entry to replace (added if absent)
   * @param {*} newValue - new value for the entry
   * @throws rethrows any read/write error after logging it
   */
  async rotateSecret(key, newValue) {
    try {
      // Read-modify-write: the current secret object is updated in place
      const current = await this.getSecrets();
      current[key] = newValue;

      const payload = { data: current };
      await this.client.write(this.secretPath, payload);
      console.log(`Secret ${key} rotated successfully`);
    } catch (failure) {
      console.error(`Failed to rotate secret ${key}:`, failure.message);
      throw failure;
    }
  }
}

export default SecretsManager;

Security Hardening¶

Security Headers Middleware¶

// middleware/security.js
import helmet from 'helmet';
import rateLimit from 'express-rate-limit';

// Content-Security-Policy: same-origin by default, with inline styles and
// data:/https: images additionally allowed; plugins and frames are blocked.
const cspDirectives = {
  defaultSrc: ["'self'"],
  styleSrc: ["'self'", "'unsafe-inline'"],
  scriptSrc: ["'self'"],
  imgSrc: ["'self'", "data:", "https:"],
  connectSrc: ["'self'"],
  fontSrc: ["'self'"],
  objectSrc: ["'none'"],
  mediaSrc: ["'self'"],
  frameSrc: ["'none'"],
  baseUri: ["'self'"],
  formAction: ["'self'"]
};

// Strict-Transport-Security: 31536000 s = 365 days, subdomains included
const hstsOptions = {
  maxAge: 31536000,
  includeSubDomains: true,
  preload: true
};

// Security headers middleware (helmet with the CSP and HSTS settings above)
export const securityHeaders = helmet({
  contentSecurityPolicy: { directives: cspDirectives },
  hsts: hstsOptions
});

// Paths exempt from the general limiter (the health and readiness endpoints)
const PROBE_PATHS = ['/health', '/ready'];

// Skip rate limiting for health checks
const isProbeRequest = (req) =>
  req.path === PROBE_PATHS[0] || req.path === PROBE_PATHS[1];

// General rate limiting: 1000 requests per IP per 15-minute window
export const rateLimiter = rateLimit({
  windowMs: 900000, // 15 minutes
  max: 1000, // Limit each IP to 1000 requests per windowMs
  message: {
    error: 'Too many requests from this IP, please try again later.'
  },
  standardHeaders: true,
  legacyHeaders: false,
  skip: isProbeRequest
});

// Bucket key for the API limiter: the x-api-key header when present and
// non-empty, otherwise the client IP.
// NOTE(review): the header is read as sent by the client. If this limiter runs
// before the key is authenticated, a caller can obtain a fresh bucket simply
// by changing the header value — confirm it is mounted after authentication.
const apiClientKey = (req) => {
  const suppliedKey = req.headers['x-api-key'];
  if (suppliedKey) {
    return suppliedKey;
  }
  return req.ip;
};

// API-specific rate limiting: 100 calls per key per 5-minute window
export const apiRateLimiter = rateLimit({
  windowMs: 300000, // 5 minutes
  max: 100, // Limit API calls
  message: {
    error: 'API rate limit exceeded, please try again later.'
  },
  keyGenerator: apiClientKey
});

Input Validation¶

// middleware/validation.js
import Joi from 'joi';

// Characters a target may contain: the RFC 3986 URI character set
// (unreserved, reserved and "%" for percent-encoding). This covers every
// accepted `type`: domains, URLs (":", "/", "?", "=" …), IPv4/IPv6 addresses
// and hex hashes. A hostname-only pattern would reject the URLs and IPv6
// addresses that the `type` field explicitly allows. Whitespace, quotes,
// angle brackets, backslashes and control characters remain rejected.
const TARGET_PATTERN = /^[A-Za-z0-9\-._~:\/?#\[\]@!$&'()*+,;=%]+$/;

// Validation shared by single and batch checks so both endpoints accept
// exactly the same targets
const targetSchema = Joi.string()
  .max(255)
  .pattern(TARGET_PATTERN)
  .messages({
    'string.pattern.base': 'Invalid target format'
  });

// Body of a single threat check: { target, type? } — type defaults to 'auto'
const threatCheckSchema = Joi.object({
  target: targetSchema.required(),
  type: Joi.string()
    .valid('auto', 'domain', 'url', 'ip', 'hash')
    .default('auto')
});

// Body of a batch check: { targets: [1..100 targets] }
const batchCheckSchema = Joi.object({
  targets: Joi.array()
    .items(targetSchema)
    .min(1)
    .max(100)
    .required()
});

/**
 * Express middleware: validates the request body against threatCheckSchema.
 * On success the body is replaced with the validated value (defaults applied)
 * and the chain continues; on failure a 400 with the messages is sent.
 */
export const validateThreatCheck = (req, res, next) => {
  const outcome = threatCheckSchema.validate(req.body);

  if (!outcome.error) {
    req.body = outcome.value;
    next();
    return;
  }

  return res.status(400).json({
    error: 'Validation failed',
    details: outcome.error.details.map((detail) => detail.message)
  });
};

/**
 * Express middleware: validates the request body against batchCheckSchema.
 * On success the body is replaced with the validated value and the chain
 * continues; on failure a 400 with the messages is sent.
 */
export const validateBatchCheck = (req, res, next) => {
  const outcome = batchCheckSchema.validate(req.body);

  if (!outcome.error) {
    req.body = outcome.value;
    next();
    return;
  }

  return res.status(400).json({
    error: 'Validation failed',
    details: outcome.error.details.map((detail) => detail.message)
  });
};

Performance Optimization¶

Connection Pooling¶

// utils/connectionPool.js
import Agent from 'agentkeepalive';
import https from 'https';

// Socket-pool settings common to both agents. Each agent receives its own
// copy so neither constructor sees an object shared with the other.
const poolSettings = {
  keepAlive: true,
  maxSockets: 100,
  maxFreeSockets: 10,
  timeout: 60000,
  freeSocketTimeout: 30000
};

// HTTP Keep-Alive agent
export const httpAgent = new Agent({ ...poolSettings });

// HTTPS Keep-Alive agent; certificate verification stays enabled
export const httpsAgent = new Agent.HttpsAgent({
  ...poolSettings,
  rejectUnauthorized: true
});

// Global agent configuration: make the pooled agent the default for https
https.globalAgent = httpsAgent;

Memory Management¶

// utils/memoryManager.js
/**
 * Periodically checks heap usage and, above a threshold, requests a garbage
 * collection when the runtime exposes one (`node --expose-gc`).
 */
class MemoryManager {
  constructor() {
    this.gcThreshold = 400 * 1024 * 1024; // 400MB
    this.checkInterval = 30000; // 30 seconds
    this.timer = null;
    this.startMonitoring();
  }

  /**
   * Start the periodic check. Calling it again while running is a no-op, so
   * repeated calls cannot stack up timers.
   */
  startMonitoring() {
    if (this.timer) {
      return;
    }

    this.timer = setInterval(() => {
      this.checkMemoryUsage();
    }, this.checkInterval);

    // Do not let the monitor alone keep the process alive (graceful shutdown,
    // short-lived scripts, tests)
    this.timer.unref();
  }

  /** Stop the periodic check. Safe to call when it is not running. */
  stopMonitoring() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Run one check: above the threshold, trigger GC if available and log the
   * resulting heap size.
   */
  checkMemoryUsage() {
    const usage = process.memoryUsage();

    if (usage.heapUsed <= this.gcThreshold) {
      return;
    }

    if (typeof global.gc === 'function') {
      console.log('Memory threshold exceeded, forcing garbage collection');
      global.gc();
    } else {
      // global.gc only exists when Node is started with --expose-gc; do not
      // claim a collection that cannot happen
      console.warn('Memory threshold exceeded, but global.gc is unavailable (start Node with --expose-gc)');
    }

    // Log memory usage after the (attempted) GC
    const afterGC = process.memoryUsage();
    console.log(`Memory usage: ${Math.round(afterGC.heapUsed / 1024 / 1024)}MB`);
  }

  /**
   * @returns {{heapUsed: number, heapTotal: number, rss: number, external: number}}
   *   current process memory figures, rounded to whole megabytes
   */
  getMemoryStats() {
    const usage = process.memoryUsage();
    return {
      heapUsed: Math.round(usage.heapUsed / 1024 / 1024),
      heapTotal: Math.round(usage.heapTotal / 1024 / 1024),
      rss: Math.round(usage.rss / 1024 / 1024),
      external: Math.round(usage.external / 1024 / 1024)
    };
  }
}

export default MemoryManager;

Disaster Recovery¶

Backup Strategy¶

#!/bin/bash
# scripts/backup.sh
#
# Creates a timestamped, compressed backup of configuration, recent logs and
# (when redis-cli is available) a Redis snapshot, optionally uploads it to S3,
# and prunes local archives older than 30 days.
#
# Environment:
#   AWS_S3_BUCKET  optional; when set, the archive is copied to
#                  s3://$AWS_S3_BUCKET/backups/

# Abort on the first failing step so a partial backup is never reported as done
set -euo pipefail

# Backup configuration and logs
BACKUP_ROOT="/backups/trojanhorse"
BACKUP_DIR="$BACKUP_ROOT/$(date +%Y%m%d_%H%M%S)"
mkdir -p "$BACKUP_DIR"

# Backup configuration
cp -r ./config "$BACKUP_DIR/"
cp ./package*.json "$BACKUP_DIR/"

# Backup logs (last 7 days); skipped when the log directory does not exist
if [ -d ./logs ]; then
    find ./logs -name "*.log" -mtime -7 -exec cp {} "$BACKUP_DIR/" \;
fi

# Backup Redis data (if using Redis for caching).
# `redis-cli --rdb` streams a consistent snapshot from the server and returns
# once the file is fully written, so there is no fixed sleep to race against
# and no assumption about where the server keeps dump.rdb.
if command -v redis-cli &> /dev/null; then
    if ! redis-cli --rdb "$BACKUP_DIR/dump.rdb"; then
        echo "WARNING: Redis snapshot failed; backup continues without Redis data" >&2
        rm -f "$BACKUP_DIR/dump.rdb"
    fi
fi

# Compress backup
tar -czf "$BACKUP_DIR.tar.gz" -C "$BACKUP_DIR" .
rm -rf "$BACKUP_DIR"

# Upload to S3 (optional)
if [ -n "${AWS_S3_BUCKET:-}" ]; then
    aws s3 cp "$BACKUP_DIR.tar.gz" "s3://$AWS_S3_BUCKET/backups/"
fi

# Cleanup old backups (keep last 30 days)
find "$BACKUP_ROOT" -name "*.tar.gz" -mtime +30 -delete

echo "Backup completed: $BACKUP_DIR.tar.gz"

Recovery Procedures¶

#!/bin/bash
# scripts/restore.sh
#
# Restores configuration and Redis data from an archive produced by
# scripts/backup.sh, restarting the Docker Compose services around the copy.
#
# Usage: restore.sh <backup_file>

# Abort on the first failing step
set -euo pipefail

if [ -z "${1:-}" ]; then
    echo "Usage: $0 <backup_file>"
    exit 1
fi

BACKUP_FILE="$1"

if [ ! -f "$BACKUP_FILE" ]; then
    echo "Backup file not found: $BACKUP_FILE" >&2
    exit 1
fi

# Private, unpredictable scratch directory; removed on every exit path
RESTORE_DIR="$(mktemp -d /tmp/trojanhorse_restore.XXXXXX)"
trap 'rm -rf "$RESTORE_DIR"' EXIT

# Extract backup
tar -xzf "$BACKUP_FILE" -C "$RESTORE_DIR"

# Validate the archive BEFORE stopping anything, so a bad backup cannot leave
# the services down
if [ ! -d "$RESTORE_DIR/config" ]; then
    echo "Backup does not contain a config directory: $BACKUP_FILE" >&2
    exit 1
fi

# Stop services
docker-compose down

# Restore configuration
cp -r "$RESTORE_DIR/config" ./

# Restore Redis data
if [ -f "$RESTORE_DIR/dump.rdb" ]; then
    cp "$RESTORE_DIR/dump.rdb" /var/lib/redis/
fi

# Start services
docker-compose up -d

# Cleanup happens in the EXIT trap

echo "Restore completed from: $BACKUP_FILE"

Troubleshooting¶

Common Production Issues¶

High Memory Usage¶

// Debugging memory leaks
const v8 = require('v8');
const fs = require('fs');

/**
 * Stream a V8 heap snapshot of the current process to
 * `heap-<epoch-ms>.heapsnapshot` in the working directory.
 *
 * @returns {string} the file name being written; success is logged only once
 *   the stream has been flushed, failures are logged as errors.
 */
function takeHeapSnapshot() {
  const filename = `heap-${Date.now()}.heapsnapshot`;
  const heapSnapshot = v8.getHeapSnapshot();
  const fileStream = fs.createWriteStream(filename);

  fileStream.on('finish', () => {
    console.log(`Heap snapshot saved to ${filename}`);
  });
  fileStream.on('error', (error) => {
    console.error(`Failed to write heap snapshot ${filename}:`, error.message);
  });

  heapSnapshot.pipe(fileStream);
  return filename;
}

// Take a snapshot on demand: `kill -USR2 <pid>`.
// SIGTERM must not be used for this: registering a SIGTERM listener removes
// Node's default "exit on SIGTERM" behavior, so the process would ignore
// orchestrator shutdown requests. SIGUSR1 is reserved by Node for the
// inspector, which leaves SIGUSR2 for application use.
process.on('SIGUSR2', takeHeapSnapshot);

Feed Connectivity Issues¶

// Network troubleshooting
import { TrojanHorse } from 'trojanhorse-js';

// Instance with a feed-error hook that logs structured details about every
// failed feed call.
const trojan = new TrojanHorse({
  events: {
    // Receives the error and the name of the feed it came from
    feedError: (error, feedName) => {
      console.error(`Feed ${feedName} error:`, {
        message: error.message,
        code: error.code,
        statusCode: error.statusCode,
        timeout: error.timeout,
        timestamp: new Date().toISOString()
      });

      // Implement exponential backoff
      // Delay doubles with each error.retryCount (1 s, 2 s, 4 s, …), capped at 60 s
      const backoffTime = Math.min(1000 * Math.pow(2, error.retryCount || 0), 60000);
      // NOTE(review): this timer only logs, and it logs "Retrying … in N ms"
      // AFTER the delay has already elapsed; no retry call is made here.
      // Replace the body with the real retry once the retry API is confirmed.
      setTimeout(() => {
        console.log(`Retrying ${feedName} in ${backoffTime}ms`);
      }, backoffTime);
    }
  }
});

Performance Degradation¶

// Performance monitoring
/**
 * In-memory collector of request durations and error counts, reporting
 * rolling statistics over the last hour.
 */
class PerformanceMonitor {
  constructor() {
    this.metrics = {
      requestTimes: [],
      errorCounts: new Map(),
      memoryUsage: []
    };
  }

  /** Drop request samples older than one hour. */
  _pruneOldRequests() {
    const oneHourAgo = Date.now() - 60 * 60 * 1000;
    this.metrics.requestTimes = this.metrics.requestTimes.filter(
      r => r.timestamp > oneHourAgo
    );
  }

  /**
   * Record one finished request.
   *
   * @param {number} duration - request duration (the unit is whatever the
   *   caller measures in; statistics are reported in the same unit)
   * @param {{type: string}|null} [error] - when given, its `type` is counted
   */
  recordRequest(duration, error = null) {
    this.metrics.requestTimes.push({
      duration,
      timestamp: Date.now()
    });

    if (error) {
      const count = this.metrics.errorCounts.get(error.type) || 0;
      this.metrics.errorCounts.set(error.type, count + 1);
    }

    // Keep only last hour of data
    this._pruneOldRequests();
  }

  /**
   * Summarize the samples from the last hour.
   *
   * @returns {{averageResponseTime: number, p95ResponseTime: number,
   *   requestCount: number, errorCounts: object}} all numeric fields are 0
   *   when no requests were recorded, never NaN.
   */
  getStats() {
    // Prune here as well so an idle service does not report stale samples
    this._pruneOldRequests();

    const times = this.metrics.requestTimes
      .map(r => r.duration)
      .sort((a, b) => a - b);
    const total = times.reduce((a, b) => a + b, 0);
    // Guard the division: 0 / 0 would yield NaN for an empty window
    const avg = times.length > 0 ? total / times.length : 0;
    const p95 = times[Math.floor(times.length * 0.95)];

    return {
      averageResponseTime: Math.round(avg),
      p95ResponseTime: Math.round(p95 || 0),
      requestCount: times.length,
      errorCounts: Object.fromEntries(this.metrics.errorCounts)
    };
  }
}

Maintenance¶

Scheduled Maintenance Tasks¶

#!/bin/bash
# scripts/maintenance.sh
#
# Routine housekeeping: prune old logs, compact Redis memory, refresh threat
# feeds, report disk/memory usage and rotate logs. Every task is attempted even
# if an earlier one fails; the exit status is non-zero when any task failed so
# cron or CI can notice.

set -uo pipefail

FAILED=0

# Report a failed task without aborting the remaining ones
task_failed() {
    echo "WARNING: $1 failed" >&2
    FAILED=1
}

echo "Starting maintenance tasks..."

# Clear old logs (keep last 30 days)
find ./logs -name "*.log" -mtime +30 -delete || task_failed "log cleanup"

# Compact Redis memory (if using Redis)
if command -v redis-cli &> /dev/null; then
    redis-cli MEMORY PURGE || task_failed "Redis memory purge"
fi

# Update threat feed data.
# -f turns HTTP error statuses into a failure, -S still prints the error while
# -s hides the progress meter, and --max-time bounds a hung request.
curl -fsS --max-time 60 http://localhost:3000/api/admin/refresh-feeds \
    || task_failed "threat feed refresh"

# Check disk usage
df -h

# Check memory usage
free -h

# Rotate logs
logrotate /etc/logrotate.d/trojanhorse || task_failed "log rotation"

echo "Maintenance tasks completed"
exit "$FAILED"

Health Check Script¶

#!/bin/bash
# scripts/health-check.sh
#
# Probes the service's /health and /ready endpoints and exits 0 only when the
# health status equals the expected value and readiness is "ready".
#
# Environment (all optional):
#   API_URL          base URL of the service   (default http://localhost:3000)
#   EXPECTED_STATUS  required /health status   (default healthy)
#   CURL_TIMEOUT     per-request limit, secs   (default 10)

API_URL="${API_URL:-http://localhost:3000}"
EXPECTED_STATUS="${EXPECTED_STATUS:-healthy}"
CURL_TIMEOUT="${CURL_TIMEOUT:-10}"

# jq parses the JSON responses; fail with a clear message when it is missing
if ! command -v jq &> /dev/null; then
    echo "Health check failed: jq is required but not installed"
    exit 1
fi

# Check health endpoint (--max-time keeps a hung server from blocking the
# probe indefinitely; an empty response yields an empty status and fails below)
HEALTH_RESPONSE=$(curl -s --max-time "$CURL_TIMEOUT" "$API_URL/health")
STATUS=$(echo "$HEALTH_RESPONSE" | jq -r '.status' 2> /dev/null)

if [ "$STATUS" != "$EXPECTED_STATUS" ]; then
    echo "Health check failed: $STATUS"
    echo "Response: $HEALTH_RESPONSE"
    exit 1
fi

# Check readiness
READY_RESPONSE=$(curl -s --max-time "$CURL_TIMEOUT" "$API_URL/ready")
READY_STATUS=$(echo "$READY_RESPONSE" | jq -r '.status' 2> /dev/null)

if [ "$READY_STATUS" != "ready" ]; then
    echo "Readiness check failed: $READY_STATUS"
    exit 1
fi

echo "All health checks passed"
exit 0

Next Steps¶

Browser Deployment - Static site integration
Docker Guide - Containerization details
Kubernetes Guide - Orchestration setup
Monitoring Guide - Advanced monitoring

Production deployment questions? Check our troubleshooting guide or join the community discussions.