Production Deployment

Scaling, monitoring, and reliability best practices for production AI systems.

25 min read
Advanced level

Production Infrastructure

Environment Configuration

Production Environment Setup
# docker-compose.prod.yml
# Production stack: replicated app containers, a Redis cache, and an nginx front end.
version: '3.8'
services:
  app:
    image: your-app:latest
    environment:
      NODE_ENV: production
      # ${...} values are substituted from the environment (or .env file) at deploy time,
      # so secrets are not stored in this file.
      RODGER_API_KEY: ${RODGER_API_KEY}
      RODGER_API_URL: https://api.rodger.ai
      # "redis" resolves to the redis service defined below.
      REDIS_URL: redis://redis:6379
      DATABASE_URL: ${DATABASE_URL}

    # Health check: probe /health every 30s; the container is marked unhealthy
    # after 3 consecutive failures. NOTE(review): requires curl inside the app image.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

    deploy:
      # Scaling configuration. The replica count lives under `deploy` because the
      # service-level `scale` key is not part of the version 3.x file format.
      replicas: 3

      # Resource limits (per replica)
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G

  redis:
    image: redis:7-alpine
    # Cap memory at 512 MB and evict least-recently-used keys once the cap is reached.
    command: redis-server --maxmemory 512mb --maxmemory-policy allkeys-lru

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl

Load Balancing & Caching

Production Application Code
/**
 * Production-ready client with caching and resilience.
 *
 * Wraps a `RodgerClient` with a Redis response cache and a circuit breaker.
 * When the breaker is open, `query` returns a fallback response instead of
 * throwing.
 */
class ProductionRodgerClient {
  /** Lifetime of a cached response, in seconds (5 minutes). */
  private static readonly CACHE_TTL_SECONDS = 300;

  private readonly client: RodgerClient;
  private readonly cache: Redis;
  private readonly circuitBreaker: CircuitBreaker;

  constructor() {
    this.client = new RodgerClient({
      apiKey: process.env.RODGER_API_KEY,
      apiUrl: process.env.RODGER_API_URL,
      timeout: 30000,
      retries: 3
    });

    // Redis for caching
    this.cache = new Redis(process.env.REDIS_URL);

    // Circuit breaker for resilience. The method is bound to its client:
    // passing `this.client.query` bare would invoke it without its `this`.
    this.circuitBreaker = new CircuitBreaker(this.client.query.bind(this.client), {
      timeout: 30000,
      errorThresholdPercentage: 50,
      resetTimeout: 60000
    });
  }

  /**
   * Sends `message` to the given agent, serving repeated queries from cache.
   *
   * Cache access is best-effort: a failed or corrupt cache read is treated as a
   * miss and a failed cache write is logged, so a cache outage never fails a
   * query that the upstream API can answer.
   *
   * NOTE(review): the cache key is derived from `agentId` and `message` only, so
   * calls that differ solely in `options` share one cache entry — confirm this
   * is intended.
   *
   * @param agentId - Agent to query.
   * @param message - User message; hashed (via `hashMessage`) into the cache key.
   * @param options - Extra fields forwarded to the upstream call. Set
   *   `skipCache` to bypass both the cache read and the cache write.
   * @returns The cached or upstream response, or a fallback response when the
   *   circuit breaker is open.
   * @throws Any upstream error other than an open circuit.
   */
  async query(agentId: string, message: string, options: any = {}) {
    const useCache = !options.skipCache;
    const cacheKey = `query:${agentId}:${hashMessage(message)}`;

    // Check cache first for repeated queries (skipped entirely on skipCache).
    if (useCache) {
      try {
        const cached = await this.cache.get(cacheKey);
        if (cached) {
          return JSON.parse(cached);
        }
      } catch (cacheError: unknown) {
        console.warn('Cache read failed; querying upstream instead:', cacheError);
      }
    }

    let response;
    try {
      // Use circuit breaker for resilient queries
      response = await this.circuitBreaker.fire(agentId, {
        message,
        ...options
      });
    } catch (error: unknown) {
      // Fallback strategies
      if (ProductionRodgerClient.isCircuitOpen(error)) {
        return this.handleFallbackResponse(agentId, message);
      }
      throw error;
    }

    // Cache successful responses. Kept outside the upstream try/catch so a
    // cache failure cannot discard a response that was already obtained.
    if (response && useCache) {
      try {
        await this.cache.setex(
          cacheKey,
          ProductionRodgerClient.CACHE_TTL_SECONDS,
          JSON.stringify(response)
        );
      } catch (cacheError: unknown) {
        console.warn('Cache write failed; returning uncached response:', cacheError);
      }
    }

    return response;
  }

  /**
   * Builds the response returned while the circuit breaker is open.
   *
   * Prefers a similar cached response (tagged `cache_fallback`); otherwise
   * returns a generic apology message (tagged `default_fallback`).
   *
   * NOTE(review): `findSimilarCachedResponse` is not defined in the visible
   * part of this class — it must be provided elsewhere.
   *
   * @param agentId - Agent the failed query targeted (currently unused).
   * @param message - Original user message, used for the similarity lookup.
   */
  async handleFallbackResponse(agentId: string, message: string) {
    // Use cached similar responses or default fallback. A failing lookup must
    // not prevent the default fallback from being returned.
    let fallback;
    try {
      fallback = await this.findSimilarCachedResponse(message);
    } catch (lookupError: unknown) {
      console.warn('Fallback lookup failed; using default fallback:', lookupError);
    }

    if (fallback) {
      return {
        ...fallback,
        metadata: { source: 'cache_fallback' }
      };
    }

    return {
      message: "I'm experiencing technical difficulties. Please try again in a moment.",
      metadata: { source: 'default_fallback' }
    };
  }

  /**
   * Reports whether `error` signals an open circuit.
   *
   * Accepts the original `'CIRCUIT_OPEN'` code and `'EOPENBREAKER'`, the code
   * opossum attaches — presumably the breaker library in use, given the
   * constructor options and `fire()`; confirm against the actual import.
   */
  private static isCircuitOpen(error: unknown): boolean {
    if (typeof error !== 'object' || error === null) {
      return false;
    }
    const code = (error as { code?: unknown }).code;
    return code === 'CIRCUIT_OPEN' || code === 'EOPENBREAKER';
  }
}

Monitoring & Observability

Comprehensive Monitoring Setup
// Monitoring configuration, assembled from three parts:
// the APM backend, the custom metrics to track, and the alert rules.

// Application Performance Monitoring backend and its credentials.
const apmSettings = {
  service: 'datadog', // or 'newrelic', 'honeycomb'
  config: {
    apiKey: process.env.DATADOG_API_KEY,
    service: 'rodger-ai-app',
    environment: 'production'
  }
};

// Custom metrics: each entry names the metric, its type, and the tags it carries.
const trackedMetrics = [
  { name: 'rodger.query.duration', type: 'histogram', tags: ['agent_id', 'success', 'channel'] },
  { name: 'rodger.escalation.rate', type: 'gauge', tags: ['team_id', 'time_of_day'] },
  {
    name: 'rodger.customer.satisfaction',
    type: 'gauge',
    tags: ['agent_type', 'channel', 'resolution_type']
  }
];

// Alert rules: a condition evaluated over a time window, plus who gets notified.
const alertRules = [
  {
    name: 'High Error Rate',
    condition: 'error_rate > 5%',
    timeWindow: '5m',
    notification: ['slack://devops', 'email://oncall@company.com']
  },
  {
    name: 'Slow Response Time',
    condition: 'avg(response_time) > 3s',
    timeWindow: '10m',
    notification: ['slack://performance']
  },
  {
    name: 'Low AI Confidence',
    condition: 'avg(ai_confidence) < 0.7',
    timeWindow: '1h',
    notification: ['email://ai-team@company.com']
  }
];

const monitoringSetup = {
  apm: apmSettings,
  metrics: trackedMetrics,
  alerts: alertRules
};

// Initialize monitoring with the configuration above.
// NOTE(review): `initializeMonitoring` is not defined in the visible snippet — it must be
// imported or implemented elsewhere. The top-level `await` also requires this code to run
// in an ES module or inside an async function.
await initializeMonitoring(monitoringSetup);

Security & Compliance

Production Security Configuration
// Comprehensive security setup for production: key rotation, input validation,
// rate limits, data protection, and audit logging.
const securityConfig = {
  // API key management
  apiKeyRotation: {
    enabled: true,
    rotationInterval: '90d',
    notifyBeforeExpiry: '7d'
  },

  // Request validation
  inputSanitization: {
    maxMessageLength: 2000,
    // Whitelist of letters, digits, whitespace, and basic punctuation.
    // `\s` (not a bare `s`) is required so that messages containing spaces pass.
    // NOTE(review): this still rejects apostrophes, quotes, and non-ASCII text —
    // confirm that is acceptable for your users.
    allowedCharacters: /^[a-zA-Z0-9\s.,!?-]+$/,
    filterProfanity: true,
    detectPromptInjection: true
  },

  // Rate limiting
  rateLimiting: {
    global: {
      rpm: 1000,          // Requests per minute
      rph: 50000,         // Requests per hour
      rpd: 1000000        // Requests per day
    },
    perUser: {
      rpm: 60,
      burst: 10
    },
    perIP: {
      rpm: 100,
      blockDuration: '1h'
    }
  },

  // Data protection
  dataProtection: {
    encryptSensitiveData: true,
    maskPersonalInfo: true,
    dataRetention: '2y',
    automaticDeletion: true,
    gdprCompliant: true
  },

  // Audit logging
  auditLogging: {
    enabled: true,
    logLevel: 'info',
    includeRequestData: false, // Don't log user messages
    includeResponseData: false,
    retentionDays: 90
  }
};

// Apply security configuration.
// NOTE(review): `client` is not defined in the visible snippet — presumably an
// already-initialized Rodger SDK client; confirm. The top-level `await` requires
// an ES module or an enclosing async function.
await client.configureSecuritySettings(securityConfig);

Scaling Strategies

Horizontal Scaling

  • Deploy multiple application instances
  • Use load balancers for traffic distribution
  • Implement session affinity for conversations
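  • Scale based on concurrent conversations (this requires exposing a custom metric; the auto-scaling example below uses CPU and memory utilization)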

Auto-scaling Configuration

Kubernetes Auto-scaling
# Horizontal Pod Autoscaler for the rodger-app Deployment.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata: { name: rodger-app-hpa }
spec:
  # Workload whose replica count this autoscaler manages.
  scaleTargetRef: { apiVersion: apps/v1, kind: Deployment, name: rodger-app }

  # Replica bounds.
  minReplicas: 3
  maxReplicas: 20

  # Scale on average CPU (70%) and memory (80%) utilization.
  metrics:
    - type: Resource
      resource:
        name: cpu
        target: { type: Utilization, averageUtilization: 70 }
    - type: Resource
      resource:
        name: memory
        target: { type: Utilization, averageUtilization: 80 }

  behavior:
    # Scale up quickly: 60s stabilization window, and at most a 100% increase
    # in replicas per 60s period.
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        - { type: Percent, value: 100, periodSeconds: 60 }
    # Scale down cautiously: 300s stabilization window.
    scaleDown:
      stabilizationWindowSeconds: 300

Disaster Recovery

Backup & Recovery Strategy
// Backup strategy, defined one concern at a time and combined into `backupConfig` below.

// Conversation data: encrypted hourly backups, kept for 90 days in two destinations.
const conversationBackup = {
  frequency: 'hourly',
  retention: '90d',
  encryption: true,
  destinations: ['s3://backups/conversations', 'gcs://backup-bucket']
};

// Knowledge base: compressed, versioned daily backups that include embeddings.
const knowledgeBackup = {
  frequency: 'daily',
  includeEmbeddings: true,
  compression: true,
  versioning: true
};

// Configuration: backed up to a git repository whenever it changes.
const configBackup = {
  frequency: 'on_change',
  includeSecrets: false, // Secrets managed separately
  gitRepository: 'git://config-repo.git'
};

// Disaster recovery targets and the ordered failover runbook.
const recoveryProcedures = {
  rpo: '1h',  // Recovery Point Objective
  rto: '15m', // Recovery Time Objective

  failoverSteps: [
    'switch_dns_to_backup_region',
    'restore_latest_backup',
    'validate_service_health',
    'notify_stakeholders'
  ]
};

const backupConfig = {
  conversationBackup,
  knowledgeBackup,
  configBackup,
  recoveryProcedures
};

// Automated backup execution: the cron expression fires at minute 0 of every hour.
// Failures are caught and logged inside the callback so that a failed backup run
// does not surface as an unhandled promise rejection from the scheduler.
cron.schedule('0 * * * *', async () => {
  try {
    await executeBackup(backupConfig);
  } catch (error: unknown) {
    console.error('Scheduled backup failed:', error);
  }
});