Production Deployment
Scaling, monitoring, and reliability best practices for production AI systems.
25 min read
Advanced level
Production Infrastructure
Environment Configuration
Production Environment Setup
# docker-compose.prod.yml
# Production stack: the application (3 replicas), a Redis cache, and an
# nginx front end that publishes ports 80/443.
version: '3.8'

services:
  app:
    image: your-app:latest
    environment:
      NODE_ENV: production
      # Secrets are injected from the host environment, never hard-coded here.
      RODGER_API_KEY: ${RODGER_API_KEY}
      RODGER_API_URL: https://api.rodger.ai
      REDIS_URL: redis://redis:6379
      DATABASE_URL: ${DATABASE_URL}

    # Health checks
    # NOTE: this probe requires `curl` to be present in the app image.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

    deploy:
      # Resource limits
      resources:
        limits:
          cpus: '2'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 2G
      # Scaling configuration
      # `deploy.replicas` is the replica-count key in the v3.x file format;
      # the service-level `scale` key is not part of the 3.8 schema.
      replicas: 3

  redis:
    image: redis:7-alpine
    # Cap memory at 512 MB and evict least-recently-used keys when full.
    command: redis-server --maxmemory 512mb --maxmemory-policy allkeys-lru

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl
Load Balancing & Caching
Production Application Code
// Production-ready client with caching and resilience
/**
 * Rodger API client hardened for production use.
 *
 * Layers two things on top of `RodgerClient`:
 * - a Redis response cache (5-minute TTL) keyed by agent id and message hash;
 * - a circuit breaker around `RodgerClient.query`, with a fallback response
 *   when the breaker is open.
 *
 * `hashMessage` and `findSimilarCachedResponse` are referenced here but are
 * defined outside this snippet.
 */
class ProductionRodgerClient {
  client: RodgerClient;
  cache: Redis;
  circuitBreaker: CircuitBreaker;

  constructor() {
    this.client = new RodgerClient({
      apiKey: process.env.RODGER_API_KEY,
      apiUrl: process.env.RODGER_API_URL,
      timeout: 30000,
      retries: 3
    });

    // Redis for caching
    this.cache = new Redis(process.env.REDIS_URL);

    // Circuit breaker for resilience.
    // The method must be bound: handing over a bare `this.client.query`
    // reference detaches it from the client, so `this` inside `query`
    // would no longer be the RodgerClient instance.
    this.circuitBreaker = new CircuitBreaker(this.client.query.bind(this.client), {
      timeout: 30000,
      errorThresholdPercentage: 50,
      resetTimeout: 60000
    });
  }

  /**
   * Sends `message` to the agent, serving repeated queries from the cache.
   *
   * @param agentId - Agent to query.
   * @param message - User message; its hash is part of the cache key.
   * @param options - Extra request options, spread into the request payload.
   *   `options.skipCache` bypasses both the cache read and the cache write.
   * @returns The cached response, the live response, or a fallback response
   *   when the circuit breaker is open.
   * @throws Any error from the breaker other than "breaker open".
   */
  async query(agentId: string, message: string, options: any = {}) {
    const useCache = !options.skipCache;
    // NOTE(review): the key ignores `options`, so two calls with the same
    // message but different options share a cache entry — confirm intended.
    const cacheKey = `query:${agentId}:${hashMessage(message)}`;

    // Only touch Redis when the caller has not opted out of caching.
    if (useCache) {
      const hit = await this.readCache(cacheKey);
      if (hit !== undefined) {
        return hit;
      }
    }

    let response;
    try {
      // Use circuit breaker for resilient queries
      response = await this.circuitBreaker.fire(agentId, {
        message,
        ...options
      });
    } catch (error: unknown) {
      // Fallback strategies
      if (ProductionRodgerClient.isBreakerOpen(error)) {
        return this.handleFallbackResponse(agentId, message);
      }
      throw error;
    }

    // Cache successful responses. This happens outside the try block so a
    // cache failure can never discard a response we already have.
    if (response && useCache) {
      await this.writeCache(cacheKey, response);
    }
    return response;
  }

  /**
   * Builds a response for when the circuit breaker is open.
   *
   * @param agentId - Agent the failed query targeted (currently unused).
   * @param message - Original user message, used to look up a similar cached response.
   * @returns A similar cached response tagged `cache_fallback`, or a generic
   *   apology message tagged `default_fallback`.
   */
  async handleFallbackResponse(agentId: string, message: string) {
    // Use cached similar responses or default fallback
    const fallback = await this.findSimilarCachedResponse(message);
    if (fallback) {
      return {
        ...fallback,
        metadata: { source: 'cache_fallback' }
      };
    }
    return {
      message: "I'm experiencing technical difficulties. Please try again in a moment.",
      metadata: { source: 'default_fallback' }
    };
  }

  /**
   * Best-effort cache read: a Redis error or an unparsable entry is treated
   * as a miss (`undefined`) instead of failing the query.
   * Returns `any` so the return type of `query` stays what it was.
   */
  private async readCache(cacheKey: string): Promise<any> {
    try {
      const cached = await this.cache.get(cacheKey);
      return cached ? JSON.parse(cached) : undefined;
    } catch (error: unknown) {
      console.warn(`Cache read failed for ${cacheKey}:`, error);
      return undefined;
    }
  }

  /** Best-effort cache write with a 5-minute TTL; failures are logged, not thrown. */
  private async writeCache(cacheKey: string, response: unknown): Promise<void> {
    try {
      await this.cache.setex(cacheKey, 300, JSON.stringify(response)); // 5min cache
    } catch (error: unknown) {
      console.warn(`Cache write failed for ${cacheKey}:`, error);
    }
  }

  /**
   * True when `error` carries an "open breaker" code. Accepts the original
   * `CIRCUIT_OPEN` as well as `EOPENBREAKER`, the code opossum uses.
   * NOTE(review): confirm which circuit-breaker library is in use.
   */
  private static isBreakerOpen(error: unknown): boolean {
    if (typeof error !== 'object' || error === null) {
      return false;
    }
    const code = (error as { code?: unknown }).code;
    return code === 'CIRCUIT_OPEN' || code === 'EOPENBREAKER';
  }
}
Monitoring & Observability
Comprehensive Monitoring Setup
// Monitoring configuration: APM provider, custom metrics, and alert rules.

// Small builders so every metric and alert entry has the same shape.
const metricSpec = (name: string, type: string, tags: string[]) => ({ name, type, tags });
const alertRule = (
  name: string,
  condition: string,
  timeWindow: string,
  notification: string[]
) => ({ name, condition, timeWindow, notification });

const monitoringSetup = {
  // Application performance monitoring provider and its settings.
  apm: {
    service: 'datadog', // alternatives: 'newrelic', 'honeycomb'
    config: {
      apiKey: process.env.DATADOG_API_KEY,
      service: 'rodger-ai-app',
      environment: 'production'
    }
  },

  // Custom metrics, each with the tags it is broken down by.
  metrics: [
    metricSpec('rodger.query.duration', 'histogram', ['agent_id', 'success', 'channel']),
    metricSpec('rodger.escalation.rate', 'gauge', ['team_id', 'time_of_day']),
    metricSpec('rodger.customer.satisfaction', 'gauge', ['agent_type', 'channel', 'resolution_type'])
  ],

  // Alert rules: condition, evaluation window, and who gets notified.
  alerts: [
    alertRule('High Error Rate', 'error_rate > 5%', '5m', [
      'slack://devops',
      'email://oncall@company.com'
    ]),
    alertRule('Slow Response Time', 'avg(response_time) > 3s', '10m', ['slack://performance']),
    alertRule('Low AI Confidence', 'avg(ai_confidence) < 0.7', '1h', ['email://ai-team@company.com'])
  ]
};
// Initialize monitoring: pass `monitoringSetup` to `initializeMonitoring` and
// wait for it to complete.
// NOTE(review): `initializeMonitoring` is not defined in this snippet — confirm its source.
await initializeMonitoring(monitoringSetup);
Security & Compliance
Production Security Configuration
// Comprehensive security setup for production
const securityConfig = {
  // API key management
  apiKeyRotation: {
    enabled: true,
    rotationInterval: '90d',
    notifyBeforeExpiry: '7d'
  },

  // Request validation
  inputSanitization: {
    maxMessageLength: 2000,
    // Letters, digits, whitespace and basic punctuation only. The class must
    // contain `\s` (whitespace); a bare `s` rejects every message that has a
    // space in it. The `-` stays last so it is a literal hyphen, not a range.
    allowedCharacters: /^[a-zA-Z0-9\s.,!?-]+$/,
    filterProfanity: true,
    detectPromptInjection: true
  },

  // Rate limiting
  rateLimiting: {
    global: {
      rpm: 1000, // Requests per minute
      rph: 50000, // Requests per hour
      rpd: 1000000 // Requests per day
    },
    perUser: {
      rpm: 60,
      burst: 10
    },
    perIP: {
      rpm: 100,
      blockDuration: '1h'
    }
  },

  // Data protection
  dataProtection: {
    encryptSensitiveData: true,
    maskPersonalInfo: true,
    dataRetention: '2y',
    automaticDeletion: true,
    gdprCompliant: true
  },

  // Audit logging
  auditLogging: {
    enabled: true,
    logLevel: 'info',
    includeRequestData: false, // Don't log user messages
    includeResponseData: false,
    retentionDays: 90
  }
};
// Apply security configuration: pass `securityConfig` to
// `client.configureSecuritySettings` and wait for it to complete.
// NOTE(review): `client` is not defined in this snippet — presumably an
// already-constructed Rodger client; confirm.
await client.configureSecuritySettings(securityConfig);
Scaling Strategies
Horizontal Scaling
- Deploy multiple application instances
- Use load balancers for traffic distribution
- Implement session affinity for conversations
- Scale based on concurrent conversations
Auto-scaling Configuration
Kubernetes Auto-scaling
# HorizontalPodAutoscaler for the `rodger-app` Deployment:
# keeps between 3 and 20 replicas based on CPU and memory utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: rodger-app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: rodger-app
  minReplicas: 3
  maxReplicas: 20
  metrics:
    # Target 70% average CPU utilization.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    # Target 80% average memory utilization.
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
        # Allow at most a 100% increase in replicas per 60-second period.
        - type: Percent
          value: 100
          periodSeconds: 60
    scaleDown:
      # Longer window on the way down than on the way up (300s vs 60s).
      stabilizationWindowSeconds: 300
Disaster Recovery
Backup & Recovery Strategy
// Backup strategy, assembled from one section per kind of data.

// Conversation data: hourly, encrypted, kept 90 days, written to two destinations.
const conversationBackup = {
  frequency: 'hourly',
  retention: '90d',
  encryption: true,
  destinations: ['s3://backups/conversations', 'gcs://backup-bucket']
};

// Knowledge base: daily, compressed and versioned, embeddings included.
const knowledgeBackup = {
  frequency: 'daily',
  includeEmbeddings: true,
  compression: true,
  versioning: true
};

// Configuration: captured on every change; secrets are managed separately.
const configBackup = {
  frequency: 'on_change',
  includeSecrets: false,
  gitRepository: 'git://config-repo.git'
};

// Disaster recovery targets and the ordered failover steps.
const recoveryProcedures = {
  rpo: '1h', // Recovery Point Objective
  rto: '15m', // Recovery Time Objective
  failoverSteps: [
    'switch_dns_to_backup_region',
    'restore_latest_backup',
    'validate_service_health',
    'notify_stakeholders'
  ]
};

const backupConfig = {
  conversationBackup,
  knowledgeBackup,
  configBackup,
  recoveryProcedures
};
// Automated backup execution. In standard five-field cron syntax,
// '0 * * * *' fires at minute 0 of every hour.
cron.schedule('0 * * * *', async () => {
  try {
    await executeBackup(backupConfig);
  } catch (error: unknown) {
    // Nothing awaits this callback, so an uncaught failure would become an
    // unhandled promise rejection. Log it and let the next run retry.
    console.error('Scheduled backup failed:', error);
  }
});