Production Deployment

This guide covers deploying ConduitLLM in production environments using Kubernetes, Docker, and cloud platforms with proper monitoring, scaling, and security configurations.

Overview

Production deployment considerations:

High availability and fault tolerance
Horizontal scaling capabilities
Comprehensive monitoring and alerting
Security hardening
Performance optimization
Disaster recovery

Docker Deployment

Production Docker Image

Multi-stage Dockerfile for optimized production image:

# Build stage
FROM mcr.microsoft.com/dotnet/sdk:9.0 AS build
WORKDIR /src

# Copy and restore dependencies
COPY ["ConduitLLM.Http/ConduitLLM.Http.csproj", "ConduitLLM.Http/"]
COPY ["ConduitLLM.Core/ConduitLLM.Core.csproj", "ConduitLLM.Core/"]
COPY ["ConduitLLM.Providers/ConduitLLM.Providers.csproj", "ConduitLLM.Providers/"]
RUN dotnet restore "ConduitLLM.Http/ConduitLLM.Http.csproj"

# Copy and build
COPY . .
WORKDIR "/src/ConduitLLM.Http"
RUN dotnet build "ConduitLLM.Http.csproj" -c Release -o /app/build

# Publish
FROM build AS publish
RUN dotnet publish "ConduitLLM.Http.csproj" -c Release -o /app/publish /p:UseAppHost=false

# Runtime stage
FROM mcr.microsoft.com/dotnet/aspnet:9.0-alpine AS final
WORKDIR /app

# Install dependencies for health checks
RUN apk add --no-cache curl

# Create non-root user
RUN addgroup -g 1000 conduit && \
    adduser -u 1000 -G conduit -s /bin/sh -D conduit

# Copy published app
COPY --from=publish /app/publish .
RUN chown -R conduit:conduit /app

# Security hardening
RUN chmod -R 550 /app

USER conduit
EXPOSE 8080
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
  CMD curl -f http://localhost:8080/health/ready || exit 1

ENTRYPOINT ["dotnet", "ConduitLLM.Http.dll"]

Docker Compose Production

version: '3.8'

services:
  api:
    image: conduit:latest
    restart: always
    environment:
      - ASPNETCORE_ENVIRONMENT=Production
      - ASPNETCORE_URLS=http://+:8080
      - ConnectionStrings__DefaultConnection=${DB_CONNECTION}
      - ConnectionStrings__Redis=${REDIS_CONNECTION}
      - Monitoring__Prometheus__Enabled=true
    ports:
      - "8080:8080"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '1'
          memory: 1G
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"

  admin:
    image: conduit-admin:latest
    restart: always
    environment:
      - ASPNETCORE_ENVIRONMENT=Production
      - MasterKey=${ADMIN_MASTER_KEY}
    ports:
      - "8081:8080"
    depends_on:
      - api

  postgres:
    image: postgres:16-alpine
    restart: always
    environment:
      - POSTGRES_DB=conduit
      - POSTGRES_USER=${DB_USER}
      - POSTGRES_PASSWORD=${DB_PASSWORD}
      - POSTGRES_INITDB_ARGS=--auth-host=scram-sha-256
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init-scripts:/docker-entrypoint-initdb.d
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${DB_USER}"]
      interval: 10s
      timeout: 5s
      retries: 5
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 1G

  redis:
    image: redis:7-alpine
    restart: always
    command: >
      redis-server
      --requirepass ${REDIS_PASSWORD}
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
      --save 900 1
      --save 300 10
      --save 60 10000
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "--auth", "${REDIS_PASSWORD}", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

  prometheus:
    image: prom/prometheus:latest
    restart: always
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:latest
    restart: always
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
      - GF_INSTALL_PLUGINS=redis-datasource
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources
    ports:
      - "3000:3000"
    depends_on:
      - prometheus

volumes:
  postgres_data:
  redis_data:
  prometheus_data:
  grafana_data:

networks:
  default:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16

Kubernetes Deployment

Namespace and ConfigMap

apiVersion: v1
kind: Namespace
metadata:
  name: conduit

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: conduit-config
  namespace: conduit
data:
  appsettings.Production.json: |
    {
      "Logging": {
        "LogLevel": {
          "Default": "Information",
          "Microsoft.AspNetCore": "Warning"
        }
      },
      "HealthChecks": {
        "Enabled": true
      },
      "Monitoring": {
        "Prometheus": {
          "Enabled": true,
          "Endpoint": "/metrics"
        }
      }
    }

Secrets

apiVersion: v1
kind: Secret
metadata:
  name: conduit-secrets
  namespace: conduit
type: Opaque
data:
  db-connection: <base64-encoded-connection-string>
  redis-connection: <base64-encoded-redis-connection>
  master-key: <base64-encoded-master-key>

API Deployment

apiVersion: apps/v1
kind: Deployment
metadata:
  name: conduit-api
  namespace: conduit
spec:
  replicas: 3
  selector:
    matchLabels:
      app: conduit-api
  template:
    metadata:
      labels:
        app: conduit-api
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: app
                operator: In
                values:
                - conduit-api
            topologyKey: kubernetes.io/hostname
      
      containers:
      - name: api
        image: your-registry/conduit:latest
        imagePullPolicy: Always
        ports:
        - containerPort: 8080
          name: http
        
        env:
        - name: ASPNETCORE_ENVIRONMENT
          value: "Production"
        - name: ConnectionStrings__DefaultConnection
          valueFrom:
            secretKeyRef:
              name: conduit-secrets
              key: db-connection
        - name: ConnectionStrings__Redis
          valueFrom:
            secretKeyRef:
              name: conduit-secrets
              key: redis-connection
        
        resources:
          requests:
            memory: "1Gi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "2000m"
        
        livenessProbe:
          httpGet:
            path: /health/live
            port: 8080
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
        
        readinessProbe:
          httpGet:
            path: /health/ready
            port: 8080
          initialDelaySeconds: 20
          periodSeconds: 10
          timeoutSeconds: 10
          failureThreshold: 3
        
        volumeMounts:
        - name: config
          mountPath: /app/appsettings.Production.json
          subPath: appsettings.Production.json
      
      volumes:
      - name: config
        configMap:
          name: conduit-config

Service and Ingress

apiVersion: v1
kind: Service
metadata:
  name: conduit-api
  namespace: conduit
spec:
  selector:
    app: conduit-api
  ports:
  - port: 80
    targetPort: 8080
    name: http
  type: ClusterIP

---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: conduit-api
  namespace: conduit
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
    nginx.ingress.kubernetes.io/rate-limit: "100"
    nginx.ingress.kubernetes.io/proxy-body-size: "50m"
    nginx.ingress.kubernetes.io/proxy-read-timeout: "300"
spec:
  ingressClassName: nginx
  tls:
  - hosts:
    - api.conduit.example.com
    secretName: conduit-tls
  rules:
  - host: api.conduit.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: conduit-api
            port:
              number: 80

Horizontal Pod Autoscaler

apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: conduit-api-hpa
  namespace: conduit
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: conduit-api
  minReplicas: 3
  maxReplicas: 20
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80
  - type: Pods
    pods:
      metric:
        name: conduit_active_requests
      target:
        type: AverageValue
        averageValue: "100"
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
      - type: Percent
        value: 10
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60
      policies:
      - type: Percent
        value: 100
        periodSeconds: 60
      - type: Pods
        value: 4
        periodSeconds: 60

Database Setup

apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: postgres-cluster
  namespace: conduit
spec:
  instances: 3
  
  postgresql:
    parameters:
      max_connections: "200"
      shared_buffers: "256MB"
      effective_cache_size: "1GB"
      
  bootstrap:
    initdb:
      database: conduit
      owner: conduit
      secret:
        name: postgres-credentials
  
  monitoring:
    enabled: true
    customQueries:
      - name: "conduit_queries"
        query: |
          SELECT query, calls, mean_exec_time
          FROM pg_stat_statements
          WHERE query LIKE '%conduit%'
        
  storage:
    size: 100Gi
    storageClass: fast-ssd

Monitoring Stack

Prometheus Configuration

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: conduit
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    
    scrape_configs:
    - job_name: 'conduit-api'
      kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
          - conduit
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
        action: replace
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
        target_label: __address__
    
    rule_files:
    - '/etc/prometheus/rules/*.yml'
    
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          - alertmanager:9093

Grafana Dashboards

Deploy pre-configured dashboards:

apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboards
  namespace: conduit
data:
  conduit-overview.json: |
    {
      "dashboard": {
        "title": "ConduitLLM Overview",
        "panels": [
          {
            "title": "Request Rate",
            "targets": [
              {
                "expr": "rate(conduit_llm_requests_total[5m])"
              }
            ]
          },
          {
            "title": "Response Time",
            "targets": [
              {
                "expr": "histogram_quantile(0.95, conduit_llm_request_duration_seconds_bucket)"
              }
            ]
          }
        ]
      }
    }

Security Hardening

Network Policies

apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: conduit-api-netpol
  namespace: conduit
spec:
  podSelector:
    matchLabels:
      app: conduit-api
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: ingress-nginx
    - podSelector:
        matchLabels:
          app: prometheus
    ports:
    - protocol: TCP
      port: 8080
  egress:
  - to:
    - podSelector:
        matchLabels:
          app: postgres
    ports:
    - protocol: TCP
      port: 5432
  - to:
    - podSelector:
        matchLabels:
          app: redis
    ports:
    - protocol: TCP
      port: 6379
  - to:
    - namespaceSelector: {}
      podSelector:
        matchLabels:
          k8s-app: kube-dns
    ports:
    - protocol: UDP
      port: 53

Pod Security Policy

apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: conduit-api-pdb
  namespace: conduit
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: conduit-api

Disaster Recovery

Backup Strategy

apiVersion: batch/v1
kind: CronJob
metadata:
  name: postgres-backup
  namespace: conduit
spec:
  schedule: "0 2 * * *"
  jobTemplate:
    spec:
      template:
        spec:
          containers:
          - name: backup
            image: postgres:16-alpine
            command:
            - /bin/bash
            - -c
            - |
              pg_dump $DATABASE_URL | gzip > /backup/conduit-$(date +%Y%m%d-%H%M%S).sql.gz
              # Upload to S3
              aws s3 cp /backup/conduit-*.sql.gz s3://conduit-backups/
              # Keep only last 30 days
              find /backup -name "*.sql.gz" -mtime +30 -delete
            env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: conduit-secrets
                  key: db-connection
            volumeMounts:
            - name: backup
              mountPath: /backup
          volumes:
          - name: backup
            persistentVolumeClaim:
              claimName: backup-pvc
          restartPolicy: OnFailure

Performance Optimization

Connection Pooling

// In appsettings.Production.json
{
  "ConnectionStrings": {
    "DefaultConnection": "Host=postgres;Database=conduit;Username=conduit;Password=xxx;Maximum Pool Size=100;Connection Idle Lifetime=300"
  },
  "Redis": {
    "Configuration": "redis:6379,password=xxx,connectTimeout=5000,syncTimeout=5000,abortConnect=false,connectRetry=3"
  }
}

Caching Configuration

services.AddStackExchangeRedisCache(options =>
{
    options.Configuration = Configuration.GetConnectionString("Redis");
    options.InstanceName = "conduit";
});

services.AddMemoryCache(options =>
{
    options.SizeLimit = 1024 * 1024 * 100; // 100MB
});

Observability

Distributed Tracing

apiVersion: v1
kind: Service
metadata:
  name: jaeger
  namespace: conduit
spec:
  ports:
  - name: collector
    port: 14268
    targetPort: 14268
  - name: query
    port: 16686
    targetPort: 16686
  selector:
    app: jaeger

Log Aggregation

apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: conduit
data:
  fluent-bit.conf: |
    [SERVICE]
        Flush         1
        Log_Level     info
        Daemon        off

    [INPUT]
        Name              tail
        Path              /var/log/containers/*conduit*.log
        Parser            docker
        Tag               conduit.*
        Refresh_Interval  5

    [OUTPUT]
        Name              es
        Match             conduit.*
        Host              elasticsearch
        Port              9200
        Index             conduit
        Type              _doc

Deployment Checklist

Next Steps

Health Checks - Configure health monitoring
Metrics Monitoring - Set up Prometheus
Runbooks - Operational procedures

Overview​

Docker Deployment​

Production Docker Image​

Docker Compose Production​

Kubernetes Deployment​

Namespace and ConfigMap​

Secrets​

API Deployment​

Service and Ingress​

Horizontal Pod Autoscaler​

Database Setup​

Monitoring Stack​

Prometheus Configuration​

Grafana Dashboards​

Security Hardening​

Network Policies​

Pod Security Policy​

Disaster Recovery​

Backup Strategy​

Performance Optimization​

Connection Pooling​

Caching Configuration​

Observability​

Distributed Tracing​

Log Aggregation​

Deployment Checklist​

Next Steps​