Docker Compose isn’t just for development. With the right patterns, it’s a legitimate production deployment tool for small-to-medium workloads. Here’s how to do it without the footguns.

Base Structure

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# docker-compose.yml
# NOTE: the top-level `version` key is obsolete — Compose V2 infers the
# schema from the file contents and warns if `version` is present.

services:
  app:
    # ${VERSION:-latest} falls back to latest for dev convenience;
    # always set VERSION explicitly for production deploys.
    image: myapp:${VERSION:-latest}
    restart: unless-stopped  # auto-restart on crash, but not after a manual stop
    environment:
      - NODE_ENV=production
    deploy:
      resources:
        limits:          # hard caps enforced by the runtime
          cpus: '2'
          memory: 1G
    healthcheck:
      # Requires curl inside the image — use wget --spider or a built-in
      # check if the image doesn't ship curl.
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s  # failures during this window don't count against retries

Key elements:

  • Explicit image tags (not latest in prod)
  • Restart policy for resilience
  • Resource limits to prevent runaway containers
  • Health checks for orchestration

Environment Management

Multiple Environments

docker-compose.yml            # Base config
docker-compose.override.yml   # Dev overrides (auto-loaded)
docker-compose.staging.yml    # Staging overrides
docker-compose.prod.yml       # Production overrides
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
# docker-compose.yml (base)
# Shared definition for every environment; env-specific files layer on top.
services:
  app:
    image: myapp:${VERSION}
    environment:
      # Bare names (no value) pass the variable through from the deploy shell.
      - DATABASE_URL
      - REDIS_URL

# docker-compose.prod.yml (production additions)
# Merged over the base via: -f docker-compose.yml -f docker-compose.prod.yml
services:
  app:
    restart: unless-stopped
    deploy:
      replicas: 3
      resources:
        limits:
          memory: 2G
    logging:
      driver: "json-file"
      options:
        max-size: "100m"  # rotate at 100 MB
        max-file: "5"     # keep 5 rotated files (~500 MB cap per container)
1
2
3
4
5
# Development (uses override automatically)
# `docker compose up` merges docker-compose.yml + docker-compose.override.yml
docker compose up

# Production
# Order matters: later -f files override earlier ones; -d runs detached
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d

Secrets Management

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
services:
  app:
    secrets:
      # Each secret is mounted read-only at /run/secrets/<name>
      - db_password
      - api_key

secrets:
  db_password:
    file: ./secrets/db_password.txt  # keep this path out of version control
  api_key:
    external: true  # Created via `docker secret create`
    # NOTE(review): `docker secret create` is a Swarm feature — confirm this
    # works in your plain-Compose setup before relying on it.
1
2
3
# Read secret in application
# Compose mounts each declared secret as a file under /run/secrets/.
with open('/run/secrets/db_password') as f:
    db_password = f.read().strip()  # strip() drops the trailing newline in the file

Environment Files

1
2
3
4
5
services:
  app:
    env_file:
      # Later files override earlier ones on key collisions.
      - .env.common
      - .env.${ENVIRONMENT:-dev}  # defaults to .env.dev when ENVIRONMENT is unset
1
2
3
# .env.prod
# Plain KEY=VALUE pairs; no quoting or shell expansion is performed here.
DATABASE_URL=postgres://prod-db:5432/app
LOG_LEVEL=warn

Networking

Internal Services

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
services:
  app:
    # app bridges both networks: reachable by the proxy, can reach the backend
    networks:
      - frontend
      - backend

  api:
    networks:
      - backend  # Not exposed to frontend

  db:
    networks:
      - backend

networks:
  frontend:
  backend:
    internal: true  # No external access

Reverse Proxy

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
services:
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro  # ro: container can't alter its own config
      - ./certs:/etc/nginx/certs:ro
    # Start-order only — add `condition: service_healthy` to wait for readiness.
    depends_on:
      - app
    networks:
      - frontend

  app:
    expose:
      - "3000"  # Internal only, not published
    networks:
      - frontend
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# nginx.conf
upstream app {
    # "app" resolves via Docker's embedded DNS on the shared network
    server app:3000;
}

server {
    listen 443 ssl;
    ssl_certificate /etc/nginx/certs/cert.pem;
    ssl_certificate_key /etc/nginx/certs/key.pem;

    location / {
        proxy_pass http://app;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        # Tell the app TLS terminated here so it can generate correct
        # https:// URLs and secure cookies
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}

Persistence

Named Volumes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
services:
  db:
    image: postgres:15
    volumes:
      - postgres_data:/var/lib/postgresql/data

volumes:
  postgres_data:
    driver: local
    # bind-style named volume: data lives at a known host path
    driver_opts:
      type: none
      o: bind
      device: /data/postgres  # Specific host path
      # NOTE: this directory must already exist on the host, or volume
      # creation fails at first `up`.

Backup Strategy

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
services:
  backup:
    image: postgres:15  # same major version as the db for a compatible pg_dump
    volumes:
      - postgres_data:/source:ro  # read-only mount of the data volume
      - ./backups:/backups
    # $$ escapes $ so date expands inside the container, not at compose time.
    # NOTE(review): pg_dump needs credentials (e.g. PGPASSWORD) — confirm
    # how auth is supplied in this environment.
    command: >
      sh -c "pg_dump -h db -U postgres mydb > /backups/backup_$$(date +%Y%m%d).sql"
    profiles:
      - backup  # Only runs when explicitly called
1
2
# Run backup
# --rm removes the one-off container once the dump finishes
docker compose --profile backup run --rm backup

Logging

Centralized Logging

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
services:
  app:
    logging:
      driver: "fluentd"
      options:
        # The Docker daemon (on the host) makes this connection — localhost
        # works because the fluentd service publishes 24224 below.
        fluentd-address: localhost:24224
        tag: app.{{.Name}}  # {{.Name}} expands to the container name

  fluentd:
    image: fluent/fluentd:v1.16
    volumes:
      - ./fluentd.conf:/fluentd/etc/fluent.conf
    ports:
      - "24224:24224"

JSON Logging with Limits

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
services:
  app:
    logging:
      driver: "json-file"
      options:
        max-size: "50m"  # rotate when a log file reaches 50 MB
        max-file: "10"   # keep at most 10 rotated files (~500 MB cap)
        labels: "service,environment"  # attach these container labels to each log entry
    labels:
      service: "myapp"
      environment: "production"

Health Checks & Dependencies

Proper Startup Order

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
services:
  app:
    depends_on:
      # service_healthy: wait for the dependency's healthcheck to pass,
      # not merely for its container process to start
      db:
        condition: service_healthy
      redis:
        condition: service_healthy

  db:
    image: postgres:15
    healthcheck:
      # CMD-SHELL runs through a shell so the string form works as-is
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 5

Application Health Check

1
2
3
4
5
6
7
8
services:
  app:
    healthcheck:
      # --spider checks reachability without downloading the response body
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s  # Grace period for startup

Zero-Downtime Deploys

Rolling Update Script

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
#!/bin/bash
# Rolling update: start a second replica on the new image, wait until it is
# actually healthy, then scale back down.
set -e

# Pull new image
docker compose pull app

# Scale up new instances (--no-recreate keeps the existing container running;
# the added replica starts from the freshly pulled image)
docker compose up -d --scale app=2 --no-recreate

# Wait for both instances to report healthy instead of sleeping blindly —
# a fixed sleep either wastes time or races a slow-starting app.
deadline=$(( $(date +%s) + 120 ))
until [ "$(docker compose ps app | grep -c '(healthy)')" -ge 2 ]; do
  if [ "$(date +%s)" -ge "$deadline" ]; then
    echo "New instance failed to become healthy within 120s" >&2
    exit 1
  fi
  sleep 5
done

# Remove old instance
docker compose up -d --scale app=1

echo "Deploy complete"

Blue-Green with Traefik

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
services:
  traefik:
    image: traefik:v2.10
    command:
      - "--providers.docker=true"
      - "--providers.docker.exposedbydefault=false"  # route only opt-in (labeled) services
    ports:
      - "80:80"
    volumes:
      # read-only socket access so traefik can discover containers
      - /var/run/docker.sock:/var/run/docker.sock:ro

  app:
    image: myapp:${VERSION}
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.app.rule=Host(`app.example.com`)"
      # Traefik only routes to backends that pass this health probe,
      # which is what makes the cutover zero-downtime
      - "traefik.http.services.app.loadbalancer.healthcheck.path=/health"
      - "traefik.http.services.app.loadbalancer.healthcheck.interval=10s"

Monitoring

Prometheus + Grafana Stack

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
services:
  prometheus:
    # Pinned tag — the article's own rule: no :latest in production
    image: prom/prometheus:v2.47.2
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus  # persist the TSDB across restarts
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=15d'

  grafana:
    image: grafana/grafana:10.2.3  # pinned — avoid :latest in production
    volumes:
      - grafana_data:/var/lib/grafana
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD}
    depends_on:
      - prometheus

  cadvisor:
    # Per-container CPU/memory/IO metrics for Prometheus to scrape
    image: gcr.io/cadvisor/cadvisor:v0.47.2  # pinned — avoid :latest in production
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    privileged: true  # needs host-level access to read cgroup stats

volumes:
  prometheus_data:
  grafana_data:

Security

Read-Only Containers

1
2
3
4
5
6
7
8
services:
  app:
    read_only: true  # root filesystem becomes immutable at runtime
    tmpfs:
      # in-memory scratch space for the paths the app must still write to
      - /tmp
      - /var/run
    security_opt:
      - no-new-privileges:true  # blocks setuid/setgid privilege escalation

Non-Root User

1
2
3
4
# Dockerfile
FROM node:20-slim
# -r creates a system account (no home directory, no login shell)
RUN useradd -r -u 1001 appuser
USER appuser
1
2
3
services:
  app:
    # uid:gid should match an account baked into the image (e.g. useradd -u 1001)
    user: "1001:1001"

Limit Capabilities

1
2
3
4
5
6
services:
  app:
    cap_drop:
      - ALL  # start from zero capabilities
    cap_add:
      - NET_BIND_SERVICE  # Only if needed

Production Checklist

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# production-ready-compose.yml
# (top-level `version` omitted — it is obsolete and ignored by Compose V2)

services:
  app:
    image: myapp:${VERSION}  # ✓ Pinned version
    restart: unless-stopped   # ✓ Restart policy
    read_only: true           # ✓ Security
    user: "1001:1001"         # ✓ Non-root

    deploy:
      resources:
        limits:             # hard caps
          cpus: '2'
          memory: 1G
        reservations:       # guaranteed floor
          memory: 512M

    healthcheck:
      # curl must exist in the image; use wget --spider otherwise
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

    logging:
      driver: "json-file"
      options:
        max-size: "100m"
        max-file: "5"

    security_opt:
      - no-new-privileges:true

    cap_drop:
      - ALL

    tmpfs:
      - /tmp    # writable scratch space despite read_only root fs

    depends_on:
      db:
        condition: service_healthy

When to Graduate

Docker Compose works great until:

  • You need multi-host deployments
  • Auto-scaling based on metrics
  • Complex service mesh requirements
  • Hundreds of containers

Then consider: Docker Swarm, Kubernetes, or managed container services (ECS, Cloud Run).

Start Here

  1. Today: Add health checks to all services
  2. This week: Implement proper logging with rotation
  3. This month: Add resource limits and security hardening
  4. This quarter: Set up monitoring stack

Docker Compose in production isn’t a compromise — it’s pragmatism. Not everything needs Kubernetes.


The best infrastructure is the one you can understand, debug, and maintain at 3am.