Docker Compose tutorials show you `docker compose up`. Production requires health checks, resource limits, proper logging, restart policies, and deployment strategies. Here’s how to bridge that gap.

Base Configuration

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# docker-compose.yml
# Minimal single-service stack: the image is built from the local Dockerfile,
# tagged with VERSION (falling back to "latest"), and published on port 3000.
version: "3.8"

services:
  app:
    build:
      context: .
      dockerfile: Dockerfile
    image: "myapp:${VERSION:-latest}"
    restart: unless-stopped
    # Values from .env are loaded first; keys under `environment` are set
    # explicitly on top of them.
    env_file:
      - .env
    environment:
      NODE_ENV: production
    ports:
      # Quoted so the host:container pair is always read as a string.
      - "3000:3000"

This is a starting point. Let’s make it production-ready.

Health Checks

Don’t assume a running container is healthy:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
services:
  # HTTP probe against the app's own /health endpoint.
  app:
    image: myapp:latest
    healthcheck:
      test:
        - CMD
        - curl
        - -f
        - http://localhost:3000/health
      start_period: 40s
      interval: 30s
      timeout: 10s
      retries: 3

  # pg_isready runs through the container shell (CMD-SHELL form).
  postgres:
    image: postgres:15
    healthcheck:
      test:
        - CMD-SHELL
        - pg_isready -U postgres
      interval: 10s
      timeout: 5s
      retries: 5

  # redis-cli is executed directly, without a shell (CMD form).
  redis:
    image: redis:7-alpine
    healthcheck:
      test:
        - CMD
        - redis-cli
        - ping
      interval: 10s
      timeout: 5s
      retries: 3

Depends On with Health

1
2
3
4
5
6
7
services:
  app:
    # Gate app startup on the healthcheck result of each dependency rather
    # than on the dependency container merely having been started.
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy

Now the app waits for dependencies to be actually ready, not just running.

Resource Limits

Prevent a single container from consuming everything:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
services:
  # app: reserve half a CPU / 512M, cap at 2 CPUs / 2G.
  app:
    deploy:
      resources:
        reservations:
          cpus: "0.5"
          memory: 512M
        limits:
          cpus: "2"
          memory: 2G

  # postgres: reserve 1 CPU / 1G, cap at 4 CPUs / 4G.
  postgres:
    deploy:
      resources:
        reservations:
          cpus: "1"
          memory: 1G
        limits:
          cpus: "4"
          memory: 4G

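Note: Limits declared under `deploy.resources` are applied by Docker Compose v2 (`docker compose`). The legacy `docker-compose` (v1) binary only honors them when run with the `--compatibility` flag.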

Logging Configuration

Default logging can fill your disk:

1
2
3
4
5
6
7
8
services:
  app:
    # Rotate the container log: files of at most 10m each, 3 files kept,
    # rotated files compressed. Option values are strings, hence the quotes.
    logging:
      driver: json-file
      options:
        max-file: '3'
        max-size: '10m'
        compress: 'true'

Centralized Logging

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
services:
  # Ship container logs to a Fluentd collector listening on the host.
  app:
    logging:
      driver: "fluentd"
      options:
        fluentd-address: "localhost:24224"
        # Connect in the background and buffer until Fluentd is reachable;
        # without this the container stops immediately when the collector
        # is down at start time.
        fluentd-async: "true"
        tag: "app.{{.Name}}"

  # Or syslog
  app-syslog:
    logging:
      driver: "syslog"
      options:
        syslog-address: "udp://logs.example.com:514"
        tag: "myapp"

Restart Policies

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
services:
  # unless-stopped: restart automatically, but stay down after a manual stop.
  app:
    restart: unless-stopped

  # always: restart unconditionally.
  critical-service:
    restart: always

  # on-failure: restart only when the container exits with an error.
  # NOTE(review): both `restart` and `deploy.restart_policy` are set here;
  # confirm which of the two your Compose version applies.
  worker:
    restart: on-failure
    deploy:
      restart_policy:
        condition: on-failure
        max_attempts: 3
        delay: 5s
        window: 120s

Networking

Internal Services

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
services:
  # app is the only service attached to both networks and the only one
  # that publishes a port on the host.
  app:
    ports:
      # Host port 443 -> container port 3000
      - "443:3000"
    networks:
      - frontend
      - backend

  # No `ports` entry: reachable only over the backend network.
  postgres:
    networks:
      - backend

  redis:
    networks:
      - backend

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    # No external access
    internal: true

Static IPs (When Needed)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
services:
  app:
    networks:
      backend:
        # Fixed address; it lies inside the subnet declared for `backend`.
        ipv4_address: "172.20.0.10"

networks:
  backend:
    driver: bridge
    ipam:
      config:
        - subnet: "172.20.0.0/16"

Volumes and Data

Named Volumes (Preferred)

1
2
3
4
5
6
7
8
services:
  postgres:
    volumes:
      # <volume name>:<path inside the container>
      - "postgres_data:/var/lib/postgresql/data"

# Top-level declaration of the volume referenced by the service above.
volumes:
  postgres_data:
    driver: local

Bind Mounts for Config

1
2
3
4
5
6
services:
  nginx:
    volumes:
      # Config file and certificates are bind-mounted read-only from the
      # project directory.
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./certs:/etc/nginx/certs:ro
      # Named volume (not a bind mount), also mounted read-only.
      - static_files:/var/www/static:ro

# A named volume used by a service has to be declared at the top level,
# otherwise Compose rejects the file.
volumes:
  static_files: {}

Backup-Friendly Setup

1
2
3
4
5
6
7
volumes:
  # Named volume whose data is bind-backed by a fixed host directory.
  postgres_data:
    driver: local
    driver_opts:
      # Known location on host
      device: "/data/postgres"
      o: "bind"
      type: "none"

Environment Management

Multiple Environments

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
# docker-compose.yml (base)
# Settings shared by every environment; other files layer on top of this one.
services:
  app:
    image: "myapp:latest"
    environment:
      LOG_LEVEL: info

# docker-compose.override.yml (dev - auto-loaded)
# Builds from the working tree, mounts the source into /app and raises the
# log level to debug.
services:
  app:
    build:
      context: .
    environment:
      LOG_LEVEL: debug
    volumes:
      - ".:/app"

# docker-compose.prod.yml
# Production overlay: pins the image to an explicit tag and runs 3 replicas.
services:
  app:
    # `:?` aborts with this message when VERSION is unset or empty, instead
    # of silently expanding to the invalid reference "myapp:".
    image: "myapp:${VERSION:?VERSION must be set to the image tag to deploy}"
    deploy:
      replicas: 3
1
2
3
4
5
# Development: docker-compose.override.yml is picked up automatically
docker compose up

# Production: layer the prod file over the base file and run detached
docker compose --file docker-compose.yml --file docker-compose.prod.yml up --detach

Secret Management

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
services:
  app:
    # The app receives the path of the password file, not the password.
    environment:
      DB_PASSWORD_FILE: /run/secrets/db_password
    secrets:
      - db_password
      - api_key

secrets:
  # Read from a local file next to the compose file.
  db_password:
    file: ./secrets/db_password.txt
  # Created outside compose.
  # NOTE(review): external secrets are typically a Swarm feature; confirm
  # that plain `docker compose up` accepts this in your setup.
  api_key:
    external: true

Zero-Downtime Deployments

Rolling Updates

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
services:
  app:
    deploy:
      replicas: 3
      # Replace one replica at a time, 10s apart, and roll back on failure.
      # NOTE(review): update_config / rollback_config are documented for
      # Swarm stacks; verify they take effect with plain `docker compose up`.
      update_config:
        order: start-first  # Start new before stopping old
        parallelism: 1
        delay: 10s
        failure_action: rollback
      rollback_config:
        delay: 10s
        parallelism: 1

Blue-Green with Compose

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
#!/bin/bash
# deploy.sh
#
# Blue-green style deploy of the `app` service with Docker Compose.
# Usage: ./deploy.sh <version>
#
# Starts a container for the new version next to the running one, waits for
# it to become healthy, then retires the old container. If the new container
# never becomes healthy it is removed and the old one keeps serving.

set -euo pipefail

NEW_VERSION="${1:?usage: $0 <version>}"

# Export once so every compose call below resolves the same image tag.
# Otherwise the final `up` would see a changed config and recreate the
# freshly started container with the default tag.
export VERSION="$NEW_VERSION"

# `|| true`: having no running container is not an error (first deploy).
CURRENT=$(docker compose ps -q app | head -1 || true)

if [ -z "$CURRENT" ]; then
  # Nothing to replace: just start the requested version.
  docker compose up -d --no-deps app
  exit 0
fi

# Start new version alongside old. --no-recreate keeps the old container
# untouched even though the image tag in the config has changed.
docker compose up -d --no-deps --no-recreate --scale app=2 app

# The new container is whichever `app` container is not the old one.
NEW=$(docker compose ps -q app | grep -v -x -F "$CURRENT" | head -1 || true)
if [ -z "$NEW" ]; then
  echo "deploy.sh: no new app container was started" >&2
  exit 1
fi

# Wait for health check
HEALTH_FORMAT='{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}'
STATUS=$(docker inspect --format "$HEALTH_FORMAT" "$NEW")
if [ "$STATUS" = "none" ]; then
  # No healthcheck defined on the service: fall back to a fixed grace period.
  sleep 30
else
  # Poll for up to ~120s (60 attempts, 2s apart).
  for _ in $(seq 1 60); do
    if [ "$STATUS" = "healthy" ]; then
      break
    fi
    sleep 2
    STATUS=$(docker inspect --format "$HEALTH_FORMAT" "$NEW")
  done
  if [ "$STATUS" != "healthy" ]; then
    echo "deploy.sh: new container is '$STATUS', keeping the old one" >&2
    docker stop "$NEW"
    docker rm "$NEW"
    exit 1
  fi
fi

# Remove old container
docker stop "$CURRENT"
docker rm "$CURRENT"

# Scale back to 1
docker compose up -d --no-deps --no-recreate --scale app=1 app

Monitoring Integration

Prometheus Metrics

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
services:
  # Labels describing where this service's metrics endpoint lives.
  app:
    labels:
      - "prometheus.scrape=true"
      - "prometheus.port=9090"
      - "prometheus.path=/metrics"

  prometheus:
    image: prom/prometheus
    volumes:
      # The config file is only read by the container, so mount it read-only.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=15d'

# A named volume used by a service has to be declared at the top level,
# otherwise Compose rejects the file.
volumes:
  prometheus_data: {}

Container Metrics with cAdvisor

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
services:
  cadvisor:
    image: "gcr.io/cadvisor/cadvisor:latest"
    ports:
      - "8080:8080"
    # Host paths mounted read-only into the container.
    volumes:
      - "/:/rootfs:ro"
      - "/var/run:/var/run:ro"
      - "/sys:/sys:ro"
      - "/var/lib/docker/:/var/lib/docker:ro"

Complete Production Example

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Three-service stack (app + postgres + redis) with health checks, memory
# limits, log rotation and an internal-only backend network.
version: "3.8"

services:
  # NOTE(review): no `ports` are published for app; this assumes a reverse
  # proxy attached to the frontend network - confirm for your deployment.
  app:
    image: myapp:${VERSION:-latest}
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    # Start only after both backing services report healthy.
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      - NODE_ENV=production
      # `:?` aborts with this message when DB_PASSWORD is unset or empty
      # instead of silently substituting an empty password.
      - "DATABASE_URL=postgres://app:${DB_PASSWORD:?DB_PASSWORD must be set}@postgres:5432/app"
      - REDIS_URL=redis://redis:6379
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          memory: 512M
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - frontend
      - backend

  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    healthcheck:
      # Probes with the same user that POSTGRES_USER creates below.
      test: ["CMD-SHELL", "pg_isready -U app"]
      interval: 10s
      timeout: 5s
      retries: 5
    environment:
      - POSTGRES_USER=app
      # Same fail-fast guard as in the app's DATABASE_URL.
      - "POSTGRES_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}"
      - POSTGRES_DB=app
    volumes:
      - postgres_data:/var/lib/postgresql/data
    deploy:
      resources:
        limits:
          memory: 4G
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - backend

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 3
    volumes:
      - redis_data:/data
    # Append-only file persistence, written to the /data volume above.
    command: redis-server --appendonly yes
    deploy:
      resources:
        limits:
          memory: 512M
    # Same rotation as app and postgres so redis logs cannot fill the disk.
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - backend

volumes:
  postgres_data: {}
  redis_data: {}

networks:
  frontend: {}
  # postgres and redis are attached only to this network.
  backend:
    internal: true

Quick Reference

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# Bring the stack up, detached from the terminal
docker compose up --detach

# Follow the log output of one service
docker compose logs --follow app

# Restart just the app service
docker compose restart app

# Fetch newer images, then apply them
docker compose pull && docker compose up --detach

# Run three instances of app
docker compose up --detach --scale app=3

# Open a shell inside the running app container
docker compose exec app sh

# Show resource usage
docker compose stats

Docker Compose can absolutely run production workloads. The gap between tutorial and production is health checks, resource limits, proper logging, and deployment strategy. Add these incrementally—start with health checks and logging, then add resource limits and deployment patterns as you scale.