Docker Compose tutorials show you `docker compose up`. Production requires health checks, resource limits, proper logging, restart policies, and deployment strategies. Here’s how to bridge that gap.
Base Configuration#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# docker-compose.yml
# Minimal single-service definition used as the starting point.
# The top-level `version` key is omitted: Compose v2 treats it as obsolete
# and only prints a warning when it is present.
services:
  app:
    # Tag comes from $VERSION; falls back to "latest" when unset or empty
    image: myapp:${VERSION:-latest}
    build:
      context: .
      dockerfile: Dockerfile
    restart: unless-stopped
    environment:
      - NODE_ENV=production
    env_file:
      - .env
    ports:
      # Quoted so the host:container mapping is always read as a string
      - "3000:3000"
This is a starting point. Let’s make it production-ready.
Health Checks#
Don’t assume a running container is healthy:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Health checks for the application and its backing services.
services:
  app:
    image: myapp:latest
    healthcheck:
      # Exec form: runs curl inside the container, so curl must exist in the image.
      # -f makes curl exit non-zero on HTTP error responses.
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period for startup before failed probes count against retries
      start_period: 40s

  postgres:
    image: postgres:15
    healthcheck:
      # CMD-SHELL runs the string through the container's shell
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 10s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 3
Depends On with Health#
1
2
3
4
5
6
7
# Start `app` only after both dependencies report a passing health check.
services:
  app:
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
Now the app waits for dependencies to be actually ready, not just running.
Resource Limits#
Prevent a single container from consuming everything:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Per-service CPU and memory ceilings (limits) and guaranteed minimums
# (reservations).
services:
  app:
    deploy:
      resources:
        limits:
          # CPU counts are quoted so they stay strings
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M

  postgres:
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G
        reservations:
          cpus: '1'
          memory: 1G
Note: `deploy.resources` limits are applied by Docker Compose v2 (`docker compose`). The legacy `docker-compose` (v1) binary only honors them when run with the `--compatibility` flag.
Logging Configuration#
Default logging can fill your disk:
1
2
3
4
5
6
7
8
# Rotate container logs so they cannot grow without bound.
services:
  app:
    logging:
      driver: "json-file"
      # Driver options are passed as strings, hence the quotes
      options:
        max-size: "10m"
        max-file: "3"
        compress: "true"
Centralized Logging#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Ship logs to a central collector instead of local files.
services:
  app:
    logging:
      driver: "fluentd"
      options:
        fluentd-address: "localhost:24224"
        # Quoted: the value contains a Go template ({{.Name}}), which YAML
        # would otherwise try to parse as a flow mapping
        tag: "app.{{.Name}}"

  # Or syslog
  app-syslog:
    logging:
      driver: "syslog"
      options:
        syslog-address: "udp://logs.example.com:514"
        tag: "myapp"
Restart Policies#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# The three common restart behaviours, one service each.
services:
  # Always restart (except manual stop)
  app:
    restart: unless-stopped

  # Always restart, period
  critical-service:
    restart: always

  # Restart on failure only
  worker:
    restart: on-failure
    deploy:
      # Finer-grained policy: how long to wait, how many attempts, and the
      # window used to decide whether a restart succeeded
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
Networking#
Internal Services#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Two-tier network layout: only `app` is reachable from the host; the data
# stores sit on an internal-only network.
services:
  app:
    networks:
      - frontend
      - backend
    ports:
      - "443:3000"  # Exposed to host

  postgres:
    networks:
      - backend
    # No ports exposed - internal only

  redis:
    networks:
      - backend

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    internal: true  # No external access
Static IPs (When Needed)#
1
2
3
4
5
6
7
8
9
10
11
12
# Pin a service to a fixed address on a user-defined subnet.
services:
  app:
    networks:
      backend:
        # Must fall inside the subnet declared for `backend` below
        ipv4_address: 172.20.0.10

networks:
  backend:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
Volumes and Data#
Named Volumes (Preferred)#
1
2
3
4
5
6
7
8
# Persist database files in a Docker-managed named volume.
services:
  postgres:
    volumes:
      - postgres_data:/var/lib/postgresql/data

# Named volumes referenced by a service must be declared at the top level
volumes:
  postgres_data:
    driver: local
Bind Mounts for Config#
1
2
3
4
5
6
# Mount configuration and certificates from the host, read-only.
services:
  nginx:
    volumes:
      # Bind mounts (host path on the left); :ro prevents writes from the container
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./certs:/etc/nginx/certs:ro
      # Named volume, not a host path
      - static_files:/var/www/static:ro

# Declared here because Compose rejects a service that references an
# undefined named volume
volumes:
  static_files:
Backup-Friendly Setup#
1
2
3
4
5
6
7
# Named volume backed by a fixed host directory, so backups can target a
# known path.
volumes:
  postgres_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /data/postgres  # Known location on host
Environment Management#
Multiple Environments#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Three separate files, shown here as three YAML documents.
---
# docker-compose.yml (base)
services:
  app:
    image: myapp:latest
    environment:
      - LOG_LEVEL=info
---
# docker-compose.override.yml (dev - auto-loaded)
services:
  app:
    build: .
    volumes:
      # Mount the working tree into the container for live development
      - .:/app
    environment:
      - LOG_LEVEL=debug
---
# docker-compose.prod.yml
services:
  app:
    image: myapp:${VERSION}
    deploy:
      replicas: 3
1
2
3
4
5
# Development: docker-compose.override.yml is loaded automatically
docker compose up

# Production: layer the prod file over the base file and run detached
docker compose --file docker-compose.yml --file docker-compose.prod.yml up --detach
Secret Management#
1
2
3
4
5
6
7
8
9
10
11
12
13
# Provide credentials as files under /run/secrets instead of plain
# environment values.
services:
  app:
    secrets:
      - db_password
      - api_key
    environment:
      # The app is handed the path to the secret file, not the secret itself
      - DB_PASSWORD_FILE=/run/secrets/db_password

secrets:
  db_password:
    file: ./secrets/db_password.txt
  api_key:
    external: true  # Created outside compose
Zero-Downtime Deployments#
Rolling Updates#
1
2
3
4
5
6
7
8
9
10
11
12
# Replace replicas one at a time and roll back automatically on failure.
# NOTE(review): update_config / rollback_config are orchestrator settings;
# confirm your deployment tool applies them (e.g. `docker stack deploy` on
# Swarm) - a plain `docker compose up` may ignore them.
services:
  app:
    deploy:
      replicas: 3
      update_config:
        parallelism: 1
        delay: 10s
        failure_action: rollback
        order: start-first  # Start new before stopping old
      rollback_config:
        parallelism: 1
        delay: 10s
Blue-Green with Compose#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#!/bin/bash
# deploy.sh - blue-green style redeploy of the `app` service.
#
# Usage: ./deploy.sh <new-version>
#
# Starts a container with the new image next to the running one, waits until
# Docker reports it healthy, then removes the old container.
#
# Environment:
#   HEALTH_TIMEOUT  seconds to wait for the new container (default: 120)
#
# Requires that `app` does not publish a fixed host port: two containers
# cannot bind the same host port while both are running.
set -euo pipefail

if [ "$#" -ne 1 ] || [ -z "$1" ]; then
  echo "Usage: $0 <new-version>" >&2
  exit 1
fi

NEW_VERSION=$1
# Exported so EVERY compose call below resolves ${VERSION} to the same image;
# otherwise the final scale-down would fall back to the default tag.
export VERSION="$NEW_VERSION"
HEALTH_TIMEOUT=${HEALTH_TIMEOUT:-120}

CURRENT=$(docker compose ps -q app | head -n 1)

# Nothing running yet: a plain start is all that is needed
if [ -z "$CURRENT" ]; then
  docker compose up -d --no-deps app
  exit 0
fi

# Start new version alongside old. --no-recreate keeps the old container
# untouched even though its image tag no longer matches the config.
docker compose up -d --no-deps --no-recreate --scale app=2 app

# The new container is whichever app container is not the old one
NEW=$(docker compose ps -q app | grep -v "^${CURRENT}\$" | head -n 1 || true)
if [ -z "$NEW" ]; then
  echo "deploy: new app container was not created" >&2
  exit 1
fi

# Wait for health check
elapsed=0
while :; do
  status=$(docker inspect \
    --format '{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' \
    "$NEW")
  case "$status" in
    healthy)
      break
      ;;
    none)
      # No healthcheck defined: fall back to a fixed warm-up delay
      echo "deploy: no healthcheck on app; waiting 30s instead" >&2
      sleep 30
      break
      ;;
  esac
  if [ "$elapsed" -ge "$HEALTH_TIMEOUT" ]; then
    echo "deploy: new container not healthy after ${HEALTH_TIMEOUT}s (status: $status); rolling back" >&2
    docker stop "$NEW"
    docker rm "$NEW"
    exit 1
  fi
  sleep 2
  elapsed=$((elapsed + 2))
done

# Remove old container
docker stop "$CURRENT"
docker rm "$CURRENT"

# Scale back to 1 so compose's desired state matches what is running
docker compose up -d --no-deps --no-recreate --scale app=1 app
Monitoring Integration#
Prometheus Metrics#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Prometheus server plus scrape hints on the application container.
services:
  app:
    # NOTE(review): these labels are plain container metadata; confirm that
    # prometheus.yml has a discovery/relabel config that actually reads them.
    labels:
      - "prometheus.scrape=true"
      - "prometheus.port=9090"
      - "prometheus.path=/metrics"

  prometheus:
    image: prom/prometheus
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=15d'

# Declared here because Compose rejects a service that references an
# undefined named volume
volumes:
  prometheus_data:
Container Metrics with cAdvisor#
1
2
3
4
5
6
7
8
9
10
# cAdvisor container exposing per-container resource metrics on port 8080.
services:
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      # Host paths are mounted read-only
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
Complete Production Example#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Complete stack: app + postgres + redis with health checks, resource
# limits, log rotation and network isolation.
# The top-level `version` key is omitted: Compose v2 treats it as obsolete.
services:
  app:
    # Tag comes from $VERSION; falls back to "latest" when unset or empty
    image: myapp:${VERSION:-latest}
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    # Wait until both stores pass their health checks
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    environment:
      - NODE_ENV=production
      # :? aborts `docker compose` with this message when DB_PASSWORD is
      # unset or empty, instead of silently using an empty password
      - "DATABASE_URL=postgres://app:${DB_PASSWORD:?DB_PASSWORD must be set}@postgres:5432/app"
      - REDIS_URL=redis://redis:6379
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          memory: 512M
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    # No host port is published here; add `ports:` or attach a reverse proxy
    # to the frontend network to reach the app from outside.
    networks:
      - frontend
      - backend

  postgres:
    image: postgres:15-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U app"]
      interval: 10s
      timeout: 5s
      retries: 5
    environment:
      - POSTGRES_USER=app
      - "POSTGRES_PASSWORD=${DB_PASSWORD:?DB_PASSWORD must be set}"
      - POSTGRES_DB=app
    volumes:
      - postgres_data:/var/lib/postgresql/data
    deploy:
      resources:
        limits:
          memory: 4G
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - backend

  redis:
    image: redis:7-alpine
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 5s
      retries: 3
    volumes:
      - redis_data:/data
    # Append-only file persistence, written to the redis_data volume
    command: redis-server --appendonly yes
    deploy:
      resources:
        limits:
          memory: 512M
    # Same rotation as the other services so no container logs unbounded
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - backend

volumes:
  postgres_data:
  redis_data:

networks:
  frontend:
  backend:
    # Containers on this network get no outbound/external connectivity
    internal: true
Quick Reference#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Bring the stack up in the background
docker compose up --detach

# Follow the logs of one service
docker compose logs --follow app

# Restart a single service
docker compose restart app

# Pull newer images, then recreate whatever changed
docker compose pull && docker compose up --detach

# Run three instances of a service
docker compose up --detach --scale app=3

# Open a shell inside the running container
docker compose exec app sh

# Show live resource usage
docker compose stats
Docker Compose can absolutely run production workloads. The gap between tutorial and production is health checks, resource limits, proper logging, and deployment strategy. Add these incrementally—start with health checks and logging, then add resource limits and deployment patterns as you scale.