Docker Compose is great for local development. Getting it production-ready requires a different mindset. Here’s what changes and why.
## The Development vs Production Gap

Your dev docker-compose.yml probably looks like this:
1
2
3
4
5
6
7
8
9
10
# Typical development-only docker-compose.yml (NOT production-safe).
version: '3.8'
services:
  app:
    build: .  # image is built from the local Dockerfile
    ports:
      - "3000:3000"
    volumes:
      - .:/app  # bind-mounts the source tree into the container
    environment:
      - DEBUG=true
This works locally but fails in production:
- Source code mounted as a volume (no isolation)
- Debug mode enabled
- No resource limits
- No health checks
- No restart policies
- Building on deploy (slow, inconsistent)

## Production-Ready Structure

Split your configuration:
├── docker-compose.yml           # Base configuration
├── docker-compose.override.yml  # Dev overrides (auto-loaded)
├── docker-compose.prod.yml      # Production overrides
└── .env.production              # Production environment
Base Configuration# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# docker-compose.yml
# Base configuration shared by every environment. Values come from the
# environment / .env file via ${VAR} interpolation.
version: '3.8'
services:
  app:
    image: ${APP_IMAGE:-myapp:latest}
    environment:
      - NODE_ENV=${NODE_ENV:-development}
      - DATABASE_URL=${DATABASE_URL}
    depends_on:
      db:
        # Start app only after the db health check passes.
        condition: service_healthy
    networks:
      - internal
  db:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=${POSTGRES_DB}
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    volumes:
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # Probe the configured database explicitly; without -d, pg_isready
      # falls back to a database named after the user.
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - internal
networks:
  internal: {}
volumes:
  postgres_data: {}
Development Override# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# docker-compose.override.yml (auto-loaded in dev)
# Development-only additions layered on top of docker-compose.yml.
version: '3.8'
services:
  app:
    build:
      context: .
      target: development
    volumes:
      - .:/app
      # Anonymous volume so the bind mount above does not hide node_modules.
      - /app/node_modules
    ports:
      - "3000:3000"
      - "9229:9229"  # Debug port
    environment:
      - DEBUG=true
    command: npm run dev
  db:
    ports:
      - "5432:5432"  # Expose for local tools
Production Override# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# docker-compose.prod.yml
# Production overrides: pre-built image, replicas, resource caps,
# restart policy, health check and log rotation.
version: '3.8'
services:
  app:
    image: ${APP_IMAGE}
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 256M
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    healthcheck:
      # Requires curl to be present in the app image.
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    logging:
      driver: json-file
      options:
        # Rotate logs so they cannot fill the disk.
        max-size: "10m"
        max-file: "3"
  db:
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 1G
Running in Production# 1
2
3
4
5
6
# Deploy with production config (later -f files override earlier ones)
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
# Or set COMPOSE_FILE (no spaces around '=' in a shell assignment)
export COMPOSE_FILE=docker-compose.yml:docker-compose.prod.yml
docker compose up -d
Health Checks That Work# Health checks need to verify the service actually works, not just that it’s running:
1
2
3
4
5
6
7
8
# HTTP health check against the service's own /health endpoint.
services:
  api:
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s  # Give app time to start
For apps without curl:
1
2
# Service-level fragment: wget-based probe for images that ship without curl.
healthcheck:
  test: ["CMD-SHELL", "wget -q --spider http://localhost:8080/health || exit 1"]
Or use a dedicated health check script:
1
2
3
4
5
6
7
8
9
10
11
#!/bin/sh
# healthcheck.sh
# Exits non-zero (via set -e) if either the HTTP endpoint or the
# database probe fails; exits 0 only when both succeed.
set -e
# Check the app responds
curl -sf http://localhost:8080/health > /dev/null
# Check database connectivity (quoted so the value is passed as one argument)
pg_isready -h db -U "$POSTGRES_USER" > /dev/null
exit 0
1
2
# Service-level fragment: delegate the probe to a script inside the image.
healthcheck:
  test: ["CMD", "/app/healthcheck.sh"]
Resource Limits# Always set limits in production. Without them, one container can starve others:
1
2
3
4
5
6
7
8
9
10
# Hard caps (limits) and guaranteed minimums (reservations) for one service.
services:
  app:
    deploy:
      resources:
        limits:
          cpus: '1.0'  # Max 1 CPU
          memory: 512M  # Max 512MB RAM
        reservations:
          cpus: '0.25'  # Guaranteed 0.25 CPU
          memory: 128M  # Guaranteed 128MB RAM
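Docker Compose v2 applies `deploy.resources` limits without Swarm. With the legacy `docker-compose` v1 (which ignores `deploy` outside Swarm unless run with `--compatibility`), use the older service-level syntax: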
1
2
3
4
# Service-level limits (the pre-`deploy` syntax).
services:
  app:
    mem_limit: 512m
    cpus: 1.0
Secrets Management# Never put secrets in compose files or images:
1
2
3
4
5
6
7
8
9
10
11
12
13
# Secrets are declared once at the top level and granted per service.
services:
  app:
    environment:
      - DATABASE_URL  # Value from host environment
    secrets:
      - db_password
      - api_key
secrets:
  db_password:
    file: ./secrets/db_password.txt
  api_key:
    external: true  # Created via `docker secret create`
In the app, secrets are available at /run/secrets/<secret_name>.
For simpler setups, use .env files:
1
2
3
# .env.production (not in git!)
# KEY=value, one per line, with no spaces around '='.
DATABASE_URL=postgres://user:pass@db:5432/myapp
API_KEY=sk-xxxx
1
# Start the stack detached, reading variables from .env.production
docker compose --env-file .env.production up -d
Logging Configuration# Default logging fills disks. Configure rotation:
1
2
3
4
5
6
7
8
9
# json-file logging with rotation: at most 5 files of 10 MB each.
services:
  app:
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "5"
        labels: "service"
        tag: "{{.Name}}/{{.ID}}"
Or send to external logging:
1
2
3
4
5
# Service-level fragment: ship logs to a remote syslog endpoint.
logging:
  driver: syslog
  options:
    syslog-address: "tcp://logs.example.com:514"
    tag: "myapp/{{.Name}}"
Networking Best Practices# Internal Networks# Services that don’t need external access shouldn’t have it:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Two-tier networking: nginx is the only published entry point; db sits on
# an internal-only network shared with app.
services:
  app:
    networks:
      - frontend
      - backend
  db:
    networks:
      - backend  # Only app can reach db
  nginx:
    networks:
      - frontend
    ports:
      - "80:80"  # Only nginx exposed
networks:
  frontend: {}
  backend:
    internal: true  # No external access
Custom DNS# 1
2
3
4
5
6
7
# Per-service DNS resolvers and search domain.
services:
  app:
    dns:
      - 8.8.8.8
      - 8.8.4.4
    dns_search:
      - example.com
Deployment Strategies# Zero-Downtime Updates# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
#!/bin/bash
# deploy.sh
# Pull the new image, temporarily scale app up, then scale back down.
# Abort on the first failing command instead of continuing the rollout.
set -euo pipefail
# Pull new image
docker compose pull app
# Scale up new instances (--scale takes SERVICE=NUM with no spaces)
docker compose up -d --no-deps --scale app=4 app
# Wait for health checks
sleep 30
# Scale back down
docker compose up -d --no-deps --scale app=2 app
Blue-Green with Compose# 1
2
3
4
5
6
7
8
9
10
11
# Two separate files, shown as two YAML documents.
---
# docker-compose.blue.yml
services:
  app:
    image: myapp:v1
    container_name: app-blue
---
# docker-compose.green.yml
services:
  app:
    image: myapp:v2
    container_name: app-green
Switch nginx upstream between blue and green.
Backup and Recovery# Database Backups# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Sidecar that dumps the database once a day and prunes dumps older
# than 7 days. Connection settings come from the PG* variables.
services:
  db-backup:
    image: postgres:15-alpine
    volumes:
      - ./backups:/backups
    environment:
      - PGHOST=db
      - PGUSER=${POSTGRES_USER}
      - PGPASSWORD=${POSTGRES_PASSWORD}
    # List form plus a literal block scalar keeps the script's newlines
    # intact. `$$` escapes Compose interpolation so the shell sees `$(...)`.
    command:
      - sh
      - "-c"
      - |
        while true; do
          pg_dump -Fc myapp > "/backups/myapp-$$(date +%Y%m%d-%H%M%S).dump"
          # Only prune dump files, never the mount point or unrelated files.
          find /backups -type f -name '*.dump' -mtime +7 -delete
          sleep 86400
        done
    depends_on:
      db:
        # Requires the db service to define a healthcheck.
        condition: service_healthy
Volume Backups# 1
2
3
4
5
6
7
8
9
10
11
# Backup a volume (mounted read-only) into ./backups as a dated tarball
docker run --rm \
  -v myapp_data:/data:ro \
  -v "$(pwd)/backups:/backup" \
  alpine tar czf "/backup/data-$(date +%Y%m%d).tar.gz" -C /data .
# Restore a specific tarball back into the volume
docker run --rm \
  -v myapp_data:/data \
  -v "$(pwd)/backups:/backup" \
  alpine tar xzf /backup/data-20260313.tar.gz -C /data
Monitoring# Basic Stats# 1
2
3
4
5
6
# Watch resource usage
docker stats
# Compose-specific: running processes, then follow the last 100 log lines
docker compose top
docker compose logs -f --tail=100
Prometheus Integration# 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# App labelled for scraping, plus a Prometheus server with persistent storage.
services:
  app:
    labels:
      - "prometheus.scrape=true"
      - "prometheus.port=9090"
      - "prometheus.path=/metrics"
  prometheus:
    image: prom/prometheus
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=15d'
# Named volumes must be declared at the top level, or Compose rejects the file.
volumes:
  prometheus_data: {}
Common Mistakes# Building on Deploy# 1
2
3
4
5
6
7
8
9
# Two alternatives, shown as separate YAML documents.
---
# Bad: builds every deploy
services:
  app:
    build: .
---
# Good: use pre-built images
services:
  app:
    image: registry.example.com/myapp:${VERSION}
No Dependency Conditions# 1
2
3
4
5
6
7
8
# Service-level fragments; two alternatives shown as separate YAML documents.
---
# Bad: only waits for the db container to start, not for it to be ready
depends_on:
  - db
---
# Good: waits for health
depends_on:
  db:
    condition: service_healthy
Hardcoded Configurations# 1
2
3
4
5
6
7
# Service-level fragments; two alternatives shown as separate YAML documents.
---
# Bad: credentials hardcoded in the compose file
environment:
  - DATABASE_URL=postgres://user:password@db:5432/app
---
# Good: value injected from the environment / env file
environment:
  - DATABASE_URL=${DATABASE_URL}
## When to Move Beyond Compose

Compose works well for:

- Single-host deployments
- Small teams
- Simple scaling needs
- Development environments

Consider Kubernetes/Swarm when you need:

- Multi-host orchestration
- Advanced scheduling
- Rolling updates with traffic shifting
- Service mesh features

But don’t move too early. Many production workloads run fine on well-configured Compose for years.