Docker Compose is great for local development. Getting it production-ready requires a different mindset. Here’s what changes and why.

The Development vs Production Gap

Your dev docker-compose.yml probably looks like this:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Typical dev-only setup; each convenience here is a liability in production.
version: '3.8'
services:
  app:
    # Builds the image from the current directory instead of using a release.
    build: .
    ports:
      - "3000:3000"
    volumes:
      # Bind-mounts the project directory over /app inside the container.
      - .:/app
    environment:
      - DEBUG=true

This works locally but fails in production:

  • Source code mounted as volume (no isolation)
  • Debug mode enabled
  • No resource limits
  • No health checks
  • No restart policies
  • Building on deploy (slow, inconsistent)

Production-Ready Structure

Split your configuration:

docker-compose.yml           # Base configuration
docker-compose.override.yml  # Dev overrides (auto-loaded)
docker-compose.prod.yml      # Production overrides
.env.production              # Production environment

Base Configuration

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# docker-compose.yml
# Shared base for every environment; the dev and prod files layer on top.
# No top-level `version` key: Compose v2 ignores it and warns it is obsolete.

services:
  app:
    # Image reference comes from the environment; falls back to myapp:latest.
    image: ${APP_IMAGE:-myapp:latest}
    environment:
      - NODE_ENV=${NODE_ENV:-development}
      - DATABASE_URL=${DATABASE_URL}
    depends_on:
      db:
        # Start app only once the db health check below reports healthy.
        condition: service_healthy
    networks:
      - internal

  db:
    image: postgres:15-alpine
    environment:
      - POSTGRES_DB=${POSTGRES_DB}
      - POSTGRES_USER=${POSTGRES_USER}
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
    volumes:
      # Named volume so the data outlives the container.
      - postgres_data:/var/lib/postgresql/data
    healthcheck:
      # -d targets the real database; without it pg_isready probes a
      # database named after the user.
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - internal

networks:
  internal: {}

volumes:
  postgres_data: {}

Development Override

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
# docker-compose.override.yml (auto-loaded in dev)
# No top-level `version` key: Compose v2 ignores it and warns it is obsolete.

services:
  app:
    # Build locally from the Dockerfile's `development` stage.
    build:
      context: .
      target: development
    volumes:
      # Live source from the host ...
      - .:/app
      # ... while an anonymous volume covers /app/node_modules, so the host
      # bind mount does not replace that directory.
      - /app/node_modules
    ports:
      - "3000:3000"
      - "9229:9229"  # Debug port
    environment:
      - DEBUG=true
    command: npm run dev

  db:
    ports:
      - "5432:5432"  # Expose for local tools

Production Override

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# docker-compose.prod.yml
# Production overrides, applied on top of docker-compose.yml.
# No top-level `version` key: Compose v2 ignores it and warns it is obsolete.

services:
  app:
    # Fail fast with a readable error when APP_IMAGE is unset or empty,
    # instead of attempting a deploy with a blank image reference.
    image: ${APP_IMAGE:?APP_IMAGE must be set to the image to deploy}
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '1'
          memory: 512M
        reservations:
          cpus: '0.25'
          memory: 256M
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    healthcheck:
      # Probes the app's own endpoint; needs curl inside the app image.
      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    logging:
      driver: json-file
      options:
        # Rotate: at most 3 files of 10 MB each.
        max-size: "10m"
        max-file: "3"

  db:
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 1G

Running in Production

1
2
3
4
5
6
# Deploy with production config (files are merged in the order given)
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d

# Or set COMPOSE_FILE (colon-separated) so later commands need no -f flags
export COMPOSE_FILE=docker-compose.yml:docker-compose.prod.yml
docker compose up -d

Health Checks That Work

Health checks need to verify the service actually works, not just that it’s running:

1
2
3
4
5
6
7
8
services:
  api:
    healthcheck:
      # Hit the app's own health endpoint rather than only checking that
      # the process exists.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8080/health"
      start_period: 60s  # Give app time to start
      interval: 30s
      timeout: 10s
      retries: 3

For apps without curl:

1
2
healthcheck:
  # wget variant of the same probe; any failure is reported as exit code 1.
  test: ["CMD-SHELL", "wget -q --spider http://localhost:8080/health || exit 1"]

Or use a dedicated health check script:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
#!/bin/sh
# healthcheck.sh
# Exits 0 only when both the app endpoint and the database respond.
set -e

# Check the app responds (-f: treat HTTP errors as failure, -s: no output)
curl -sf http://localhost:8080/health > /dev/null

# Check database connectivity. The expansion is quoted so the value is passed
# as a single argument, and :? aborts with a clear message when the variable
# is unset or empty.
pg_isready -h db -U "${POSTGRES_USER:?POSTGRES_USER is not set}" > /dev/null

exit 0

Then point the service's health check at the script:

healthcheck:
  # Runs the script directly; it must exist at this path inside the image.
  test: ["CMD", "/app/healthcheck.sh"]

Resource Limits

Always set limits in production. Without them, one container can starve others:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
services:
  app:
    deploy:
      resources:
        # Floor: capacity set aside for the container.
        reservations:
          cpus: "0.25"     # Guaranteed 0.25 CPU
          memory: 128M     # Guaranteed 128MB RAM
        # Ceiling: the container is never given more than this.
        limits:
          cpus: "1.0"      # Max 1 CPU
          memory: 512M     # Max 512MB RAM

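Docker Compose v2 applies these deploy.resources limits even without Swarm. The legacy docker-compose v1 ignores the deploy section outside Swarm unless it is run with --compatibility; there, use the older service-level syntax: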

1
2
3
4
services:
  app:
    # Service-level counterparts of deploy.resources.limits.
    mem_limit: 512m
    cpus: 1.0

Secrets Management

Never put secrets in compose files or images:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
services:
  app:
    environment:
      - DATABASE_URL  # Value from host environment
    # Secrets granted to this service; each one is available to the app as a
    # file under /run/secrets/<secret_name>.
    secrets:
      - db_password
      - api_key

secrets:
  db_password:
    # Contents are read from this file on the host.
    file: ./secrets/db_password.txt
  api_key:
    external: true  # Created via `docker secret create`

In the app, secrets are available at /run/secrets/<secret_name>.

For simpler setups, use .env files:

1
2
3
# .env.production (not in git!)
# Placeholder values shown; substitute the real connection string and key.
DATABASE_URL=postgres://user:pass@db:5432/myapp
API_KEY=sk-xxxx

Then pass the file to Compose explicitly:

# Load variables from .env.production rather than the default .env file
docker compose --env-file .env.production up -d

Logging Configuration

Default logging fills disks. Configure rotation:

1
2
3
4
5
6
7
8
9
services:
  app:
    logging:
      driver: json-file
      options:
        # Rotation: at most 5 files of 10 MB each are kept.
        max-file: "5"
        max-size: "10m"
        # Extra metadata attached to each log entry.
        tag: "{{.Name}}/{{.ID}}"
        labels: "service"

Or send to external logging:

1
2
3
4
5
logging:
  # Ship logs to a remote syslog endpoint over TCP instead of local files.
  driver: syslog
  options:
    syslog-address: "tcp://logs.example.com:514"
    tag: "myapp/{{.Name}}"

Networking Best Practices

Internal Networks

Services that don’t need external access shouldn’t have it:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
networks:
  backend:
    internal: true  # No external access
  frontend: {}

services:
  # Edge proxy: the single service that publishes a host port.
  nginx:
    ports:
      - "80:80"  # Only nginx exposed
    networks:
      - frontend

  # Sits on both networks, bridging the proxy and the database.
  app:
    networks:
      - frontend
      - backend

  db:
    networks:
      - backend  # Only app can reach db

Custom DNS

1
2
3
4
5
6
7
services:
  app:
    # DNS servers configured for the container.
    dns: ["8.8.8.8", "8.8.4.4"]
    # Search domains configured for the container.
    dns_search: ["example.com"]

Deployment Strategies

Zero-Downtime Updates

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
#!/bin/bash
# deploy.sh
# Rolls the `app` service onto a freshly pulled image by temporarily running
# extra replicas, so requests keep being served during the update.

# Abort on the first failed step rather than carrying on with a half-done deploy.
set -euo pipefail

# Pull new image
docker compose pull app

# Scale up new instances.
# --no-recreate leaves the running containers alone, so the old version keeps
# serving while the additional replicas start.
# --wait blocks until the service's containers are running/healthy, replacing
# a fixed sleep that could be too short or needlessly long.
docker compose up -d --no-deps --no-recreate --wait --scale app=4 app

# Scale back down
# NOTE(review): this relies on Compose removing the outdated containers first
# when scaling down — confirm with the Compose version you deploy with.
docker compose up -d --no-deps --wait --scale app=2 app

Blue-Green with Compose

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# docker-compose.blue.yml
# "Blue" stack: runs image tag v1 under a fixed container name.
services:
  app:
    image: myapp:v1
    container_name: app-blue
    
# docker-compose.green.yml
# "Green" stack: runs image tag v2 under its own fixed container name.
services:
  app:
    image: myapp:v2
    container_name: app-green

Switch nginx upstream between blue and green.

Backup and Recovery

Database Backups

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
services:
  # Sidecar that dumps the database once a day and prunes dumps older than
  # seven days.
  db-backup:
    image: postgres:15-alpine
    volumes:
      - ./backups:/backups
    environment:
      - PGHOST=db
      - PGUSER=${POSTGRES_USER}
      - PGPASSWORD=${POSTGRES_PASSWORD}
      # pg_dump takes the target database from PGDATABASE; the default keeps
      # the previously hard-coded name.
      - PGDATABASE=${POSTGRES_DB:-myapp}
    # `$$` escapes `$` from Compose interpolation so the container's shell
    # performs the expansion.
    command:
      - "sh"
      - "-c"
      - |
        while true; do
          out="/backups/$$PGDATABASE-$$(date +%Y%m%d-%H%M%S).dump"
          # Dump under a temporary name first so a failed run never leaves a
          # truncated file that looks like a valid backup.
          if pg_dump -Fc -f "$$out.partial"; then
            mv "$$out.partial" "$$out"
          else
            echo "backup failed: $$out" >&2
            rm -f "$$out.partial"
          fi
          # Prune only dump files; leave anything else in the mount untouched.
          find /backups -type f -name '*.dump' -mtime +7 -delete
          sleep 86400
        done
    depends_on:
      db:
        # Wait for a healthy database (requires db to define a healthcheck).
        condition: service_healthy

Volume Backups

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Backup a volume
# The volume is mounted read-only; the archive lands in ./backups on the host.
# "$(pwd)/..." is quoted so directories containing spaces still work.
docker run --rm \
  -v myapp_data:/data:ro \
  -v "$(pwd)/backups:/backup" \
  alpine tar czf "/backup/data-$(date +%Y%m%d).tar.gz" -C /data .

# Restore
# Unpacks the chosen archive into the volume.
docker run --rm \
  -v myapp_data:/data \
  -v "$(pwd)/backups:/backup" \
  alpine tar xzf /backup/data-20260313.tar.gz -C /data

Monitoring

Basic Stats

1
2
3
4
5
6
# Watch resource usage (live, per container)
docker stats

# Compose-specific: running processes per service, then follow recent logs
docker compose top
docker compose logs -f --tail=100

Prometheus Integration

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
services:
  app:
    # NOTE(review): these labels are metadata only; Prometheus presumably acts
    # on them only if prometheus.yml discovers containers and relabels on
    # them — confirm against the scrape config.
    labels:
      - "prometheus.scrape=true"
      - "prometheus.port=9090"
      - "prometheus.path=/metrics"

  prometheus:
    image: prom/prometheus
    volumes:
      # Config is mounted read-only; the container only needs to read it.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=15d'

# Named volumes must be declared at the top level, otherwise Compose rejects
# the reference to prometheus_data above.
volumes:
  prometheus_data: {}

Common Mistakes

Building on Deploy

1
2
3
4
5
6
7
8
9
# Bad: builds every deploy
services:
  app:
    build: .

# Good: use pre-built images
# VERSION selects the tag that gets pulled from the registry.
services:
  app:
    image: registry.example.com/myapp:${VERSION}

No Dependency Conditions

1
2
3
4
5
6
7
8
# Bad: only waits for the db container to start, not for db to be ready
depends_on:
  - db

# Good: waits until db's health check reports healthy
depends_on:
  db:
    condition: service_healthy

Hardcoded Configurations

1
2
3
4
5
6
7
# Bad: credentials are written directly into the compose file
environment:
  - DATABASE_URL=postgres://user:password@db:5432/app

# Good: value is substituted from the environment when Compose runs
environment:
  - DATABASE_URL=${DATABASE_URL}

When to Move Beyond Compose

Compose works well for:

  • Single-host deployments
  • Small teams
  • Simple scaling needs
  • Development environments

Consider Kubernetes/Swarm when you need:

  • Multi-host orchestration
  • Advanced scheduling
  • Rolling updates with traffic shifting
  • Service mesh features

But don’t move too early. Many production workloads run fine on well-configured Compose for years. 🌍