One server is a single point of failure. Multiple servers need traffic distribution.

That’s where load balancers come in.

Why Load Balance?

  • High availability: If one server dies, others keep serving
  • Scalability: Add servers to handle more traffic
  • Performance: Distribute load evenly, reduce response times
  • Maintenance: Take servers offline without downtime

Load Balancing Algorithms

Round Robin

Requests cycle through servers in order.

Request 1 → Server A
Request 2 → Server B
Request 3 → Server C
Request 4 → Server A

Best for: Stateless applications, servers with equal capacity.

Weighted Round Robin

Some servers get more traffic than others.

1
2
3
4
5
# Weighted round robin: traffic is split roughly 5:3:1 across the pool.
upstream backend {
    server 192.168.1.10 weight=5;  # Gets 5x traffic
    server 192.168.1.11 weight=3;  # Gets 3x traffic
    server 192.168.1.12 weight=1;  # Gets 1x traffic (default weight is 1)
}

Best for: Mixed server sizes, gradual rollouts.

Least Connections

Send to the server handling fewest requests.

1
2
3
4
5
6
# Route each new request to the backend with the fewest active connections.
upstream backend {
    least_conn;  # overrides the default round-robin behavior
    server 192.168.1.10;
    server 192.168.1.11;
    server 192.168.1.12;
}

Best for: Long-lived connections, variable request durations.

IP Hash

Same client IP always goes to same server.

1
2
3
4
5
6
# Hash on the client IP so a given client keeps hitting the same backend.
# Note: clients behind the same NAT/proxy all map to one server.
upstream backend {
    ip_hash;  # sticky by source address; remembers assignment across requests
    server 192.168.1.10;
    server 192.168.1.11;
    server 192.168.1.12;
}

Best for: Session affinity without cookies (not recommended for most cases).

Nginx Load Balancer

Basic HTTP Load Balancing

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# /etc/nginx/nginx.conf
# Reverse-proxy pool using least-connections balancing.
upstream app_servers {
    least_conn;
    server 10.0.1.10:8080;
    server 10.0.1.11:8080;
    server 10.0.1.12:8080;
}

server {
    listen 80;
    server_name example.com;

    location / {
        proxy_pass http://app_servers;
        # Preserve the original request context for the backends:
        proxy_set_header Host $host;                                     # original Host header
        proxy_set_header X-Real-IP $remote_addr;                         # direct client IP
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;     # append client to XFF chain
        proxy_set_header X-Forwarded-Proto $scheme;                      # http vs https at the edge
    }
}

Health Checks

1
2
3
4
5
# Passive health checks: after 3 failed requests within 30s, the server is
# marked unavailable for 30s before nginx retries it.
upstream app_servers {
    server 10.0.1.10:8080 max_fails=3 fail_timeout=30s;
    server 10.0.1.11:8080 max_fails=3 fail_timeout=30s;
    server 10.0.1.12:8080 backup;  # Only used when others fail
}

With NGINX Plus (or an OpenResty Lua health-check module) you get active health checks — the `health_check` directive shown below is NGINX Plus syntax:

1
2
3
4
5
6
7
upstream app_servers {
    zone app_servers 64k;  # shared memory zone, required for active health checks
    server 10.0.1.10:8080;
    server 10.0.1.11:8080;

    # Probe /health every 5s; 2 passes to mark healthy, 3 fails to mark down.
    # NOTE: the health_check directive is NGINX Plus (commercial) syntax.
    health_check interval=5s passes=2 fails=3 uri=/health;
}

WebSocket Support

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
upstream websocket_servers {
    server 10.0.1.10:8080;
    server 10.0.1.11:8080;
}

server {
    listen 80;

    location /ws {
        proxy_pass http://websocket_servers;
        proxy_http_version 1.1;                       # WebSocket upgrade requires HTTP/1.1
        proxy_set_header Upgrade $http_upgrade;        # pass through the client's Upgrade header
        proxy_set_header Connection "upgrade";         # signal connection upgrade to the backend
        proxy_read_timeout 86400;                      # 24h: keep idle WebSocket connections open
    }
}

AWS Application Load Balancer

Terraform Configuration

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# ALB — internet-facing Application Load Balancer in the public subnets.
resource "aws_lb" "app" {
  name               = "app-lb"
  internal           = false
  load_balancer_type = "application"
  security_groups    = [aws_security_group.lb.id]
  subnets            = aws_subnet.public[*].id

  enable_deletion_protection = true

  tags = {
    Environment = "production"
  }
}

# Target Group — where the ALB forwards traffic; health checks decide rotation.
resource "aws_lb_target_group" "app" {
  name     = "app-tg"
  port     = 8080
  protocol = "HTTP"
  vpc_id   = aws_vpc.main.id

  health_check {
    enabled             = true
    healthy_threshold   = 2   # consecutive successes to mark healthy
    unhealthy_threshold = 3   # consecutive failures to mark unhealthy
    timeout             = 5   # seconds per probe
    interval            = 30  # seconds between probes
    path                = "/health"
    matcher             = "200"
  }

  stickiness {
    type            = "lb_cookie"
    cookie_duration = 3600
    enabled         = false  # Enable if needed
  }
}

# Listener — HTTPS termination at the ALB.
resource "aws_lb_listener" "https" {
  load_balancer_arn = aws_lb.app.arn
  port              = 443
  protocol          = "HTTPS"
  # TLS1.3-capable policy; the older ELBSecurityPolicy-TLS-1-2-2017-01
  # is a legacy policy AWS no longer recommends for new listeners.
  ssl_policy        = "ELBSecurityPolicy-TLS13-1-2-2021-06"
  certificate_arn   = aws_acm_certificate.cert.arn

  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.app.arn
  }
}

# HTTP to HTTPS redirect — never serve plaintext traffic directly.
resource "aws_lb_listener" "http" {
  load_balancer_arn = aws_lb.app.arn
  port              = 80
  protocol          = "HTTP"

  default_action {
    type = "redirect"
    redirect {
      port        = "443"
      protocol    = "HTTPS"
      status_code = "HTTP_301"
    }
  }
}

Path-Based Routing

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Listener rules are evaluated in ascending priority order; lower numbers win.
resource "aws_lb_listener_rule" "api" {
  listener_arn = aws_lb_listener.https.arn
  priority     = 100  # checked before the static rule (200)

  action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.api.arn
  }

  condition {
    path_pattern {
      values = ["/api/*"]
    }
  }
}

resource "aws_lb_listener_rule" "static" {
  listener_arn = aws_lb_listener.https.arn
  priority     = 200

  action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.static.arn
  }

  condition {
    path_pattern {
      values = ["/static/*", "/assets/*"]
    }
  }
}

HAProxy

Basic Configuration

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# /etc/haproxy/haproxy.cfg
global
    log /dev/log local0
    maxconn 4096          # global cap on concurrent connections
    user haproxy
    group haproxy
    daemon                # run in the background

defaults
    log     global
    mode    http
    option  httplog
    option  dontlognull   # don't log health-probe connections with no data
    timeout connect 5000ms
    timeout client  50000ms
    timeout server  50000ms

frontend http_front
    bind *:80
    bind *:443 ssl crt /etc/ssl/certs/combined.pem   # cert+key concatenated in one PEM
    redirect scheme https code 301 if !{ ssl_fc }    # force HTTPS for plain-HTTP requests
    default_backend app_servers

backend app_servers
    balance roundrobin
    option httpchk GET /health          # active health probe
    http-check expect status 200        # only a 200 counts as healthy
    
    # check inter 5s: probe every 5s; fall 3: down after 3 failures; rise 2: up after 2 passes
    server app1 10.0.1.10:8080 check inter 5s fall 3 rise 2
    server app2 10.0.1.11:8080 check inter 5s fall 3 rise 2
    server app3 10.0.1.12:8080 check inter 5s fall 3 rise 2 backup

# Built-in stats dashboard on :8404/stats.
listen stats
    bind *:8404
    stats enable
    stats uri /stats
    stats auth admin:password   # NOTE(review): placeholder credential — change before deploying

Sticky Sessions

1
2
3
4
5
6
backend app_servers
    balance roundrobin
    # Insert a SERVERID cookie so each client sticks to one backend.
    # indirect: strip it before forwarding; nocache: mark response uncacheable.
    cookie SERVERID insert indirect nocache
    
    server app1 10.0.1.10:8080 cookie s1 check
    server app2 10.0.1.11:8080 cookie s2 check

Health Check Endpoints

Your application needs a health endpoint:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Flask
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/health')
def health():
    """Aggregate health endpoint for load-balancer probes.

    Returns 200 with per-dependency results when every check passes,
    503 otherwise so the balancer pulls this instance from rotation.
    """
    # Check dependencies
    checks = {
        'database': check_database(),
        'redis': check_redis(),
        'disk': check_disk_space(),
    }

    all_healthy = all(checks.values())
    status_code = 200 if all_healthy else 503

    return jsonify({
        'status': 'healthy' if all_healthy else 'unhealthy',
        'checks': checks
    }), status_code

def check_database():
    """Return True if the database answers a trivial query."""
    try:
        db.execute('SELECT 1')
        return True
    # `except Exception` instead of a bare `except:` — a bare clause would
    # also swallow SystemExit/KeyboardInterrupt and block clean shutdown.
    except Exception:
        return False

def check_redis():
    """Return True if Redis responds to PING."""
    try:
        redis.ping()
        return True
    except Exception:
        return False

def check_disk_space():
    """Return True while more than 10% of the root filesystem is free."""
    import shutil
    total, used, free = shutil.disk_usage('/')
    return (free / total) > 0.1  # >10% free

Graceful Shutdown

Handle SIGTERM properly for zero-downtime deployments:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import signal
import sys
from flask import Flask

app = Flask(__name__)
# Flipped by the SIGTERM handler; the /health route reads it so the
# load balancer sees this instance as unhealthy during shutdown.
shutting_down = False

def handle_sigterm(signum, frame):
    """SIGTERM handler: fail health checks, wait for LB drain, then exit.

    The 10s sleep gives the balancer time to observe the failing health
    check and stop routing new traffic here before the process exits.
    """
    global shutting_down
    shutting_down = True
    print("Received SIGTERM, starting graceful shutdown...")
    # Give time for load balancer to stop sending traffic
    import time
    time.sleep(10)
    sys.exit(0)

signal.signal(signal.SIGTERM, handle_sigterm)

@app.route('/health')
def health():
    # Report 503 once shutdown has begun so the LB drains this instance.
    if shutting_down:
        return 'Shutting down', 503
    return 'OK', 200

Connection Draining

Allow in-flight requests to complete:

1
2
3
4
5
6
# AWS Target Group
resource "aws_lb_target_group" "app" {
  # ...
  
  # How long the ALB keeps routing in-flight requests to a deregistering
  # target before cutting it off (default is 300s).
  deregistration_delay = 30  # Seconds to wait before removing target
}
1
2
3
4
# Nginx upstream
upstream app_servers {
    # Ramp traffic back up over 30s after the server recovers, rather than
    # flooding a cold instance. NOTE: slow_start is an NGINX Plus parameter.
    server 10.0.1.10:8080 slow_start=30s;
}

Common Patterns

Blue-Green Deployment

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Switch traffic by changing target group
resource "aws_lb_listener" "https" {
  # ...
  
  default_action {
    type             = "forward"
    # HCL2 only allows a conditional to span lines inside parentheses;
    # a line ending in `?` or `:` is a parse error otherwise.
    target_group_arn = (
      var.active_color == "blue"
      ? aws_lb_target_group.blue.arn
      : aws_lb_target_group.green.arn
    )
  }
}

Canary Releases

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Weighted forwarding: send 90% of traffic to stable, 10% to the canary.
resource "aws_lb_listener_rule" "canary" {
  listener_arn = aws_lb_listener.https.arn
  priority     = 50  # evaluated before lower-priority (higher-numbered) rules

  action {
    type = "forward"
    forward {
      target_group {
        arn    = aws_lb_target_group.stable.arn
        weight = 90
      }
      target_group {
        arn    = aws_lb_target_group.canary.arn
        weight = 10
      }
    }
  }

  condition {
    path_pattern {
      values = ["/*"]  # match all paths
    }
  }
}

Monitoring

Key metrics to watch:

  • Request count — Traffic patterns
  • Latency (p50, p95, p99) — Response times
  • Error rate (5xx) — Backend health
  • Active connections — Load distribution
  • Healthy host count — Capacity
1
2
3
4
5
6
7
8
9
# CloudWatch metrics for ALB
# Total request count in 5-minute (300s) buckets over a 3-hour window.
# The LoadBalancer dimension value is the ALB's ARN suffix (app/name/id).
aws cloudwatch get-metric-statistics \
    --namespace AWS/ApplicationELB \
    --metric-name RequestCount \
    --dimensions Name=LoadBalancer,Value=app/my-alb/1234567890 \
    --start-time 2026-02-11T00:00:00Z \
    --end-time 2026-02-11T03:00:00Z \
    --period 300 \
    --statistics Sum

The Checklist

  • Multiple backend servers
  • Health checks configured
  • Appropriate algorithm selected
  • SSL/TLS termination
  • Connection draining enabled
  • Monitoring and alerting
  • Graceful shutdown handling
  • Tested failover scenarios

A load balancer is only as good as the backends behind it. Keep them healthy.