Every public API needs rate limiting. Without it, one misbehaving client can take down your entire service—whether through malice, bugs, or just enthusiasm.

Rate limiting protects your infrastructure, ensures fair usage, and creates predictable behavior for all clients.

The Core Algorithms

Fixed Window

Count requests in fixed time intervals (e.g., per minute):

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
class FixedWindowLimiter {
  /**
   * Counts requests per fixed time window, backed by Redis.
   *
   * @param {object} redis - Redis client (needs `set` and `incr`).
   * @param {number} limit - Max requests allowed per window.
   * @param {number} windowSeconds - Window length in seconds.
   */
  constructor(redis, limit, windowSeconds) {
    this.redis = redis;
    this.limit = limit;
    this.windowSeconds = windowSeconds;
  }

  /**
   * @param {string} clientId - Identifier for the caller (API key, IP, ...).
   * @returns {Promise<boolean>} true if the request is within the limit.
   */
  async isAllowed(clientId) {
    const window = Math.floor(Date.now() / 1000 / this.windowSeconds);
    const key = `ratelimit:${clientId}:${window}`;

    // Create the key with its TTL *before* incrementing (NX makes this a
    // no-op when the key already exists). The original incr-then-expire
    // order leaked the key forever if the process died between the two
    // calls — and window keys rotate, so leaks accumulate.
    await this.redis.set(key, 0, 'EX', this.windowSeconds, 'NX');
    const count = await this.redis.incr(key);

    return count <= this.limit;
  }
}

Pros: Simple, memory-efficient. Cons: Allows bursts at window boundaries—a client could make 100 requests at 0:59 and another 100 at 1:00, briefly doubling the effective rate.

Sliding Window Log

Track timestamps of each request, count within the sliding window:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class SlidingWindowLogLimiter {
  /**
   * Exact sliding-window limiter: every request timestamp lives in a Redis
   * sorted set (scored by ms timestamp) and is counted against a rolling
   * window ending at "now".
   *
   * NOTE(review): the count-then-add sequence is a check-then-act race
   * under concurrent callers — confirm whether that's acceptable or move
   * the sequence into a Lua script.
   *
   * @param {object} redis - Redis client (zremrangebyscore/zcard/zadd/expire).
   * @param {number} limit - Max requests per window.
   * @param {number} windowSeconds - Window length in seconds.
   */
  constructor(redis, limit, windowSeconds) {
    this.redis = redis;
    this.limit = limit;
    this.windowMs = windowSeconds * 1000;
  }

  /**
   * @param {string} clientId
   * @returns {Promise<boolean>} true if the request was admitted.
   */
  async isAllowed(clientId) {
    const key = `ratelimit:${clientId}`;
    const nowMs = Date.now();
    const cutoff = nowMs - this.windowMs;

    // Evict entries that have aged out of the window, then count the rest.
    await this.redis.zremrangebyscore(key, 0, cutoff);
    const inWindow = await this.redis.zcard(key);

    if (inWindow >= this.limit) {
      return false;
    }

    // Record this request. The random suffix keeps two same-millisecond
    // requests from colliding on the sorted-set member value.
    await this.redis.zadd(key, nowMs, `${nowMs}-${Math.random()}`);
    await this.redis.expire(key, this.windowMs / 1000);
    return true;
  }
}

Pros: Accurate, no boundary bursts. Cons: Memory-intensive for high-volume clients.

Sliding Window Counter

Hybrid approach—weighted average of current and previous windows:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class SlidingWindowCounterLimiter {
  /**
   * Sliding-window counter: approximates a sliding window by weighting
   * the previous fixed window's count by its remaining overlap.
   *
   * @param {object} redis - Redis client (get/incr/expire).
   * @param {number} limit - Max requests per window.
   * @param {number} windowSeconds - Window length in seconds.
   */
  constructor(redis, limit, windowSeconds) {
    this.redis = redis;
    this.limit = limit;
    this.windowSeconds = windowSeconds;
  }

  /**
   * @param {string} clientId
   * @returns {Promise<boolean>} true if the request was admitted.
   */
  async isAllowed(clientId) {
    const now = Date.now() / 1000;
    const currentWindow = Math.floor(now / this.windowSeconds);
    const previousWindow = currentWindow - 1;
    // Fraction of the current fixed window that has already elapsed.
    const windowProgress = (now % this.windowSeconds) / this.windowSeconds;

    const [current, previous] = await Promise.all([
      this.redis.get(`ratelimit:${clientId}:${currentWindow}`),
      this.redis.get(`ratelimit:${clientId}:${previousWindow}`),
    ]);

    // Weight the previous window by how much of it still overlaps the
    // sliding window. Explicit radix-10 parsing avoids surprises on the
    // strings Redis returns (bare parseInt has no guaranteed radix).
    const weightedCount =
      (Number.parseInt(previous ?? '0', 10) || 0) * (1 - windowProgress) +
      (Number.parseInt(current ?? '0', 10) || 0);

    if (weightedCount < this.limit) {
      const key = `ratelimit:${clientId}:${currentWindow}`;
      await this.redis.incr(key);
      // Keep the counter around long enough to serve as next window's
      // "previous" value.
      await this.redis.expire(key, this.windowSeconds * 2);
      return true;
    }

    return false;
  }
}

Pros: Memory-efficient, smooth limiting. Cons: Slightly more complex.

Token Bucket

Tokens regenerate over time; each request consumes a token:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class TokenBucketLimiter {
  /**
   * Token bucket: tokens refill continuously at `refillRate`; each request
   * consumes `cost` tokens, allowing controlled bursts up to `capacity`.
   *
   * NOTE(review): hgetall -> compute -> hset is a read-modify-write race
   * across concurrent callers; confirm whether a Lua script is needed.
   *
   * @param {object} redis - Redis client (hgetall/hset/expire).
   * @param {number} capacity - Max tokens (burst size).
   * @param {number} refillRate - Tokens regenerated per second.
   */
  constructor(redis, capacity, refillRate) {
    this.redis = redis;
    this.capacity = capacity;      // Max tokens
    this.refillRate = refillRate;  // Tokens per second
  }

  /**
   * @param {string} clientId
   * @param {number} [cost=1] - Tokens this request consumes.
   * @returns {Promise<boolean>} true if enough tokens were available.
   */
  async isAllowed(clientId, cost = 1) {
    const key = `bucket:${clientId}`;
    const now = Date.now() / 1000;

    const data = await this.redis.hgetall(key);
    // BUG FIX: `parseFloat(data.tokens) || this.capacity` treated a stored
    // "0" as missing, so a fully drained bucket snapped back to full
    // capacity on the next call. Only default when the field is absent.
    let tokens = data.tokens !== undefined ? parseFloat(data.tokens) : this.capacity;
    const lastRefill = data.lastRefill !== undefined ? parseFloat(data.lastRefill) : now;

    // Refill tokens based on time elapsed, capped at capacity.
    const elapsed = now - lastRefill;
    tokens = Math.min(this.capacity, tokens + elapsed * this.refillRate);

    if (tokens >= cost) {
      tokens -= cost;
      await this.redis.hset(key, 'tokens', tokens, 'lastRefill', now);
      // Expire idle buckets after roughly two full refill cycles.
      await this.redis.expire(key, this.capacity / this.refillRate * 2);
      return true;
    }

    return false;
  }
}

Pros: Allows controlled bursts, smooth rate over time. Cons: More state to track.

Implementation Patterns

Middleware Pattern

Apply rate limiting at the edge:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/**
 * Express middleware factory that enforces `limiter` and always emits
 * standard rate-limit headers so clients can implement backoff.
 *
 * @param {{check: (id: string) => Promise<object>}} limiter
 * @returns {Function} async Express middleware (req, res, next).
 */
function rateLimitMiddleware(limiter) {
  return async (req, res, next) => {
    // Prefer the API key over the IP: IP-only identification punishes
    // legitimate users behind shared addresses (corporate NAT, VPNs).
    const clientId = req.headers['x-api-key'] || req.ip;

    let result;
    try {
      result = await limiter.check(clientId);
    } catch (err) {
      // Express does not catch async rejections; route the error to the
      // error-handling middleware instead of leaving the request hanging.
      return next(err);
    }

    // Always include rate limit headers, on success and on rejection.
    res.set({
      'X-RateLimit-Limit': result.limit,
      'X-RateLimit-Remaining': result.remaining,
      'X-RateLimit-Reset': result.resetAt,
    });

    if (!result.allowed) {
      res.set('Retry-After', result.retryAfter);
      return res.status(429).json({
        error: 'Too Many Requests',
        retryAfter: result.retryAfter,
      });
    }

    next();
  };
}

Tiered Limits

Different limits for different client types:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
// Per-tier quotas; `null` means unlimited for that dimension.
const TIERS = Object.freeze({
  free: { requestsPerMinute: 60, requestsPerDay: 1000 },
  pro: { requestsPerMinute: 600, requestsPerDay: 50000 },
  enterprise: { requestsPerMinute: 6000, requestsPerDay: null },
});

/**
 * Looks up the rate-limit quotas for an API key.
 * Unknown keys (no client record) or unrecognized tiers fall back to the
 * free tier — the original threw a TypeError on `client.tier` when the
 * lookup returned null/undefined.
 *
 * @param {string} apiKey
 * @returns {Promise<{requestsPerMinute: number, requestsPerDay: number|null}>}
 */
async function getTierLimits(apiKey) {
  const client = await db.getClientByApiKey(apiKey);
  return TIERS[client?.tier] ?? TIERS.free;
}

Endpoint-Specific Limits

Some endpoints are more expensive:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
// Relative cost of each endpoint in rate-limit tokens.
const ENDPOINT_COSTS = {
  'GET /users': 1,
  'POST /search': 5,      // More expensive
  'POST /ai/generate': 50, // Much more expensive
};

/**
 * Returns the token cost of a request; unknown endpoints cost 1.
 *
 * @param {object} req - Express request. `req.route` is only populated
 *   after the router matches, so it may be undefined in early middleware.
 * @returns {number} cost in tokens.
 */
function getEndpointCost(req) {
  // Optional chaining: the original threw on `req.route.path` when called
  // before routing; unmatched requests now fall back to the default cost.
  const key = `${req.method} ${req.route?.path}`;
  return ENDPOINT_COSTS[key] ?? 1;
}

Distributed Rate Limiting

For multi-region deployments, use centralized state or approximate locally:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Option 1: Central Redis (adds cross-region latency on every check)
const centralLimiter = new Limiter(centralRedis);

// Option 2: Local limits with periodic sync (approximate but fast)
class DistributedLimiter {
  /**
   * @param {object} localRedis - Region-local Redis client.
   * @param {number} syncInterval - Milliseconds between global syncs.
   */
  constructor(localRedis, syncInterval) {
    this.local = new Limiter(localRedis);
    this.buffer = [];

    // The original scheduled `this.syncToGlobal()` without ever defining
    // it, so the first tick threw. unref() (where available) keeps the
    // timer from pinning the process open on shutdown.
    const timer = setInterval(() => this.syncToGlobal(), syncInterval);
    timer.unref?.();
  }

  /**
   * @param {string} clientId
   * @returns {Promise<boolean>} local decision; global state catches up async.
   */
  async isAllowed(clientId) {
    // Check local first (fast path, no cross-region hop)
    const localAllowed = await this.local.isAllowed(clientId);
    if (!localAllowed) return false;

    // Buffer the hit for the next asynchronous global sync.
    this.buffer.push({ clientId, timestamp: Date.now() });
    return true;
  }

  /**
   * Drains buffered hits for global aggregation. Draining (rather than
   * copying) keeps the buffer from growing without bound between syncs.
   * The transport is deployment-specific.
   */
  syncToGlobal() {
    const batch = this.buffer.splice(0);
    if (batch.length === 0) return;
    // TODO: ship `batch` to the central/global limiter store.
  }
}

Response Headers

Always communicate limits clearly:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
HTTP/1.1 200 OK
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 87
X-RateLimit-Reset: 1708084800

HTTP/1.1 429 Too Many Requests
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 0
X-RateLimit-Reset: 1708084800
Retry-After: 47

Standard headers help clients implement backoff correctly.

Graceful Degradation

Rate limiting shouldn’t be binary. Consider:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Soft rate limiting: warn past 80% of quota, add latency past 90%,
 * and reject only once usage exceeds 100%.
 *
 * @param {object} req - Request carrying `clientId`.
 * @param {object} res - Response used for status/headers.
 */
async function handleRequest(req, res) {
  const usage = await limiter.getUsage(req.clientId);
  const used = usage.percentUsed;

  // Hard stop only once the quota is fully exhausted.
  if (used > 100) {
    return res.status(429).json({ error: 'Rate limit exceeded' });
  }

  // Past 80%: tell the client it is getting close.
  if (used > 80) {
    res.set('X-RateLimit-Warning', 'Approaching limit');
  }

  // Past 90%: deprioritize with latency proportional to overuse
  // instead of rejecting outright.
  if (used > 90) {
    await delay(100 * (used - 90));
  }

  return handleNormally(req, res);
}

Common Mistakes

No rate limiting on auth endpoints: Login and password reset are prime abuse targets.

Limiting by IP only: Shared IPs (corporate NAT, VPNs) punish legitimate users.

No headers: Clients can’t implement proper backoff without feedback.

Hard cutoff at limit: Consider graceful degradation before hard rejection.

Same limits everywhere: Expensive operations should cost more.

No bypass for health checks: Your own monitoring shouldn’t get rate limited.

The Mental Model

Think of rate limiting like a bouncer at a club:

  • There’s a capacity (limit)
  • The bouncer counts people (requests)
  • VIPs get priority (tiered limits)
  • People leave over time (window resets)
  • The bouncer tells you when to come back (Retry-After)

Your API is the club. Rate limiting ensures everyone has a good time—not just the first person through the door.