Kill -9 is violence. Your application deserves a dignified death.
Graceful shutdown means finishing in-flight work before terminating. Without it, deployments cause dropped requests, broken connections, and data corruption. With it, users never notice you restarted.
The Problem
When a process receives SIGTERM:
- Kubernetes/Docker sends the signal
- Your app has a grace period (default 30s)
- After the grace period, SIGKILL terminates forcefully
If your app doesn’t handle SIGTERM, in-flight requests get dropped. Database transactions abort. WebSocket connections die mid-message.
The Pattern
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
| const server = require('http').createServer(app);
let isShuttingDown = false;
// Handle shutdown signals
process.on('SIGTERM', gracefulShutdown);
process.on('SIGINT', gracefulShutdown);
async function gracefulShutdown(signal) {
console.log(`Received ${signal}, starting graceful shutdown`);
isShuttingDown = true;
// 1. Stop accepting new connections
server.close(async () => {
console.log('HTTP server closed');
// 2. Close database connections
await db.end();
console.log('Database connections closed');
// 3. Close other resources
await redis.quit();
await messageQueue.close();
console.log('Graceful shutdown complete');
process.exit(0);
});
// Force exit after timeout
setTimeout(() => {
console.error('Forced shutdown after timeout');
process.exit(1);
}, 25000); // Leave buffer before SIGKILL
}
// Reject new requests during shutdown
app.use((req, res, next) => {
if (isShuttingDown) {
res.set('Connection', 'close');
return res.status(503).json({ error: 'Server shutting down' });
}
next();
});
|
Connection Draining
The key insight: stop accepting new work, but finish existing work.
/**
 * Tracks live connections so shutdown can wait for them to finish,
 * then force-closes any stragglers once a deadline passes.
 */
class ConnectionDrainer {
  constructor() {
    this.activeConnections = new Set();
  }

  /** Register a connection; it removes itself when it emits 'close'. */
  track(connection) {
    this.activeConnections.add(connection);
    connection.on('close', () => {
      this.activeConnections.delete(connection);
    });
  }

  /**
   * Ask clients to close, poll until the set empties or timeoutMs
   * elapses, then destroy whatever is still open.
   */
  async drain(timeoutMs = 10000) {
    console.log(`Draining ${this.activeConnections.size} connections`);

    // Tell clients to close after current request
    this.activeConnections.forEach((conn) => conn.setHeader?.('Connection', 'close'));

    // Wait for connections to close naturally
    const deadline = Date.now() + timeoutMs;
    while (this.activeConnections.size > 0) {
      if (Date.now() > deadline) {
        console.log(`Force closing ${this.activeConnections.size} connections`);
        this.activeConnections.forEach((conn) => conn.destroy());
        break;
      }
      await sleep(100);
    }
  }
}
Health Check Integration
Load balancers need to know when to stop sending traffic:
// Readiness and liveness answer different questions: readiness gates
// traffic; liveness decides whether the process should be restarted.
let isReady = false;
let isHealthy = true;

// Readiness: can accept new traffic?
app.get('/ready', (req, res) => {
  const ready = isReady && !isShuttingDown;
  res.status(ready ? 200 : 503).json({ ready });
});

// Liveness: is the process healthy?
app.get('/health', (req, res) => {
  res.status(isHealthy ? 200 : 503).json({ healthy: isHealthy });
});

// During startup
async function startup() {
  await db.connect();
  await cache.connect();
  isReady = true; // Now accept traffic
}

// During shutdown
async function gracefulShutdown() {
  isReady = false; // Stop accepting new traffic
  // ... drain connections ...
}
Kubernetes Configuration
apiVersion: v1
kind: Pod
spec:
  # Total budget before Kubernetes sends SIGKILL (default is 30s).
  terminationGracePeriodSeconds: 60
  containers:
    - name: app
      lifecycle:
        preStop:
          exec:
            # Delay SIGTERM so the load balancer has time to remove the
            # pod from rotation before the app starts shutting down.
            command: ["/bin/sh", "-c", "sleep 5"]
      # Readiness: gates whether the pod receives traffic.
      readinessProbe:
        httpGet:
          path: /ready
          port: 8080
        initialDelaySeconds: 5
        periodSeconds: 5
      # Liveness: failing this restarts the container.
      livenessProbe:
        httpGet:
          path: /health
          port: 8080
        initialDelaySeconds: 10
        periodSeconds: 10
The preStop sleep is crucial: it gives the load balancer time to remove the pod from rotation before the app starts shutting down.
Timeline:
- Kubernetes marks the pod Terminating and begins removing it from Service endpoints
- preStop hook runs (sleep 5), giving the load balancer time to stop sending traffic
- SIGTERM is sent to the container
- App handles SIGTERM, drains connections
- Clean exit
Without the sleep, traffic can arrive after shutdown begins.
Database Transactions
In-flight transactions need special handling:
/**
 * Tracks in-flight database transactions so shutdown can wait for
 * them, and refuses to start new ones once shutdown has begun.
 */
class TransactionManager {
  constructor() {
    this.activeTransactions = new Set();
  }

  /**
   * Run fn inside a transaction: commit on success, roll back on
   * error. Rejects immediately while shutdown is in progress.
   */
  async runInTransaction(fn) {
    if (isShuttingDown) {
      throw new Error('Cannot start transaction during shutdown');
    }

    const transaction = await db.beginTransaction();
    this.activeTransactions.add(transaction);
    try {
      const outcome = await fn(transaction);
      await transaction.commit();
      return outcome;
    } catch (err) {
      await transaction.rollback();
      throw err;
    } finally {
      // Untrack whether we committed or rolled back.
      this.activeTransactions.delete(transaction);
    }
  }

  /** Poll until every tracked transaction finishes or timeoutMs elapses. */
  async waitForTransactions(timeoutMs = 10000) {
    const deadline = Date.now() + timeoutMs;
    while (this.activeTransactions.size > 0) {
      if (Date.now() > deadline) {
        console.warn(`${this.activeTransactions.size} transactions still active`);
        break;
      }
      await sleep(100);
    }
  }
}
Worker Processes
Background workers need graceful shutdown too:
/**
 * Pulls jobs from a queue and processes them one at a time, stopping
 * cleanly on request.
 *
 * NOTE(review): processJob is not defined here — it is assumed to be
 * supplied by a subclass or assigned on the instance. Verify.
 */
class Worker {
  constructor(queue) {
    this.queue = queue;
    this.currentJob = null;   // job currently being processed, if any
    this.shouldStop = false;  // set by stop() to end the loop
  }

  /**
   * Main loop: fetch and process jobs until stop() is called.
   *
   * Fixes over the naive version:
   * - currentJob is cleared in a finally block, so a job that throws
   *   no longer leaves stop() waiting forever on a stale currentJob.
   * - when the queue yields no job, we sleep briefly instead of
   *   re-polling in an unbroken microtask chain, which would starve
   *   timers (including the sleep() used by stop()).
   */
  async start() {
    while (!this.shouldStop) {
      this.currentJob = await this.queue.getNextJob();
      if (this.currentJob) {
        try {
          await this.processJob(this.currentJob);
        } finally {
          this.currentJob = null;
        }
      } else {
        // Queue empty: yield to the event loop before polling again.
        await sleep(100);
      }
    }
  }

  /**
   * Stop the loop, wait for the in-flight job (if any) to finish,
   * then close the queue.
   */
  async stop() {
    this.shouldStop = true;
    // Wait for current job to finish
    while (this.currentJob) {
      await sleep(100);
    }
    // Return unprocessed jobs to queue
    // (assumes queue.close() requeues anything unprocessed — verify)
    await this.queue.close();
  }
}
WebSocket Handling
WebSockets need explicit close messages:
// Every open socket, so shutdown can close them all.
const activeWebSockets = new Set();

wss.on('connection', (ws) => {
  activeWebSockets.add(ws);
  ws.on('close', () => activeWebSockets.delete(ws));
});

/**
 * Politely close every socket (1001 = "going away"), give clients a
 * moment to acknowledge, then hard-terminate anything still open.
 */
async function shutdownWebSockets() {
  // Send close frame with reason
  activeWebSockets.forEach((ws) => ws.close(1001, 'Server shutting down'));

  // Wait for clients to acknowledge
  await sleep(1000);

  // Force close any remaining
  activeWebSockets.forEach((ws) => ws.terminate());
}
Common Mistakes
No SIGTERM handler: Process exits immediately, dropping everything.
Closing server before draining: New connections rejected, but existing ones not tracked.
Infinite grace period: Eventually SIGKILL comes. Plan for it.
Not marking unready first: Load balancer keeps sending traffic during shutdown.
Ignoring background jobs: Worker picks up job, gets killed mid-processing.
Database connections orphaned: Connection pool exhausted on restart.
The Complete Pattern
// Orchestrates the full shutdown sequence. graceful() is idempotent:
// repeated signals after the first are ignored.
const shutdown = {
  isShuttingDown: false,

  async graceful(signal) {
    // Re-entry guard: signals may be delivered more than once.
    if (this.isShuttingDown) return;
    this.isShuttingDown = true;
    console.log(`${signal} received, graceful shutdown starting`);

    // Give up on a phase once its time budget is spent.
    const withTimeout = (work, ms) => Promise.race([work, sleep(ms)]);

    // Phase 1: Stop accepting new work
    server.close();

    // Phase 2: Drain in-flight requests (with timeout)
    await withTimeout(connectionDrainer.drain(), 15000);

    // Phase 3: Wait for background jobs
    await withTimeout(worker.stop(), 10000);

    // Phase 4: Close external connections
    await Promise.all([db.end(), redis.quit(), messageQueue.close()]);

    console.log('Graceful shutdown complete');
    process.exit(0);
  },
};

process.on('SIGTERM', () => shutdown.graceful('SIGTERM'));
process.on('SIGINT', () => shutdown.graceful('SIGINT'));
The Mental Model
Think of graceful shutdown like closing a restaurant:
- Stop seating new guests (stop accepting connections)
- Let current diners finish (drain in-flight requests)
- Close the kitchen (stop background jobs)
- Clean up (close database connections)
- Lock the door (exit process)
You don’t throw people out mid-meal. You let them finish, then close.
Your users deserve the same courtesy. Handle SIGTERM with grace.