Skip to content

Commit 4d8e807

Browse files
implement graceful shutdown at node renderer workers
1 parent d3c02bf commit 4d8e807

File tree

5 files changed

+97
-20
lines changed

5 files changed

+97
-20
lines changed

react_on_rails_pro/packages/node-renderer/src/master.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,15 @@ export = function masterRun(runningConfig?: Partial<Config>) {
4848
allWorkersRestartInterval,
4949
delayBetweenIndividualWorkerRestarts,
5050
);
51-
setInterval(() => {
52-
restartWorkers(delayBetweenIndividualWorkerRestarts);
53-
}, allWorkersRestartInterval * MILLISECONDS_IN_MINUTE);
51+
52+
const allWorkersRestartIntervalMS = allWorkersRestartInterval * MILLISECONDS_IN_MINUTE;
53+
// Runs one full restart cycle and, once it has settled (successfully or not),
// schedules the next cycle. Chaining on completion instead of using setInterval
// guarantees that two restart cycles never overlap.
const scheduleWorkersRestart = () => {
  void restartWorkers(delayBetweenIndividualWorkerRestarts)
    .catch((err: unknown) => {
      // Without this handler a rejected restart would surface as an unhandled
      // promise rejection from the `.finally()` chain below.
      log.error(
        'Scheduled restart of workers failed: %s',
        err instanceof Error ? err.message : String(err),
      );
    })
    .finally(() => {
      setTimeout(scheduleWorkersRestart, allWorkersRestartIntervalMS);
    });
};
58+
59+
setTimeout(scheduleWorkersRestart, allWorkersRestartIntervalMS);
5460
} else if (allWorkersRestartInterval || delayBetweenIndividualWorkerRestarts) {
5561
log.error(
5662
"Misconfiguration, please provide both 'allWorkersRestartInterval' and " +

react_on_rails_pro/packages/node-renderer/src/master/restartWorkers.ts

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import cluster from 'cluster';
77
import log from '../shared/log';
8+
import { SHUTDOWN_WORKER_MESSAGE } from '../shared/utils';
89

910
const MILLISECONDS_IN_MINUTE = 60000;
1011

@@ -14,26 +15,42 @@ declare module 'cluster' {
1415
}
1516
}
1617

17-
/**
 * Restarts the cluster workers sequentially.
 *
 * For each worker currently listed in `cluster.workers`:
 *   1. marks it with `isScheduledRestart = true`,
 *   2. sends it `SHUTDOWN_WORKER_MESSAGE`,
 *   3. waits until the worker emits `'exit'`, or destroys it after a fixed timeout,
 *   4. waits `delayBetweenIndividualWorkerRestarts` minutes before moving on.
 *
 * @param delayBetweenIndividualWorkerRestarts pause between two worker restarts, in minutes.
 * @returns a promise that resolves once every worker has been processed.
 * @throws (as a rejection) `Error('No workers to restart')` when `cluster.workers` is not available.
 */
export = async function restartWorkers(delayBetweenIndividualWorkerRestarts: number): Promise<void> {
  // How long a worker may take to exit on its own before it is destroyed.
  const forcedKillTimeoutMs = 100_000;
  const delayBetweenRestartsMs = delayBetweenIndividualWorkerRestarts * MILLISECONDS_IN_MINUTE;

  log.info('Started scheduled restart of workers');

  if (!cluster.workers) {
    throw new Error('No workers to restart');
  }

  for (const worker of Object.values(cluster.workers)) {
    // Skip empty slots and workers that have already exited: a dead worker never
    // emits 'exit' again, so waiting for it would only stall until the timeout.
    // `continue` (not `return`) so the remaining workers are still restarted.
    if (!worker || worker.isDead()) continue;

    log.debug('Kill worker #%d', worker.id);
    worker.isScheduledRestart = true;

    worker.send(SHUTDOWN_WORKER_MESSAGE);

    // Workers are intentionally restarted in sequence; it must not happen in parallel.
    // eslint-disable-next-line no-await-in-loop
    await new Promise<void>((resolve) => {
      let timeout: NodeJS.Timeout;

      const onExit = () => {
        clearTimeout(timeout);
        resolve();
      };
      worker.once('exit', onExit);

      timeout = setTimeout(() => {
        log.debug('Worker #%d timed out, forcing kill it', worker.id);
        worker.destroy();
        worker.off('exit', onExit);
        resolve();
      }, forcedKillTimeoutMs);
    });

    // eslint-disable-next-line no-await-in-loop
    await new Promise<void>((resolve) => {
      setTimeout(resolve, delayBetweenRestartsMs);
    });
  }

  log.info('Finished scheduled restart of workers');
};

react_on_rails_pro/packages/node-renderer/src/shared/utils.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import type { RenderResult } from '../worker/vm';
1111

1212
export const TRUNCATION_FILLER = '\n... TRUNCATED ...\n';
1313

14+
export const SHUTDOWN_WORKER_MESSAGE = 'NODE_RENDERER_SHUTDOWN_WORKER';
15+
1416
export function workerIdLabel() {
1517
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- worker is nullable in the primary process
1618
return cluster?.worker?.id || 'NO WORKER ID';

react_on_rails_pro/packages/node-renderer/src/worker.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import type { FastifyInstance, FastifyReply, FastifyRequest } from './worker/typ
1717
import checkProtocolVersion from './worker/checkProtocolVersionHandler';
1818
import authenticate from './worker/authHandler';
1919
import { handleRenderRequest, type ProvidedNewBundle } from './worker/handleRenderRequest';
20+
import handleGracefulShutdown from './worker/handleGracefulShutdown';
2021
import {
2122
errorResponseResult,
2223
formatExceptionMessage,
@@ -127,6 +128,8 @@ export default function run(config: Partial<Config>) {
127128
...fastifyServerOptions,
128129
});
129130

131+
handleGracefulShutdown(app);
132+
130133
// We shouldn't have unhandled errors here, but just in case
131134
app.addHook('onError', (req, res, err, done) => {
132135
// Not errorReporter.error so that integrations can decide how to log the errors.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import cluster from 'cluster';
2+
import { FastifyInstance } from './types';
3+
import { SHUTDOWN_WORKER_MESSAGE } from '../shared/utils';
4+
import log from '../shared/log';
5+
6+
const handleGracefulShutdown = (app: FastifyInstance) => {
7+
const { worker } = cluster;
8+
if (!worker) {
9+
log.error('handleGracefulShutdown is called on master, expected to call it on worker only');
10+
return;
11+
}
12+
13+
let activeRequestsCount = 0;
14+
let isShuttingDown = false;
15+
16+
process.on('message', (msg) => {
17+
if (msg === SHUTDOWN_WORKER_MESSAGE) {
18+
log.debug('Worker #%d received graceful shutdown message', worker.id);
19+
isShuttingDown = true;
20+
if (activeRequestsCount === 0) {
21+
log.debug('Worker #%d has no active requests, killing the worker', worker.id);
22+
worker.destroy();
23+
} else {
24+
log.debug(
25+
'Worker #%d has "%d" active requests, disconnecting the worker',
26+
worker.id,
27+
activeRequestsCount,
28+
);
29+
worker.disconnect();
30+
}
31+
}
32+
});
33+
34+
app.addHook('onRequest', (_req, _reply, done) => {
35+
activeRequestsCount += 1;
36+
done();
37+
});
38+
39+
app.addHook('onResponse', (_req, _reply, done) => {
40+
activeRequestsCount -= 1;
41+
if (isShuttingDown && activeRequestsCount === 0) {
42+
log.debug('Worker #%d served all active requests and going to be killed', worker.id);
43+
worker.destroy();
44+
}
45+
done();
46+
});
47+
};
48+
49+
export default handleGracefulShutdown;

0 commit comments

Comments
 (0)