From 9bbd6ae1f7ab89f66c5d10c0b118797e6330692b Mon Sep 17 00:00:00 2001 From: Adrien Kunysz Date: Wed, 16 Apr 2025 10:42:17 +0200 Subject: [PATCH 1/4] Notify workers of quick shutdown after graceful shutdown times out. As per https://github.com/benoitc/gunicorn/issues/3385 this makes it possible to distinguish between graceful and abnormal termination and gives workers a chance to do something about it (e.g. to log a stack trace). For backward compatibility, we keep the old two-signal model if quick_shutdown_timeout is set to 0. --- docs/source/signals.rst | 6 +++-- gunicorn/arbiter.py | 23 +++++++++++------- gunicorn/config.py | 27 ++++++++++++++++++++- gunicorn/workers/base.py | 10 ++++++-- gunicorn/workers/gthread.py | 3 +-- tests/test_arbiter.py | 48 +++++++++++++++++++++++++++++++++++-- 6 files changed, 99 insertions(+), 18 deletions(-) diff --git a/docs/source/signals.rst b/docs/source/signals.rst index c22ea0362..6bdbda99b 100644 --- a/docs/source/signals.rst +++ b/docs/source/signals.rst @@ -10,9 +10,11 @@ signals used internally by Gunicorn to communicate with the workers. Master process ============== -- ``QUIT``, ``INT``: Quick shutdown +- ``QUIT``, ``INT``: Quick shutdown. Waits for workers to finish their current + requests up to the :ref:`quick-shutdown-timeout`. - ``TERM``: Graceful shutdown. Waits for workers to finish their - current requests up to the :ref:`graceful-timeout`. + current requests up to the sum of + :ref:`graceful-timeout` and :ref:`quick-shutdown-timeout`. - ``HUP``: Reload the configuration, start the new worker processes with a new configuration and gracefully shutdown older workers. 
If the application is not preloaded (using the :ref:`preload-app` option), Gunicorn will also load diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 646d684ef..15fecd07e 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -386,18 +386,23 @@ def stop(self, graceful=True): sock.close_sockets(self.LISTENERS, unlink) self.LISTENERS = [] - sig = signal.SIGTERM - if not graceful: - sig = signal.SIGQUIT - limit = time.time() + self.cfg.graceful_timeout - # instruct the workers to exit - self.kill_workers(sig) - # wait until the graceful timeout - while self.WORKERS and time.time() < limit: - time.sleep(0.1) + + if graceful: + deadline = time.time() + self.cfg.graceful_timeout + self.kill_workers(signal.SIGTERM) + self.sleep_until(deadline) + + if not graceful or self.cfg.quick_shutdown_timeout > 0: + deadline = time.time() + self.cfg.quick_shutdown_timeout + self.kill_workers(signal.SIGINT) + self.sleep_until(deadline) self.kill_workers(signal.SIGKILL) + def sleep_until(self, deadline): + while self.WORKERS and time.time() < deadline: + time.sleep(0.1) + def reexec(self): """\ Relaunch the master and workers. diff --git a/gunicorn/config.py b/gunicorn/config.py index 07c5aab34..b79aa1367 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -365,6 +365,13 @@ def validate_pos_int(val): return val +def validate_pos_float(val): + val = float(val) + if val < 0: + raise ValueError("Value must be positive: %s" % val) + return val + + def validate_ssl_version(val): if val != SSLVersion.default: sys.stderr.write("Warning: option `ssl_version` is deprecated and it is ignored. Use ssl_context instead.\n") @@ -811,7 +818,25 @@ class GracefulTimeout(Setting): After receiving a restart signal, workers have this much time to finish serving requests. Workers still alive after the timeout (starting from - the receipt of the restart signal) are force killed. 
+ the receipt of the restart signal) are sent a quick shutdown signal (if + quick_shutdown_timeout is greater than zero) then are force killed. + """ + + +class QuickShutdownTimeout(Setting): + name = "quick_shutdown_timeout" + section = "Worker Processes" + cli = ["--quick-shutdown-timeout"] + meta = "INT" + validator = validate_pos_float + type = float + default = 0 + desc = """\ + Timeout for quick worker shutdown. + + After receiving a quick shutdown signal, workers have this much time to + finish serving requests. Workers still alive after the timeout (starting + from the receipt of the quick shutdown signal) are force killed. """ diff --git a/gunicorn/workers/base.py b/gunicorn/workers/base.py index 93c465c98..5a5ef64c1 100644 --- a/gunicorn/workers/base.py +++ b/gunicorn/workers/base.py @@ -191,12 +191,18 @@ def handle_usr1(self, sig, frame): def handle_exit(self, sig, frame): self.alive = False + def quick_exit(self): + timeout = self.cfg.quick_shutdown_timeout + if timeout <= 0: + timeout = 0.1 + time.sleep(timeout) + sys.exit(0) + def handle_quit(self, sig, frame): self.alive = False # worker_int callback self.cfg.worker_int(self) - time.sleep(0.1) - sys.exit(0) + self.quick_exit() def handle_abort(self, sig, frame): self.alive = False diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 7a23228cd..445cc1648 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -102,8 +102,7 @@ def handle_quit(self, sig, frame): # worker_int callback self.cfg.worker_int(self) self.tpool.shutdown(False) - time.sleep(0.1) - sys.exit(0) + self.quick_exit() def _wrap_future(self, fs, conn): fs.conn = conn diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index 8c1527e26..f30ad73c4 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -3,11 +3,12 @@ # See the NOTICE for more information. 
import os +import signal from unittest import mock import gunicorn.app.base import gunicorn.arbiter -from gunicorn.config import ReusePort +import gunicorn.config class DummyApplication(gunicorn.app.base.BaseApplication): @@ -63,12 +64,55 @@ def test_arbiter_stop_does_not_unlink_systemd_listeners(close_sockets): @mock.patch('gunicorn.sock.close_sockets') def test_arbiter_stop_does_not_unlink_when_using_reuse_port(close_sockets): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) - arbiter.cfg.settings['reuse_port'] = ReusePort() + arbiter.cfg.settings['reuse_port'] = gunicorn.config.ReusePort() arbiter.cfg.settings['reuse_port'].set(True) arbiter.stop() close_sockets.assert_called_with([], False) +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_graceful_no_sigquit(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.cfg.settings['graceful_timeout'] = gunicorn.config.GracefulTimeout() + arbiter.cfg.settings['graceful_timeout'].set(1) + arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop() + kill.assert_has_calls([ + mock.call(42, signal.SIGTERM), + mock.call(42, signal.SIGKILL), + ]) + + +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_quick(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop(graceful=False) + kill.assert_has_calls([ + mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGKILL), + ]) + + +@mock.patch('os.kill') +@mock.patch('gunicorn.sock.close_sockets') +def test_arbiter_stop_graceful_then_quick(close_sockets, kill): + arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) + arbiter.cfg.settings['graceful_timeout'] = gunicorn.config.GracefulTimeout() + arbiter.cfg.settings['graceful_timeout'].set(1) + arbiter.cfg.settings['quick_shutdown_timeout'] = gunicorn.config.QuickShutdownTimeout() + arbiter.cfg.settings['quick_shutdown_timeout'].set(0.1) + 
arbiter.WORKERS = {42: mock.Mock()} + arbiter.stop() + kill.assert_has_calls([ + mock.call(42, signal.SIGTERM), + mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGKILL), + ]) + + @mock.patch('os.getpid') @mock.patch('os.fork') @mock.patch('os.execvpe') From 129e74e6709917860616c4e1060a73c6825734f0 Mon Sep 17 00:00:00 2001 From: Adrien Kunysz Date: Mon, 6 Oct 2025 09:20:13 +0200 Subject: [PATCH 2/4] Specify the unit of quick_shutdown_timeout. --- gunicorn/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/config.py b/gunicorn/config.py index b79aa1367..97e5626cf 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -832,7 +832,7 @@ class QuickShutdownTimeout(Setting): type = float default = 0 desc = """\ - Timeout for quick worker shutdown. + Timeout for quick worker shutdown in seconds. After receiving a quick shutdown signal, workers have this much time to finish serving requests. Workers still alive after the timeout (starting From ebe313fa3334c947288ddd8f3656b998a09a21e3 Mon Sep 17 00:00:00 2001 From: Adrien Kunysz Date: Mon, 20 Oct 2025 08:06:31 +0200 Subject: [PATCH 3/4] Revert back to SIGQUIT. In the previous commit, I changed that signal from SIGQUIT to SIGINT without apparent reason. Either should work as they are meant to be used the same way for the same purpose. Still, there was no reason to change so let's revert. 
--- gunicorn/arbiter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 15fecd07e..e0eac19b0 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -394,7 +394,7 @@ def stop(self, graceful=True): if not graceful or self.cfg.quick_shutdown_timeout > 0: deadline = time.time() + self.cfg.quick_shutdown_timeout - self.kill_workers(signal.SIGINT) + self.kill_workers(signal.SIGQUIT) self.sleep_until(deadline) self.kill_workers(signal.SIGKILL) From e2dada62daa9a0bb91cce588f790b521207c3c05 Mon Sep 17 00:00:00 2001 From: Adrien Kunysz Date: Tue, 21 Oct 2025 08:22:14 +0200 Subject: [PATCH 4/4] Revert tests back to SIGQUIT. Follow up on https://github.com/benoitc/gunicorn/pull/3388/commits/ebe313fa3334c947288ddd8f3656b998a09a21e3 --- tests/test_arbiter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index f30ad73c4..e34398dd6 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -91,7 +91,7 @@ def test_arbiter_stop_quick(close_sockets, kill): arbiter.WORKERS = {42: mock.Mock()} arbiter.stop(graceful=False) kill.assert_has_calls([ - mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGQUIT), mock.call(42, signal.SIGKILL), ]) @@ -108,7 +108,7 @@ def test_arbiter_stop_graceful_then_quick(close_sockets, kill): arbiter.stop() kill.assert_has_calls([ mock.call(42, signal.SIGTERM), - mock.call(42, signal.SIGINT), + mock.call(42, signal.SIGQUIT), mock.call(42, signal.SIGKILL), ])