diff --git a/docs/source/deploy.rst b/docs/source/deploy.rst index 5f8689793..eb83baf35 100644 --- a/docs/source/deploy.rst +++ b/docs/source/deploy.rst @@ -247,7 +247,8 @@ to the newly created unix socket: [Service] # gunicorn can let systemd know when it is ready - Type=notify + # if systemd versions >= v253, otherwise use 'Type=notify' + Type=notify-reload NotifyAccess=main # the specific user that our service will run as User=someuser @@ -257,7 +258,8 @@ to the newly created unix socket: RuntimeDirectory=gunicorn WorkingDirectory=/home/someuser/applicationroot ExecStart=/usr/bin/gunicorn applicationname.wsgi - ExecReload=/bin/kill -s HUP $MAINPID + # if 'Type=notify' instead of 'Type=notify-reload' (depending on systemd version) + # ExecReload=/bin/kill -s HUP $MAINPID KillMode=mixed TimeoutStopSec=5 PrivateTmp=true diff --git a/docs/source/settings.rst b/docs/source/settings.rst index 046770616..10e7d41b3 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1100,6 +1100,10 @@ A filename to use for the PID file. If not set, no PID file will be written. +.. note:: + During master re-exec, a ``.2`` suffix is added to + this path to store the PID of the newly launched master. + .. _worker-tmp-dir: ``worker_tmp_dir`` @@ -1591,6 +1595,11 @@ If the ``PORT`` environment variable is defined, the default is ``['0.0.0.0:$PORT']``. If it is not defined, the default is ``['127.0.0.1:8000']``. +.. note:: + Specifying any fd://FD socket or inheriting any socket from systemd + (LISTEN_FDS) results in other bind addresses to be skipped. + Do not mix fd://FD and systemd socket activation. + .. 
_backlog: ``backlog`` diff --git a/docs/source/signals.rst b/docs/source/signals.rst index c22ea0362..cea20dfe2 100644 --- a/docs/source/signals.rst +++ b/docs/source/signals.rst @@ -117,3 +117,9 @@ running:: 20859 benoitc 20 0 55748 11m 1500 S 0.0 0.1 0:00.02 gunicorn: worker [test:app] 20860 benoitc 20 0 55748 11m 1500 S 0.0 0.1 0:00.02 gunicorn: worker [test:app] 20861 benoitc 20 0 55748 11m 1500 S 0.0 0.1 0:00.01 gunicorn: worker [test:app] + +If no pidfile is available (so ``kill -TERM $(cat /var/run/gunicorn.pid)`` cannot be used), then killing +the *oldest* process (``pkill --oldest -TERM -f "gunicorn: master "``) should suffice. + +When running via systemd socket activation, Gunicorn will *automatically* issue the graceful +shutdown of the old master, as part of starting up the new one. diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 646d684ef..28b15ec1b 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -60,14 +60,19 @@ def __init__(self, app): self.pidfile = None self.systemd = False self.worker_age = 0 + # old master has != 0 until new master is dead or promoted self.reexec_pid = 0 + # new master has != 0 until old master is dead (until promotion) self.master_pid = 0 self.master_name = "Master" cwd = util.getcwd() - args = sys.argv[:] - args.insert(0, sys.executable) + if sys.version_info < (3, 10): + args = sys.argv[:] + args.insert(0, sys.executable) + else: + args = sys.orig_argv[:] # init start context self.START_CTX = { @@ -146,6 +151,7 @@ def start(self): self.systemd = True fds = range(systemd.SD_LISTEN_FDS_START, systemd.SD_LISTEN_FDS_START + listen_fds) + self.log.debug("Inherited sockets from systemd: %r", fds) elif self.master_pid: fds = [] @@ -167,6 +173,8 @@ def start(self): self.cfg.when_ready(self) + # systemd: not yet shutting down old master here (wait for workers) + def init_signals(self): """\ Initialize master signal handling.
Most of the signals @@ -251,7 +259,10 @@ def handle_hup(self): - Gracefully shutdown the old worker processes """ self.log.info("Hang up: %s", self.master_name) + systemd.sd_notify("RELOADING=1\nSTATUS=Gunicorn arbiter reloading..", self.log) self.reload() + # possibly premature, newly launched workers might have failed + systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter reloaded", self.log) def handle_term(self): "SIGTERM handling" @@ -327,6 +338,14 @@ def maybe_promote_master(self): self.pidfile.rename(self.cfg.pidfile) # reset proctitle util._setproctitle("master [%s]" % self.proc_name) + # MAINPID does not change here, it was already set on fork + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter promoted" % (os.getpid(), ), self.log) + + elif self.systemd and len(self.WORKERS) >= 1: + # still attached to old master, but we are ready to take over + # this automates `kill -TERM $(cat /var/run/gunicorn.pid)` + self.log.debug("systemd managed: shutting down old master %d after re-exec", self.master_pid) + os.kill(self.master_pid, signal.SIGTERM) def wakeup(self): """\ @@ -340,6 +359,13 @@ def wakeup(self): def halt(self, reason=None, exit_status=0): """ halt arbiter """ + if self.master_pid != 0: + # if NotifyAccess=main, systemd needs to know old master is in control + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=New arbiter shutdown" % (self.master_pid, ), self.log) + elif self.reexec_pid == 0: + # skip setting status if this is merely superseded master stopping + systemd.sd_notify("STOPPING=1\nSTATUS=Shutting down..", self.log) + self.stop() log_func = self.log.info if exit_status == 0 else self.log.error @@ -413,8 +439,14 @@ def reexec(self): master_pid = os.getpid() self.reexec_pid = os.fork() if self.reexec_pid != 0: + # let systemd know they will be in control after exec() + systemd.sd_notify( + "RELOADING=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec in forked.." 
% (self.reexec_pid, ), self.log + ) + # old master return + # new master self.cfg.pre_exec(self) environ = self.cfg.env_orig.copy() @@ -430,7 +462,12 @@ def reexec(self): os.chdir(self.START_CTX['cwd']) # exec the process using the original environment - os.execvpe(self.START_CTX[0], self.START_CTX['args'], environ) + self.log.debug("exe=%r argv=%r" % (self.START_CTX[0], self.START_CTX['args'])) + # let systemd know we will be in control after exec() + systemd.sd_notify( + "RELOADING=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec in progress.." % (os.getpid(), ), self.log + ) + os.execve(self.START_CTX[0], self.START_CTX['args'], environ) def reload(self): old_address = self.cfg.address @@ -519,7 +556,14 @@ def reap_workers(self): break if self.reexec_pid == wpid: self.reexec_pid = 0 + self.log.info("Master exited before promotion.") + # let systemd know we are (back) in control + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Old arbiter promoted" % (os.getpid(), ), self.log) else: + worker = self.WORKERS.pop(wpid, None) + if not worker: + self.log.debug("Non-worker subprocess (pid:%s) exited", wpid) + continue # A worker was terminated. If the termination reason was # that it could not boot, we'll shut it down to avoid # infinite start/stop cycles. @@ -554,9 +598,6 @@ def reap_workers(self): msg += " Perhaps out of memory?" self.log.error(msg) - worker = self.WORKERS.pop(wpid, None) - if not worker: - continue worker.tmp.close() self.cfg.child_exit(self, worker) except OSError as e: diff --git a/gunicorn/config.py b/gunicorn/config.py index 29b30ad23..d37ce05a2 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -616,6 +616,11 @@ class Bind(Setting): If the ``PORT`` environment variable is defined, the default is ``['0.0.0.0:$PORT']``. If it is not defined, the default is ``['127.0.0.1:8000']``. + + .. note:: + Specifying any fd://FD socket or inheriting any socket from systemd + (LISTEN_FDS) results in other bind addresses to be skipped. 
+ Do not mix fd://FD and systemd socket activation. """ @@ -1123,6 +1128,10 @@ class Pidfile(Setting): A filename to use for the PID file. If not set, no PID file will be written. + + .. note:: + During master re-exec, a ``.2`` suffix is added to + this path to store the PID of the newly launched master. """ diff --git a/gunicorn/instrument/statsd.py b/gunicorn/instrument/statsd.py index 7bc4e6ffd..5657c343e 100644 --- a/gunicorn/instrument/statsd.py +++ b/gunicorn/instrument/statsd.py @@ -35,6 +35,7 @@ def __init__(self, cfg): self.sock = socket.socket(address_family, socket.SOCK_DGRAM) self.sock.connect(cfg.statsd_host) except Exception: + self.sock.close() self.sock = None self.dogstatsd_tags = cfg.dogstatsd_tags diff --git a/gunicorn/sock.py b/gunicorn/sock.py index eb2b6fa9c..6e39b4d76 100644 --- a/gunicorn/sock.py +++ b/gunicorn/sock.py @@ -24,8 +24,8 @@ def __init__(self, address, conf, log, fd=None): sock = socket.socket(self.FAMILY, socket.SOCK_STREAM) bound = False else: - sock = socket.fromfd(fd, self.FAMILY, socket.SOCK_STREAM) - os.close(fd) + # does not duplicate the fd, this LISTEN_FDS stays at fds 3+N + sock = socket.socket(self.FAMILY, socket.SOCK_STREAM, fileno=fd) bound = True self.sock = self.set_options(sock, bound=bound) @@ -156,6 +156,12 @@ def create_sockets(conf, log, fds=None): fdaddr += list(fds) laddr = [bind for bind in addr if not isinstance(bind, int)] + # LISTEN_FDS=1 + fd://3 + uniq_fdaddr = set() + duped_fdaddr = {fd for fd in fdaddr if fd in uniq_fdaddr or uniq_fdaddr.add(fd)} + if duped_fdaddr: + log.warning("Binding with fd:// is unsupported with systemd/re-exec.") + # check ssl config early to raise the error on startup # only the certfile is needed since it can contains the keyfile if conf.certfile and not os.path.exists(conf.certfile): @@ -167,9 +173,11 @@ def create_sockets(conf, log, fds=None): # sockets are already bound if fdaddr: for fd in fdaddr: - sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM) + sock = 
socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, fileno=fd) sock_name = sock.getsockname() sock_type = _sock_type(sock_name) + log.debug("listen: fd %d => fd %d for %s", fd, sock.fileno(), sock_name) + sock.detach() # only created to call getsockname(), will re-attach shortly listener = sock_type(sock_name, conf, log, fd=fd) listeners.append(listener) diff --git a/gunicorn/systemd.py b/gunicorn/systemd.py index 9b1855060..18704fd0a 100644 --- a/gunicorn/systemd.py +++ b/gunicorn/systemd.py @@ -4,6 +4,7 @@ import os import socket +import time SD_LISTEN_FDS_START = 3 @@ -66,6 +67,13 @@ def sd_notify(state, logger, unset_environment=False): if addr[0] == '@': addr = '\0' + addr[1:] sock.connect(addr) + if state[-1] != '\n': + state += "\n" + # needed for notify-reload, but no harm in sending unconditionally + # nsec = 10**-9, usec = 10**-6 + monotonic_usecs = time.clock_gettime_ns(time.CLOCK_MONOTONIC) // 1000 + state += "MONOTONIC_USEC=%d\n" % (monotonic_usecs, ) + logger.debug("sd_notify: %r" % (state, )) sock.sendall(state.encode('utf-8')) except Exception: logger.debug("Exception while invoking sd_notify()", exc_info=True) diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index 8c1527e26..a12bb997a 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -71,24 +71,27 @@ def test_arbiter_stop_does_not_unlink_when_using_reuse_port(close_sockets): @mock.patch('os.getpid') @mock.patch('os.fork') -@mock.patch('os.execvpe') -def test_arbiter_reexec_passing_systemd_sockets(execvpe, fork, getpid): +@mock.patch('os.execve') +@mock.patch('gunicorn.systemd.sd_notify') +def test_arbiter_reexec_passing_systemd_sockets(sd_notify, execve, fork, getpid): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) arbiter.LISTENERS = [mock.Mock(), mock.Mock()] arbiter.systemd = True fork.return_value = 0 - getpid.side_effect = [2, 3] + sd_notify.return_value = None + getpid.side_effect = [2, 3, 3] # 2 getpid calls in new master arbiter.reexec() - environ =
execvpe.call_args[0][2] + environ = execve.call_args[0][2] assert environ['GUNICORN_PID'] == '2' assert environ['LISTEN_FDS'] == '2' assert environ['LISTEN_PID'] == '3' + sd_notify.assert_called_once() @mock.patch('os.getpid') @mock.patch('os.fork') -@mock.patch('os.execvpe') -def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid): +@mock.patch('os.execve') +def test_arbiter_reexec_passing_gunicorn_sockets(execve, fork, getpid): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) listener1 = mock.Mock() listener2 = mock.Mock() @@ -98,7 +101,7 @@ def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid): fork.return_value = 0 getpid.side_effect = [2, 3] arbiter.reexec() - environ = execvpe.call_args[0][2] + environ = execve.call_args[0][2] assert environ['GUNICORN_FD'] == '4,5' assert environ['GUNICORN_PID'] == '2'