Skip to content

Commit b36a6a7

Browse files
committed
simplify python entrypoint
- remove redundant monitor sibling process
- use python3-login executable instead of login shell subprocess (same effect, but in a more natural order)
- use non-blocking binary IO in tee instead of readline (switch to binary mode, as text wrappers don't support non-blocking mode; see https://bugs.python.org/issue13322)
1 parent 0f848f7 commit b36a6a7

File tree

3 files changed

+52
-107
lines changed

3 files changed

+52
-107
lines changed

repo2docker/buildpacks/base.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
import sys
1010
import hashlib
1111
import escapism
12-
import xml.etree.ElementTree as ET
13-
14-
from traitlets import Dict
1512

1613
# Only use syntax features supported by Docker 17.09
1714
TEMPLATE = r"""
@@ -182,6 +179,7 @@
182179
183180
# Add entrypoint
184181
ENV PYTHONUNBUFFERED=1
182+
COPY /python3-login /usr/local/bin/python3-login
185183
COPY /repo2docker-entrypoint /usr/local/bin/repo2docker-entrypoint
186184
ENTRYPOINT ["/usr/local/bin/repo2docker-entrypoint"]
187185
@@ -194,9 +192,7 @@
194192
{% endif %}
195193
"""
196194

197-
ENTRYPOINT_FILE = os.path.join(
198-
os.path.dirname(os.path.abspath(__file__)), "repo2docker-entrypoint"
199-
)
195+
HERE = os.path.dirname(os.path.abspath(__file__))
200196

201197
# Also used for the group
202198
DEFAULT_NB_UID = 1000
@@ -583,7 +579,8 @@ def _filter_tar(tar):
583579
dest_path, src_path = self.generate_build_context_filename(src)
584580
tar.add(src_path, dest_path, filter=_filter_tar)
585581

586-
tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar)
582+
for fname in ("repo2docker-entrypoint", "python3-login"):
583+
tar.add(os.path.join(HERE, fname), fname, filter=_filter_tar)
587584

588585
tar.add(".", "src/", filter=_filter_tar)
589586

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash -l
2+
# This is an executable that launches Python in a login shell
3+
# to ensure that full profile setup occurs.
4+
# shebang on linux only allows 1 argument,
5+
# so we couldn't pick a login shell in one shebang line
6+
# for a Python script
7+
8+
# -u means unbuffered, which one ~always wants in a container
9+
# otherwise output can be mysteriously missing
10+
11+
exec python3 -u "$@"

repo2docker/buildpacks/repo2docker-entrypoint

Lines changed: 37 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,158 +1,95 @@
1-
#!/usr/bin/env python3
1+
#!/usr/local/bin/python3-login
22
# note: must run on Python >= 3.5, which mainly means no f-strings
33

44
# goals:
55
# - load environment variables from a login shell (bash -l)
66
# - preserve signal handling of subprocess (kill -TERM and friends)
77
# - tee output to a log file
88

9-
import json
9+
import fcntl
1010
import os
11+
import select
1112
import signal
1213
import subprocess
1314
import sys
14-
import time
15-
16-
17-
def get_login_env():
18-
"""Instantiate a login shell to retrieve environment variables
19-
20-
Serialize with Python to ensure proper escapes
21-
"""
22-
p = subprocess.run(
23-
[
24-
"bash",
25-
"-l",
26-
"-c",
27-
"python3 -c 'import os, json; print(json.dumps(dict(os.environ)))'",
28-
],
29-
stdout=subprocess.PIPE,
30-
)
31-
if p.returncode:
32-
print("Error getting login env")
33-
return {}
34-
35-
last_line = p.stdout.splitlines()[-1]
36-
try:
37-
return json.loads(last_line)
38-
except Exception as e:
39-
print("Error getting login env: {e}".format(e=e), file=sys.stderr)
40-
return {}
41-
42-
43-
def monitor_parent(parent_pid, child_pgid):
44-
"""Monitor parent_pid and shutdown child_pgid if parent goes away first"""
45-
while True:
46-
try:
47-
os.kill(parent_pid, 0)
48-
except ProcessLookupError:
49-
# parent is gone, likely by SIGKILL
50-
# send SIGKILL to child process group
51-
try:
52-
os.killpg(child_pgid, signal.SIGKILL)
53-
except (ProcessLookupError, PermissionError):
54-
# ignore if the child is already gone
55-
pass
56-
return
57-
else:
58-
time.sleep(1)
5915

16+
# output chunk size to read
17+
CHUNK_SIZE = 1024
6018

6119
# signals to be forwarded to the child
62-
SIGNALS = [
63-
signal.SIGHUP,
64-
signal.SIGINT,
65-
# signal.SIGKILL,
66-
signal.SIGQUIT,
67-
signal.SIGTERM,
68-
signal.SIGUSR1,
69-
signal.SIGUSR2,
70-
signal.SIGWINCH,
71-
]
20+
# everything catchable, excluding SIGCHLD
21+
SIGNALS = set(signal.Signals) - {signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD}
7222

7323

7424
def main():
7525

76-
# load login shell environment
77-
login_env = get_login_env()
78-
env = os.environ.copy()
79-
env.update(login_env)
80-
8126
# open log file to send output
8227
log_file = open(
8328
os.path.join(os.environ.get("REPO_DIR", "."), ".jupyter-server-log.txt"),
84-
"a",
29+
"ab",
8530
)
8631

32+
# build the command
33+
# like `exec "$@"`
8734
command = sys.argv[1:]
35+
# load entrypoint override from env
8836
r2d_entrypoint = os.environ.get("R2D_ENTRYPOINT")
8937
if r2d_entrypoint:
9038
command.insert(0, r2d_entrypoint)
9139

40+
# launch the subprocess
9241
child = subprocess.Popen(
9342
command,
9443
bufsize=1,
95-
env=env,
96-
start_new_session=True,
9744
stdout=subprocess.PIPE,
9845
stderr=subprocess.STDOUT,
99-
universal_newlines=True,
10046
)
101-
child_pgid = os.getpgid(child.pid)
102-
103-
# if parent is forcefully shutdown,
104-
# make sure child shuts down immediately as well
105-
parent_pid = os.getpid()
106-
107-
monitor_pid = os.fork()
108-
if monitor_pid == 0:
109-
# child process, sibling of 'real' command
110-
# avoid receiving signals sent to parent
111-
os.setpgrp()
112-
# terminate child if parent goes away,
113-
# e.g. in ungraceful KILL not relayed to children
114-
monitor_parent(parent_pid, child_pgid)
115-
return
11647

11748
# hook up ~all signals so that every signal the parent gets,
11849
# the children also get
11950

12051
def relay_signal(sig, frame):
12152
"""Relay a signal to children"""
122-
print(
123-
"Forwarding signal {sig} to {child_pgid}".format(
124-
sig=sig, child_pgid=child_pgid
125-
)
126-
)
127-
os.killpg(child_pgid, sig)
128-
129-
# question: maybe use all valid_signals() except a few, e.g. SIGCHLD?
130-
# rather than opt-in list
53+
# DEBUG: show signal
54+
child.send_signal(sig)
55+
13156
for signum in SIGNALS:
13257
signal.signal(signum, relay_signal)
13358

13459
# tee output from child to both our stdout and the log file
13560
def tee(chunk):
136-
for f in [sys.stdout, log_file]:
61+
"""Tee output from child to both our stdout and the log file"""
62+
for f in [sys.stdout.buffer, log_file]:
13763
f.write(chunk)
13864
f.flush()
13965

66+
# make stdout pipe non-blocking
67+
# this means child.stdout.read(nbytes)
68+
# will always return immediately, even if there's nothing to read
69+
flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL)
70+
fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
71+
poller = select.poll()
72+
poller.register(child.stdout)
73+
74+
# while child is running, constantly relay output
14075
while child.poll() is None:
141-
tee(child.stdout.readline())
76+
chunk = child.stdout.read(CHUNK_SIZE)
77+
if chunk:
78+
tee(chunk)
79+
else:
80+
# empty chunk means nothing to read
81+
# wait for output on the pipe
82+
# timeout is in milliseconds
83+
poller.poll(1000)
14284

143-
# flush the rest
85+
# child has exited, continue relaying any remaining output
86+
# At this point, read() will return empty bytes (b"") when it's done
14487
chunk = child.stdout.read()
14588
while chunk:
14689
tee(chunk)
14790
chunk = child.stdout.read()
14891

149-
# child exited, cleanup monitor
150-
try:
151-
os.kill(monitor_pid, signal.SIGKILL)
152-
except ProcessLookupError:
153-
pass
154-
155-
# preserve returncode
92+
# make our returncode match the child's returncode
15693
sys.exit(child.returncode)
15794

15895

0 commit comments

Comments
 (0)