Skip to content

Commit 3059665

Browse files
authored
RHAIENG-1161: test(dockerfile): add test for probe logic and notebook args in codeserver (#2504)
* RHAIENG-1161: test(dockerfile): add test for probe logic and notebook args in codeserver This way it should now be impossible to build a container image that is then unable to start. ``` nginx: [alert] could not open error log file: open() "/var/log/nginx/error.log" failed (13: Permission denied) ``` * script -qc "/opt/app-root/bin/run-code-server.sh" /dev/null & works
1 parent 4897f0f commit 3059665

File tree

6 files changed

+224
-1
lines changed

6 files changed

+224
-1
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,4 +526,4 @@ print-release:
526526
.PHONY: test
527527
test:
528528
@echo "Running quick static tests"
529-
uv run pytest
529+
uv run pytest -m 'not buildonlytest'

codeserver/ubi9-python-3.12/Dockerfile.cpu

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,15 @@ RUN chmod -R g+w /opt/app-root/lib/python3.12/site-packages && \
270270
WORKDIR /opt/app-root/src
271271

272272
CMD ["/opt/app-root/bin/run-code-server.sh"]
273+
274+
FROM codeserver as tests
275+
ARG CODESERVER_SOURCE_CODE=codeserver/ubi9-python-3.12
276+
COPY ${CODESERVER_SOURCE_CODE}/test /tmp/test
277+
# TODO(jdanek): add --mount=type=bind,target=/opt/app-root/src
278+
RUN <<'EOF'
279+
set -Eeuxo pipefail
280+
python3 /tmp/test/test_startup.py |& tee /tmp/test_log.txt
281+
EOF
282+
283+
from codeserver
284+
COPY --from=tests /tmp/test_log.txt /tmp/test_log.txt
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
## test_codeserver_startup
2+
3+
Checks that the image entrypoint starts up and the image passes the readiness probe.
4+
5+
### Konflux considerations
6+
7+
Workaround for [KFLUXSPRT-5139](https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1758128206734419):
8+
9+
Rootless (or more like remote?) buildah, or is it shipwright on openshift, has `/dev/stderr` be a pipe that may not be reopened.
10+
Since startup script creates some symlinks that then lead nginx to attempt to reopen `/dev/stderr`, it causes the following error:
11+
12+
nginx: [alert] could not open error log file: open() "/var/log/nginx/error.log" failed (13: Permission denied)
13+
14+
Here's how it looks like in the Konflux container:
15+
16+
```
17+
(app-root) cd /var/log/nginx/
18+
(app-root) ls -al
19+
total 20
20+
drwxrwx--x. 1 default root 4096 Sep 17 17:18 .
21+
drwxr-xr-x. 1 root root 4096 Sep 17 15:13 ..
22+
lrwxrwxrwx. 1 default root 11 Sep 17 17:18 access.log -> /dev/stdout
23+
-rw-r--r--. 1 default root 132 Sep 17 17:18 codeserver.access.log
24+
lrwxrwxrwx. 1 default root 11 Sep 17 17:18 error.log -> /dev/stderr
25+
```
26+
27+
```
28+
lrwxrwxrwx. 1 root root 15 Sep 12 11:24 /dev/stderr -> /proc/self/fd/2
29+
l-wx------. 1 default root 64 Sep 17 17:38 /proc/self/fd/2 -> pipe:[9523152]
30+
ls: cannot access '/dev/pts/0': No such file or directory
31+
```
32+
33+
On a regular Linux machine, we have `/dev/pts/0`
34+
35+
```
36+
lrwx------. 1 default root 64 Sep 17 17:34 /proc/self/fd/2 -> /dev/pts/0
37+
```
38+
39+
To solve this, we need to either stop doing the symlinks for nginx, or replace /proc/self/fd/2 with something that nginx can reopen.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python3
2+
from __future__ import annotations
3+
4+
"""
5+
Reproduce kubelet httpGet probe logic for the supplied path.
6+
Exits 0 if the probe will eventually pass, 1 otherwise.
7+
"""
8+
import sys
9+
import time
10+
from http.client import HTTPConnection, HTTPSConnection
11+
from typing import TYPE_CHECKING
12+
13+
if TYPE_CHECKING:
14+
from typing import Type
15+
16+
# probe constants from the odh-dashboard Deployment
17+
# https://github.com/opendatahub-io/odh-dashboard/blob/2.4.0-release/backend/src/utils/notebookUtils.ts#L310-L332
18+
INITIAL_DELAY = 10
19+
PERIOD = 5
20+
TIMEOUT = 1
21+
FAIL_THRESH = 3
22+
SUCCESS_THRESH = 1
23+
24+
25+
def probe_once(connection_factory: Type[HTTPConnection], host: str, port: int, path: str) -> tuple[bool, str]:
26+
conn = None
27+
try:
28+
conn = connection_factory(host, port, timeout=TIMEOUT)
29+
conn.request("GET", path)
30+
resp = conn.getresponse()
31+
return 200 <= resp.status < 400, str(resp.status)
32+
except Exception as e:
33+
return False, str(e)
34+
finally:
35+
try:
36+
if conn:
37+
conn.close()
38+
except Exception:
39+
pass
40+
41+
42+
def main(namespace, name, port) -> int:
43+
path = f"/notebook/{namespace}/{name}/api"
44+
45+
# kubelet waits initialDelaySeconds before the first probe
46+
time.sleep(INITIAL_DELAY)
47+
48+
consecutive_failures = 0
49+
consecutive_successes = 0
50+
51+
while True:
52+
ok, err = probe_once(HTTPConnection, "127.0.0.1", int(port), path)
53+
print("probe_once:", ok, err)
54+
55+
if ok:
56+
consecutive_successes += 1
57+
consecutive_failures = 0
58+
if consecutive_successes >= SUCCESS_THRESH:
59+
return 0
60+
else:
61+
consecutive_failures += 1
62+
consecutive_successes = 0
63+
if consecutive_failures >= FAIL_THRESH:
64+
return 1
65+
66+
time.sleep(PERIOD)
67+
68+
69+
if __name__ == "__main__":
70+
if len(sys.argv) != 4:
71+
print("usage: probe_check.py <namespace> <name> <notebook-port>", file=sys.stderr)
72+
sys.exit(2)
73+
74+
namespace, name, port = sys.argv[1:]
75+
sys.exit(main(namespace, name, port))
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import os
2+
import select
3+
import subprocess
4+
import sys
5+
import threading
6+
import unittest
7+
from pathlib import Path
8+
9+
try:
10+
import pytest
11+
except ImportError:
12+
13+
class pytest:
14+
class _Mark:
15+
@classmethod
16+
def __getattr__(cls, item):
17+
return lambda x: x
18+
19+
mark = _Mark()
20+
21+
22+
import probe_check
23+
24+
25+
@pytest.mark.buildonlytest
26+
class TestStartup(unittest.TestCase):
27+
def setUp(self):
28+
self.project_name = "projectName"
29+
self.notebook_id = "notebookId"
30+
self.translated_username = "sometranslatedUsername"
31+
self.origin = "https://origin"
32+
self.root_dir = Path(__file__).parent
33+
34+
def get_notebook_args_configuration(self) -> str:
35+
expected_args = " ".join(
36+
[
37+
"--ServerApp.port=8888",
38+
"--ServerApp.token=''",
39+
"--ServerApp.password=''",
40+
f"--ServerApp.base_url=/notebook/{self.project_name}/{self.notebook_id}",
41+
"--ServerApp.quit_button=False",
42+
f'--ServerApp.tornado_settings={{"user":"{self.translated_username}","hub_host":"{self.origin}","hub_prefix":"/projects/{self.project_name}"}}',
43+
]
44+
)
45+
return expected_args
46+
47+
def get_environment_variables(self) -> dict[str, str]:
48+
notebook_args = self.get_notebook_args_configuration()
49+
nb_prefix = f"/notebook/{self.project_name}/{self.notebook_id}"
50+
return {
51+
# (outdated?) https://github.com/opendatahub-io/odh-dashboard/blob/2.4.0-release/backend/src/utils/notebookUtils.ts#L284-L293
52+
# https://github.com/opendatahub-io/odh-dashboard/blob/1d5a9065c10acc4706b84b06c67f27f16cf6dee7/frontend/src/api/k8s/notebooks.ts#L157-L170
53+
"NOTEBOOK_ARGS": notebook_args,
54+
# NB_PREFIX is set by notebook-controller and codeserver scripting depends on it
55+
# https://github.com/opendatahub-io/kubeflow/blob/f924a96375988fe3801db883e99ce9ed1ab5939c/components/notebook-controller/controllers/notebook_controller.go#L417
56+
"NB_PREFIX": nb_prefix,
57+
}
58+
59+
def test_codeserver_startup(self):
60+
env = self.get_environment_variables()
61+
command = ["/opt/app-root/bin/run-code-server.sh"]
62+
p = subprocess.Popen(
63+
command,
64+
# KFLUXSPRT-5139: https://redhat-internal.slack.com/archives/C04PZ7H0VA8/p1758128206734419
65+
stdout=subprocess.PIPE,
66+
stderr=subprocess.STDOUT,
67+
env={**os.environ, **env},
68+
)
69+
thr = threading.Thread(target=pump_stream, args=(p,), daemon=True)
70+
thr.start()
71+
72+
try:
73+
ret = probe_check.main(self.project_name, self.notebook_id, "8888")
74+
self.assertEqual(ret, 0, "Probe check should return 0")
75+
self.assertEqual(p.poll(), None, "Code server process should still be running")
76+
finally:
77+
p.terminate()
78+
p.wait()
79+
80+
thr.join()
81+
82+
83+
def pump_stream(process: subprocess.Popen) -> None:
84+
"""Copy everything from stream -> stdout."""
85+
# this may stop pumping even if there's something still buffered, so be it
86+
while process.returncode is None:
87+
rl, _, _ = select.select([process.stdout], [], [], 1)
88+
if rl:
89+
chunk = process.stdout.read1()
90+
sys.stdout.buffer.write(chunk)
91+
sys.stdout.buffer.flush()
92+
process.stdout.close()
93+
94+
95+
if __name__ == "__main__":
96+
unittest.main(verbosity=2)

pytest.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ markers =
2121
openshift: requires openshift to run,
2222
cuda: requires cuda to run,
2323
rocm: requires rocm to run,
24+
buildonlytest: runs inside docker build,

0 commit comments

Comments
 (0)