Skip to content

Commit 0a99a6f

Browse files
Merge pull request #7 from InformaticsMatters/sc-3357
Better timeout handling
2 parents 409937d + c4e7ed2 commit 0a99a6f

File tree

7 files changed

+116
-62
lines changed

7 files changed

+116
-62
lines changed

.github/workflows/build.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,12 @@ jobs:
3232
strategy:
3333
matrix:
3434
python-version:
35-
- '3.10'
35+
- '3.11'
3636
steps:
3737
- name: Checkout
38-
uses: actions/checkout@v3
38+
uses: actions/checkout@v4
3939
- name: Set up Python ${{ matrix.python-version }}
40-
uses: actions/setup-python@v3
40+
uses: actions/setup-python@v4
4141
with:
4242
python-version: ${{ matrix.python-version }}
4343
- name: Install requirements

.github/workflows/publish.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ jobs:
2929
runs-on: ubuntu-latest
3030
steps:
3131
- name: Checkout
32-
uses: actions/checkout@v3
32+
uses: actions/checkout@v4
3333
- name: Inject slug/short variables
34-
uses: rlespinasse/github-slug-action@v3.x
34+
uses: rlespinasse/github-slug-action@v4
3535
- name: Set up Python
36-
uses: actions/setup-python@v3
36+
uses: actions/setup-python@v4
3737
with:
3838
python-version: '3.11'
3939
- name: Install dependencies

README.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ But jobs *are not* running in the same operating-system environment, e.g. they
2424
are not bound by the same processor and memory constraints they'll encounter in
2525
the Data Manager, which runs in `Kubernetes`_.
2626

27+
To use ``jote`` you will need to install ``docker-compose`` (v1 or v2).
28+
2729
A successful test should give you confidence that it *should* work in the
2830
Data Manager but without writing a lot of tests you'll never be completely
2931
confident that it will always run successfully.
@@ -336,8 +338,8 @@ Installation
336338
This is a Python 3 utility, so try to run it from a recent (ideally 3.10)
337339
Python environment.
338340

339-
To use the utility you will need to have installed `Docker`_ and,
340-
if you want to test nextflow jobs, `nextflow`_.
341+
To use the utility you will need to have installed `Docker`_, ``docker-compose``,
342+
and, if you want to test nextflow jobs, `nextflow`_.
341343

342344
.. _PyPI: https://pypi.org/project/im-jote/
343345
.. _Docker: https://docs.docker.com/get-docker/

requirements.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
docker-compose == 1.29.2
21
im-data-manager-job-decoder == 1.17.2
32
munch == 2.5.0
43
wheel == 0.40.0

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"Intended Audience :: Developers",
4242
"License :: OSI Approved :: MIT License",
4343
"Programming Language :: Python :: 3.10",
44+
"Programming Language :: Python :: 3.11",
4445
"Topic :: System :: Installation/Setup",
4546
"Operating System :: POSIX :: Linux",
4647
],

src/jote/compose.py

Lines changed: 101 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@
88
This module is designed to simulate the actions of the Data Manager
99
and Job Operator that are running in the DM kubernetes deployment.
1010
"""
11+
12+
import contextlib
1113
import copy
1214
import os
1315
import shutil
1416
import subprocess
17+
import sys
1518
import time
16-
from typing import Any, Dict, Optional, Tuple
19+
from typing import Any, Dict, List, Optional, Tuple
1720

1821
# The 'simulated' instance directory,
1922
# created by the Data Manager prior to launching the corresponding Job.
@@ -59,13 +62,44 @@
5962
"""
6063

6164

65+
def _get_docker_compose_command() -> str:
66+
# Try 'docker compose' (v2) and then 'docker-compose' (v1)
67+
# we need one or the other.
68+
dc_command: str = ""
69+
try:
70+
_ = subprocess.run(
71+
["docker", "compose", "version"],
72+
capture_output=True,
73+
check=False,
74+
timeout=4,
75+
)
76+
dc_command = "docker compose"
77+
except FileNotFoundError:
78+
with contextlib.suppress(FileNotFoundError):
79+
_ = subprocess.run(
80+
["docker-compose", "version"],
81+
capture_output=True,
82+
check=False,
83+
timeout=4,
84+
)
85+
dc_command = "docker-compose"
86+
if not dc_command:
87+
print("ERROR: Neither 'docker compose' nor 'docker-compose' has been found")
88+
print("One of these is required.")
89+
print("Please install one of them.")
90+
sys.exit(1)
91+
92+
assert dc_command
93+
return dc_command
94+
95+
6296
def _get_docker_compose_version() -> str:
63-
result = subprocess.run(
64-
["docker-compose", "version"], capture_output=True, check=False, timeout=4
65-
)
97+
dc_command = _get_docker_compose_command()
98+
version_cmd: List[str] = dc_command.split() + ["version"]
99+
result = subprocess.run(version_cmd, capture_output=True, check=False, timeout=4)
66100

67101
# stdout will contain the version on the first line: -
68-
# "docker-compose version 1.29.2, build unknown"
102+
# "docker-compose version v1.29.2, build unknown"
69103
# Ignore the first 23 characters of the first line...
70104
return str(result.stdout.decode("utf-8").split("\n")[0][23:])
71105

@@ -77,10 +111,12 @@ def get_test_root() -> str:
77111

78112

79113
class Compose:
80-
"""A class handling the execution of 'docker-compose'
114+
"""A class handling the execution of 'docker compose'
81115
for an individual test.
82116
"""
83117

118+
# The docker-compose command (for the first test)
119+
_COMPOSE_COMMAND: Optional[str] = None
84120
# The docker-compose version (for the first test)
85121
_COMPOSE_VERSION: Optional[str] = None
86122

@@ -144,10 +180,14 @@ def create(self) -> str:
144180
if os.path.exists(test_path):
145181
shutil.rmtree(test_path)
146182

183+
# Do we have the command?
184+
if not Compose._COMPOSE_COMMAND:
185+
Compose._COMPOSE_COMMAND = _get_docker_compose_command()
186+
print(f"# Compose command: {Compose._COMPOSE_COMMAND}")
147187
# Do we have the docker-compose version the user's installed?
148188
if not Compose._COMPOSE_VERSION:
149189
Compose._COMPOSE_VERSION = _get_docker_compose_version()
150-
print(f"# Compose: docker-compose ({Compose._COMPOSE_VERSION})")
190+
print(f"# Compose version: {Compose._COMPOSE_VERSION}")
151191

152192
# Make the test directory
153193
# (where the test is launched from)
@@ -159,15 +199,8 @@ def create(self) -> str:
159199
os.makedirs(inst_path)
160200

161201
# Run as a specific user/group ID?
162-
if self._user_id is not None:
163-
user_id = self._user_id
164-
else:
165-
user_id = os.getuid()
166-
if self._group_id is not None:
167-
group_id = self._group_id
168-
else:
169-
group_id = os.getgid()
170-
202+
user_id = self._user_id if self._user_id is not None else os.getuid()
203+
group_id = self._group_id if self._group_id is not None else os.getgid()
171204
# Write the Docker compose content to a file in the test directory
172205
additional_environment: str = ""
173206
if self._test_environment:
@@ -214,15 +247,17 @@ def run(
214247
caller along with the stdout and stderr content.
215248
A non-zero exit code does not necessarily mean the test has failed.
216249
"""
250+
assert Compose._COMPOSE_COMMAND
217251

218252
execution_directory: str = self.get_test_path()
219253

220-
print('# Compose: Executing the test ("docker-compose up")...')
254+
print(f'# Compose: Executing the test ("{Compose._COMPOSE_COMMAND} up")...')
221255
print(f'# Compose: Execution directory is "{execution_directory}"')
222256

223257
cwd = os.getcwd()
224258
os.chdir(execution_directory)
225259

260+
timeout: bool = False
226261
try:
227262
# Run the container, and then cleanup.
228263
# If a test environment is set then we pass in these values to the
@@ -237,33 +272,45 @@ def run(
237272
# we set the prefix for the network name and can use compose files
238273
# from different directories. Without this the network name
239274
# is prefixed by the directory the compose file is in.
275+
up_cmd: List[str] = Compose._COMPOSE_COMMAND.split() + [
276+
"-p",
277+
"data-manager",
278+
"up",
279+
"--exit-code-from",
280+
"job",
281+
"--abort-on-container-exit",
282+
]
240283
test = subprocess.run(
241-
[
242-
"docker-compose",
243-
"-p",
244-
"data-manager",
245-
"up",
246-
"--exit-code-from",
247-
"job",
248-
"--abort-on-container-exit",
249-
],
284+
up_cmd,
250285
capture_output=True,
251286
timeout=timeout_minutes * 60,
252287
check=False,
253288
env=env,
254289
)
290+
down_cmd: List[str] = Compose._COMPOSE_COMMAND.split() + ["down"]
255291
_ = subprocess.run(
256-
["docker-compose", "down"],
292+
down_cmd,
257293
capture_output=True,
258294
timeout=240,
259295
check=False,
260296
)
297+
except: # pylint: disable=bare-except
298+
timeout = True
261299
finally:
262300
os.chdir(cwd)
263301

264-
print(f"# Compose: Executed (exit code {test.returncode})")
302+
if timeout:
303+
print("# Compose: ERROR - Test timeout")
304+
return_code: int = -911
305+
test_stdout: str = ""
306+
test_stderr: str = ""
307+
else:
308+
print(f"# Compose: Executed (exit code {test.returncode})")
309+
return_code = test.returncode
310+
test_stdout = test.stdout.decode("utf-8")
311+
test_stderr = test.stderr.decode("utf-8")
265312

266-
return test.returncode, test.stdout.decode("utf-8"), test.stderr.decode("utf-8")
313+
return return_code, test_stdout, test_stderr
267314

268315
def delete(self) -> None:
269316
"""Deletes a test directory created by 'create()'."""
@@ -279,9 +326,10 @@ def delete(self) -> None:
279326
def run_group_compose_file(compose_file: str, delay_seconds: int = 0) -> bool:
280327
"""Starts a group compose file in a detached state.
281328
The file is expected to be a compose file in the 'data-manager' directory.
282-
We pull the continer imag to reduce the 'docker-compose up' time
329+
We pull the container image to reduce the 'docker-compose up' time
283330
and then optionally wait for a period of seconds.
284331
"""
332+
assert Compose._COMPOSE_COMMAND
285333

286334
print("# Compose: Starting test group containers...")
287335

@@ -290,13 +338,13 @@ def run_group_compose_file(compose_file: str, delay_seconds: int = 0) -> bool:
290338
try:
291339
# Pre-pull the docker-compose images.
292340
# This saves start-up execution time.
341+
pull_cmd: List[str] = Compose._COMPOSE_COMMAND.split() + [
342+
"-f",
343+
os.path.join("data-manager", compose_file),
344+
"pull",
345+
]
293346
_ = subprocess.run(
294-
[
295-
"docker-compose",
296-
"-f",
297-
os.path.join("data-manager", compose_file),
298-
"pull",
299-
],
347+
pull_cmd,
300348
capture_output=False,
301349
check=False,
302350
)
@@ -306,16 +354,16 @@ def run_group_compose_file(compose_file: str, delay_seconds: int = 0) -> bool:
306354
# we set the prefix for the network name and services from this container
307355
# are visible to the test container. Without this the network name
308356
# is prefixed by the directory the compose file is in.
357+
up_cmd: List[str] = Compose._COMPOSE_COMMAND.split() + [
358+
"-f",
359+
os.path.join("data-manager", compose_file),
360+
"-p",
361+
"data-manager",
362+
"up",
363+
"-d",
364+
]
309365
_ = subprocess.run(
310-
[
311-
"docker-compose",
312-
"-f",
313-
os.path.join("data-manager", compose_file),
314-
"-p",
315-
"data-manager",
316-
"up",
317-
"-d",
318-
],
366+
up_cmd,
319367
capture_output=False,
320368
check=False,
321369
)
@@ -335,19 +383,20 @@ def stop_group_compose_file(compose_file: str) -> bool:
335383
"""Stops a group compose file.
336384
The file is expected to be a compose file in the 'data-manager' directory.
337385
"""
386+
assert Compose._COMPOSE_COMMAND
338387

339388
print("# Compose: Stopping test group containers...")
340389

341390
try:
342391
# Bring the compose file down...
392+
down_cmd: List[str] = Compose._COMPOSE_COMMAND.split() + [
393+
"-f",
394+
os.path.join("data-manager", compose_file),
395+
"down",
396+
"--remove-orphans",
397+
]
343398
_ = subprocess.run(
344-
[
345-
"docker-compose",
346-
"-f",
347-
os.path.join("data-manager", compose_file),
348-
"down",
349-
"--remove-orphans",
350-
],
399+
down_cmd,
351400
capture_output=False,
352401
timeout=240,
353402
check=False,

src/jote/jote.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,10 @@ def _run_a_test(
768768
job_image = f"{job_definition.image.name}:{job_definition.image.tag}"
769769
job_image_memory: str = job_definition.image["memory"]
770770
if job_image_memory is None:
771-
job_image_memory = "1Gi"
771+
job_image_memory = "1G"
772+
elif job_image_memory.lower().endswith("i"):
773+
# Strip trailing kubernetes 'i' - not liked by compose
774+
job_image_memory = job_image_memory[:-1]
772775
job_image_cores: int = job_definition.image["cores"]
773776
if job_image_cores is None:
774777
job_image_cores = 1

0 commit comments

Comments
 (0)