Skip to content

Commit 70eb049

Browse files
mb1069 authored and mr-c committed
Added memory monitoring for non-dockerised tools (#1006)
1 parent 31aa094 commit 70eb049

File tree

2 files changed

+95
-33
lines changed

2 files changed

+95
-33
lines changed

cwltool/job.py

Lines changed: 38 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,28 +2,29 @@
22

33
import datetime
44
import functools
5+
import itertools
56
import logging
67
import os
78
import re
89
import shutil
910
import stat
1011
import sys
11-
import time
1212
import tempfile
13+
import time
1314
import uuid
1415
from abc import ABCMeta, abstractmethod
1516
from io import IOBase, open # pylint: disable=redefined-builtin
1617
from threading import Timer
1718
from typing import (IO, Any, AnyStr, Callable, Dict, Iterable, List, Tuple,
1819
MutableMapping, MutableSequence, Optional, Union, cast)
1920

20-
import shellescape
2121
import psutil
22+
import shellescape
2223
from prov.model import PROV
24+
from schema_salad.sourceline import SourceLine
2325
from six import PY2, with_metaclass
2426
from typing_extensions import (TYPE_CHECKING, # pylint: disable=unused-import
2527
Text)
26-
from schema_salad.sourceline import SourceLine
2728

2829
from .builder import Builder, HasReqsHints # pylint: disable=unused-import
2930
from .context import RuntimeContext # pylint: disable=unused-import
@@ -231,11 +232,11 @@ def _setup(self, runtimeContext): # type: (RuntimeContext) -> None
231232
for p in self.generatemapper.files()}, indent=4))
232233

233234
def _execute(self,
234-
runtime, # type: List[Text]
235-
env, # type: MutableMapping[Text, Text]
236-
runtimeContext, # type: RuntimeContext
237-
monitor_function=None # type: Optional[Callable]
238-
): # type: (...) -> None
235+
runtime, # type: List[Text]
236+
env, # type: MutableMapping[Text, Text]
237+
runtimeContext, # type: RuntimeContext
238+
monitor_function=None, # type: Optional[Callable]
239+
): # type: (...) -> None
239240

240241
scr, _ = self.get_requirement("ShellCommandRequirement")
241242

@@ -383,6 +384,30 @@ def _execute(self,
383384
_logger.debug(u"[job %s] Removing temporary directory %s", self.name, self.tmpdir)
384385
shutil.rmtree(self.tmpdir, True)
385386

387+
def process_monitor(self, sproc):
    """Track the peak memory use of a running job and log it when it ends.

    While ``sproc`` runs, a background thread samples the resident set size
    (RSS) of the process and all of its descendants once per second and
    keeps the largest total seen.  This call blocks until ``sproc`` exits,
    then logs the peak in MiB, or a notice if no sample could be taken
    (e.g. the job finished in under a second).

    :param sproc: the already started job process; only its ``pid``
        attribute and ``wait()`` method are used (presumably a
        ``subprocess.Popen`` -- confirm against the caller).
    """
    # Function-scope import: only ``Timer`` is imported from ``threading``
    # at the top of this file, and a Timer fires just once, which would
    # record a single sample instead of a maximum.
    from threading import Event, Thread

    monitor = psutil.Process(sproc.pid)
    # One-element list so the sampler thread can update the value in place.
    peak_rss = [None]
    finished = Event()

    def tree_rss():
        """Return the summed RSS of the job process and its descendants."""
        total = monitor.memory_info().rss
        for child in monitor.children(recursive=True):
            try:
                total += child.memory_info().rss
            except psutil.Error:
                # The child exited (or became unreadable) between being
                # listed and being read; leave it out of this sample.
                continue
        return total

    def sample_until_finished():
        """Record the largest tree RSS seen, sampling once per second."""
        # Event.wait() doubles as the one-second sleep and returns True as
        # soon as the main thread signals that the job has ended.
        while not finished.wait(1):
            try:
                rss = tree_rss()
            except psutil.Error:
                # The job process itself is gone or cannot be inspected,
                # so there is nothing further to measure.
                return
            if peak_rss[0] is None or rss > peak_rss[0]:
                peak_rss[0] = rss

    sampler = Thread(target=sample_until_finished)
    sampler.daemon = True
    sampler.start()
    try:
        sproc.wait()
    finally:
        # Always stop the sampler, even if waiting on the job raised.
        finished.set()
        # Give an in-flight sample a moment to finish so the peak is final.
        sampler.join(1)

    if peak_rss[0] is not None:
        _logger.info(u"[job %s] Max memory used: %iMiB", self.name,
                     round(peak_rss[0] / (2 ** 20)))
    else:
        _logger.info(u"Could not collect memory usage, job ended before monitoring began.")
410+
386411

387412
class CommandLineJob(JobBase):
388413
def run(self,
@@ -419,7 +444,9 @@ def run(self,
419444
self.generatemapper, self.outdir, self.builder.outdir,
420445
inplace_update=self.inplace_update)
421446

422-
self._execute([], env, runtimeContext)
447+
monitor_function = functools.partial(self.process_monitor)
448+
449+
self._execute([], env, runtimeContext, monitor_function)
423450

424451

425452
CONTROL_CODE_RE = r'\x1b\[[0-9;]*[a-zA-Z]'
@@ -621,6 +648,8 @@ def run(self, runtimeContext): # type: (RuntimeContext) -> None
621648
monitor_function = functools.partial(
622649
self.docker_monitor, cidfile, runtimeContext.tmpdir_prefix,
623650
not bool(runtimeContext.cidfile_dir))
651+
elif runtimeContext.user_space_docker_cmd:
652+
monitor_function = functools.partial(self.process_monitor)
624653
self._execute(runtime, env, runtimeContext, monitor_function)
625654

626655
@staticmethod

tests/test_udocker.py

Lines changed: 57 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -3,32 +3,65 @@
33
import os
44
import subprocess
55
from .util import get_data, get_main_output
6+
import tempfile
7+
import shutil
8+
from psutil.tests import TRAVIS
69

710
LINUX = sys.platform in ('linux', 'linux2')
811

912

1013
@pytest.mark.skipif(not LINUX, reason="LINUX only")
11-
def test_udocker_usage_should_not_write_cid_file(tmpdir):
12-
cwd = tmpdir.chdir()
13-
install_cmds = [
14-
"curl https://raw.githubusercontent.com/indigo-dc/udocker/master/udocker.py -o ./udocker",
15-
"chmod u+rx ./udocker",
16-
"./udocker install"]
17-
os.environ['UDOCKER_DIR'] = os.path.join(str(tmpdir), ".udocker")
18-
19-
assert sum([subprocess.call(cmd.split()) for cmd in install_cmds]) == 0
20-
21-
docker_path = os.path.join(str(tmpdir), 'udocker')
22-
23-
test_file = "tests/wf/wc-tool.cwl"
24-
job_file = "tests/wf/wc-job.json"
25-
error_code, stdout, stderr = get_main_output(
26-
["--debug", "--default-container", "debian", "--user-space-docker-cmd=" + docker_path,
27-
get_data(test_file), get_data(job_file)])
28-
cwd.chdir()
29-
cidfiles_count = sum(1 for _ in tmpdir.visit(fil="*.cid"))
30-
31-
tmpdir.remove(ignore_errors=True)
32-
33-
assert "completed success" in stderr
34-
assert cidfiles_count == 0
14+
class TestUdocker:
    """Integration tests that run cwltool through a locally installed udocker.

    udocker is downloaded and installed once for the whole class into a
    temporary directory, which is removed again in ``teardown_class``.
    """

    # Path of the installed udocker executable; set by setup_class.
    udocker_path = None
    # Temporary directory holding the udocker installation; set by setup_class.
    docker_install_dir = None

    @classmethod
    def setup_class(cls):
        """Download and install udocker into a fresh temporary directory."""
        install_cmds = [
            "curl https://raw.githubusercontent.com/indigo-dc/udocker/master/udocker.py -o ./udocker",
            "chmod u+rx ./udocker",
            "./udocker install"]

        cls.docker_install_dir = tempfile.mkdtemp()
        os.environ['UDOCKER_DIR'] = os.path.join(cls.docker_install_dir, ".udocker")

        # Run each command inside the install directory via ``cwd=`` instead
        # of changing the working directory of the test process, so a failed
        # install cannot leave the rest of the session in the wrong place.
        # Each return code is checked on its own: summing them could hide a
        # failure when a negative (signal) code cancels a positive one.
        for cmd in install_cmds:
            return_code = subprocess.call(cmd.split(), cwd=cls.docker_install_dir)
            assert return_code == 0, cmd

        cls.udocker_path = os.path.join(cls.docker_install_dir, 'udocker')
        print('Udocker install dir: ' + cls.docker_install_dir)

    @classmethod
    def teardown_class(cls):
        """Remove the temporary udocker installation."""
        shutil.rmtree(cls.docker_install_dir)

    def test_udocker_usage_should_not_write_cid_file(self, tmpdir):
        """Running a tool through udocker must not leave any ``*.cid`` file."""
        cwd = tmpdir.chdir()
        try:
            test_file = "tests/wf/wc-tool.cwl"
            job_file = "tests/wf/wc-job.json"
            error_code, stdout, stderr = get_main_output(
                ["--debug", "--default-container", "debian",
                 "--user-space-docker-cmd=" + self.udocker_path,
                 get_data(test_file), get_data(job_file)])
        finally:
            # Restore the previous working directory even if the run raised.
            cwd.chdir()
        cidfiles_count = sum(1 for _ in tmpdir.visit(fil="*.cid"))

        tmpdir.remove(ignore_errors=True)

        assert "completed success" in stderr
        assert cidfiles_count == 0

    @pytest.mark.skipif(TRAVIS, reason='Not reliable on single threaded test on travis.')
    def test_udocker_should_display_memory_usage(self, tmpdir):
        """A job run through udocker must report its maximum memory use."""
        cwd = tmpdir.chdir()
        try:
            error_code, stdout, stderr = get_main_output(
                ["--default-container=debian",
                 "--user-space-docker-cmd=" + self.udocker_path,
                 get_data("tests/wf/timelimit.cwl"), "--sleep_time", "10"])
        finally:
            # Restore the previous working directory even if the run raised.
            cwd.chdir()
        tmpdir.remove(ignore_errors=True)

        assert "completed success" in stderr
        assert "Max memory" in stderr

0 commit comments

Comments
 (0)