Skip to content

Commit 96d69de

Browse files
committed
fix: minimize processing before batch submission
1 parent 9255e2b commit 96d69de

File tree

3 files changed

+132
-66
lines changed

3 files changed

+132
-66
lines changed

heudiconv/cli/run.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,13 @@ def process_args(args):
246246
if not args.heuristic:
247247
raise RuntimeError("No heuristic specified - add to arguments and rerun")
248248

249+
if args.queue:
250+
lgr.info("Queuing %s conversion", args.queue)
251+
iterarg, iterables = ("files", len(args.files)) if args.files else \
252+
("subjects", len(args.subjs))
253+
queue_conversion(args.queue, iterarg, iterables, args.queue_args)
254+
sys.exit(0)
255+
249256
heuristic = load_heuristic(args.heuristic)
250257

251258
study_sessions = get_study_sessions(args.dicom_dir_template, args.files,
@@ -281,21 +288,6 @@ def process_args(args):
281288
lgr.warning("Skipping unknown locator dataset")
282289
continue
283290

284-
if args.queue:
285-
286-
studyid = sid
287-
if session:
288-
studyid += "-%s" % session
289-
if locator:
290-
studyid += "-%s" % locator
291-
# remove any separators
292-
studyid = studyid.replace(op.sep, '_')
293-
294-
queue_conversion(args.queue,
295-
studyid,
296-
args.queue_args)
297-
continue
298-
299291
anon_sid = anonymize_sid(sid, args.anon_cmd) if args.anon_cmd else None
300292
if args.anon_cmd:
301293
lgr.info('Anonymized {} to {}'.format(sid, anon_sid))

heudiconv/queue.py

Lines changed: 82 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -7,64 +7,107 @@
77

88
lgr = logging.getLogger(__name__)
99

10-
def queue_conversion(queue, iterarg, iterables, queue_args=None):
    """
    Write out conversion arguments to file and submit to a job scheduler.

    Parses `sys.argv` for heudiconv arguments and submits one job per
    `iterarg` value: job ``i`` receives the original command line with the
    queue options removed and only the ``i``-th `iterarg` value kept
    (see `clean_args`).

    Parameters
    ----------
    queue: string
        Batch scheduler to use
    iterarg: str
        Multi-argument to index (`subjects` OR `files`)
    iterables: int
        Number of `iterarg` arguments
    queue_args: string (optional)
        Additional queue arguments for job submission; split on whitespace
        and written one per ``#SBATCH`` line

    Raises
    ------
    NotImplementedError
        If `queue` is not a supported batch scheduler
    OSError
        Propagated from `subprocess.call` when the scheduler executable
        cannot be started (e.g. it is not installed)
    """

    SUPPORTED_QUEUES = {'SLURM': 'sbatch'}
    if queue not in SUPPORTED_QUEUES:
        # format the message here: exceptions do not interpolate extra args
        raise NotImplementedError("Queuing with %s is not supported" % queue)

    # nothing below depends on the loop index, so compute it once
    heudiconv_exec = which("heudiconv") or "heudiconv"
    # a single script path is reused: it is overwritten for every job
    # NOTE(review): relies on the scheduler reading the script at submission
    # time rather than at job start - confirm for any newly supported queue
    queue_file = os.path.abspath('heudiconv-%s.sh' % queue)
    submit_cmd = [SUPPORTED_QUEUES[queue], queue_file]
    directives = queue_args.split() if queue_args else []

    submitted = 0
    for i in range(iterables):
        args = clean_args(sys.argv[1:], iterarg, i)
        # make arguments executable
        args.insert(0, heudiconv_exec)
        convertcmd = " ".join(args)
        lgr.debug("Queuing command: %s", convertcmd)

        with open(queue_file, 'wt') as fp:
            fp.write("#!/bin/bash\n")
            for qarg in directives:
                fp.write("#SBATCH %s\n" % qarg)
            fp.write(convertcmd + "\n")

        returncode = subprocess.call(submit_cmd)
        if returncode:
            lgr.warning("Submission of job %d failed with exit code %d",
                        i, returncode)
        else:
            submitted += 1
    lgr.info("Submitted %d jobs", submitted)
52+
53+
def clean_args(hargs, iterarg, iteridx):
    """
    Filters arguments for batch submission.

    Drops the queue options (and their values) and, for the multi-value
    argument named by `iterarg`, keeps only the value at position `iteridx`.
    The input list is not modified.

    Parameters
    ----------
    hargs: list
        Command-line arguments
    iterarg: str
        Multi-argument to index (`subjects` OR `files`)
    iteridx: int
        `iterarg` index to submit

    Returns
    -------
    cmdargs : list
        Filtered arguments for batch submission

    Raises
    ------
    ValueError
        If `iterarg` is neither `subjects` nor `files`

    Example
    --------
    >>> from heudiconv.queue import clean_args
    >>> cmd = ['heudiconv', '-d', '/some/{subject}/path',
    ...        '-q', 'SLURM',
    ...        '-s', 'sub-1', 'sub-2', 'sub-3', 'sub-4']
    >>> clean_args(cmd, 'subjects', 0)
    ['heudiconv', '-d', '/some/{subject}/path', '-s', 'sub-1']
    """

    if iterarg == "subjects":
        iterflags = ('-s', '--subjects')
    elif iterarg == "files":
        iterflags = ('--files',)
    else:
        raise ValueError("Cannot index %s" % iterarg)

    # these must not reach the submitted job, or it would queue itself again
    queue_flags = ('-q', '--queue', '--queue-args')

    cmdargs = []
    in_iterarg = False   # currently inside the values of the indexed argument
    itercount = 0        # number of `iterarg` values seen so far
    skip_value = False   # next token is the value of a removed queue flag

    for arg in hargs:
        if skip_value:
            # value of a queue flag: drop it without interpreting it
            skip_value = False
            continue

        if arg.startswith('-'):
            # any option ends the value list of the indexed argument
            in_iterarg = False
            if arg in queue_flags:
                skip_value = True
                continue
            if arg.split('=', 1)[0] in queue_flags:
                # '--queue=SLURM' form: flag and value share one token
                continue
            if arg in iterflags:
                in_iterarg = True
            cmdargs.append(arg)
            continue

        if in_iterarg:
            wanted = itercount == iteridx
            itercount += 1
            if not wanted:
                continue
        cmdargs.append(arg)

    return cmdargs

heudiconv/tests/test_queue.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def test_queue_no_slurm(tmpdir, invocation):
2323
sys.argv = ['heudiconv'] + hargs
2424

2525
try:
26-
with pytest.raises(OSError):
26+
with pytest.raises(OSError): # SLURM should not be installed
2727
runner(hargs)
2828
# should have generated a slurm submission script
2929
slurm_cmd_file = (tmpdir / 'heudiconv-SLURM.sh').strpath
@@ -46,17 +46,48 @@ def test_queue_no_slurm(tmpdir, invocation):
4646
sys.argv = _sys_args
4747

4848
def test_argument_filtering(tmpdir):
    """Check that `clean_args` strips queue options and indexes `iterarg`.

    Covers both indexable arguments (`--files`, `--subjects` / `-s`), the
    first and a later index, the rejection of an unknown `iterarg`, and
    that the caller's argument list is left untouched.
    """
    cmd_files = [
        'heudiconv',
        '--files',
        '/fake/path/to/files',
        '/another/fake/path',
        '-f',
        'convertall',
        '-q',
        'SLURM',
        '--queue-args',
        '--cpus-per-task=4 --contiguous --time=10'
    ]
    original_files = list(cmd_files)
    filtered = [
        'heudiconv',
        '--files',
        '/another/fake/path',
        '-f',
        'convertall',
    ]
    assert clean_args(cmd_files, 'files', 1) == filtered
    # the same list is filtered once per job, so it must not be mutated
    assert cmd_files == original_files

    filtered_first = [
        'heudiconv',
        '--files',
        '/fake/path/to/files',
        '-f',
        'convertall',
    ]
    assert clean_args(cmd_files, 'files', 0) == filtered_first

    cmd_subjects = [
        'heudiconv',
        '-d',
        '/some/{subject}/path',
        '--queue',
        'SLURM',
        '--subjects',
        'sub1',
        'sub2',
        'sub3',
        'sub4',
        '-f',
        'convertall'
    ]
    filtered = [
        'heudiconv',
        '-d',
        '/some/{subject}/path',
        '--subjects',
        'sub3',
        '-f',
        'convertall'
    ]
    assert clean_args(cmd_subjects, 'subjects', 2) == filtered

    # short flag spelling of the indexed argument
    cmd_short = ['heudiconv', '-q', 'SLURM', '-s', 'sub1', 'sub2']
    assert clean_args(cmd_short, 'subjects', 0) == ['heudiconv', '-s', 'sub1']

    # only `subjects` and `files` can be indexed
    with pytest.raises(ValueError):
        clean_args(cmd_subjects, 'sessions', 0)

0 commit comments

Comments
 (0)