Skip to content

Commit 00b92ec

Browse files
authored
Merge pull request #754 from nipype/enh/getitem-formatters
ENH: Add more complete format string implementation for argstrings
2 parents b6db19b + fc3b31f commit 00b92ec

File tree

3 files changed

+80
-13
lines changed

3 files changed

+80
-13
lines changed

pydra/engine/helpers.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -649,12 +649,9 @@ def argstr_formatting(argstr, inputs, value_updates=None):
649649
if value_updates:
650650
inputs_dict.update(value_updates)
651651
# getting all fields that should be formatted, i.e. {field_name}, ...
652-
inp_fields = re.findall(r"{\w+}", argstr)
653-
inp_fields_float = re.findall(r"{\w+:[0-9.]+f}", argstr)
654-
inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_float]
652+
inp_fields = parse_format_string(argstr)
655653
val_dict = {}
656-
for fld in inp_fields:
657-
fld_name = fld[1:-1] # extracting the name form {field_name}
654+
for fld_name in inp_fields:
658655
fld_value = inputs_dict[fld_name]
659656
fld_attr = getattr(attrs.fields(type(inputs)), fld_name)
660657
if fld_value is attr.NOTHING or (
@@ -738,3 +735,22 @@ def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.
738735
f"Unrecognised type for collation copyfile metadata of {fld}, {collation}"
739736
)
740737
return mode, collation
738+
739+
740+
def parse_format_string(fmtstr):
    """Parse an argstr format string and return all keywords used in it.

    The string is tokenised with :meth:`string.Formatter.parse`, so the full
    replacement-field grammar is understood: attribute and item lookups
    (``{a.b[key]}``), every conversion flag (``!r``, ``!s``, ``!a``), any
    number of fields nested inside a format spec (``{a:{width}.{prec}f}``)
    and escaped literal braces (``{{`` / ``}}``), which are not keywords.

    Parameters
    ----------
    fmtstr : str
        Format string, e.g. ``"{a1_field} {b2_field:02f} -me {d4_field[0]}"``.

    Returns
    -------
    set of str
        The top-level keyword of every replacement field, including fields
        nested in format specs. Empty (``{}``) and positional (``{0}``)
        fields are ignored because they do not name a keyword.

    Raises
    ------
    ValueError
        If ``fmtstr`` is not a well-formed format string (for instance an
        unbalanced brace); ``str.format`` rejects such strings as well.
    """
    import string  # local import: only this helper needs the module

    identifier = r"[a-zA-Z_]\w*"
    parse = string.Formatter().parse
    keywords = set()
    # Format specs may contain replacement fields themselves, so every spec
    # found is queued and parsed like a format string in its own right.
    pending = [fmtstr]
    while pending:
        for _, field_name, format_spec, _ in parse(pending.pop()):
            if field_name is None:  # trailing literal text, no field
                continue
            # Keep only the part before the first ".attr" or "[item]" lookup.
            head = field_name.partition(".")[0].partition("[")[0]
            if re.fullmatch(identifier, head):
                keywords.add(head)
            if format_spec:
                pending.append(format_spec)
    return keywords

pydra/engine/tests/test_helpers.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
from pathlib import Path
44
import random
55
import platform
6+
import typing as ty
67
import pytest
8+
import attrs
79
import cloudpickle as cp
810
from unittest.mock import Mock
911
from fileformats.generic import Directory, File
@@ -15,9 +17,10 @@
1517
load_and_run,
1618
position_sort,
1719
parse_copyfile,
20+
argstr_formatting,
21+
parse_format_string,
1822
)
1923
from ...utils.hash import hash_function
20-
from .. import helpers_file
2124
from ..core import Workflow
2225

2326

@@ -50,7 +53,7 @@ def test_hash_file(tmpdir):
5053
with open(outdir / "test.file", "w") as fp:
5154
fp.write("test")
5255
assert (
53-
hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299"
56+
hash_function(File(outdir / "test.file")) == "f32ab20c4a86616e32bf2504e1ac5a22"
5457
)
5558

5659

@@ -311,3 +314,51 @@ def mock_field(copyfile):
311314
parse_copyfile(mock_field((1, 2)))
312315
with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"):
313316
parse_copyfile(mock_field((Mode.copy, 2)))
317+
318+
319+
def test_argstr_formatting():
    """Check argstr_formatting on plain, float-spec, dict-key and list-index fields."""

    @attrs.define
    class Inputs:
        a1_field: str
        b2_field: float
        c3_field: ty.Dict[str, str]
        d4_field: ty.List[str]

    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    spec = Inputs("1", 2.0, {"c": "3"}, ["4"])

    formatted = argstr_formatting(template, spec)

    assert formatted == "1 2.000000 -test 3 -me 4"
335+
336+
337+
def test_parse_format_string1():
    """A single one-letter field yields exactly that keyword."""
    keywords = parse_format_string("{a}")
    assert keywords == {"a"}
339+
340+
341+
def test_parse_format_string2():
    """A multi-letter field name is returned whole."""
    keywords = parse_format_string("{abc}")
    assert keywords == {"abc"}
343+
344+
345+
def test_parse_format_string3():
    """A field nested in the format spec is reported next to the outer one."""
    keywords = parse_format_string("{a:{b}}")
    assert keywords == {"a", "b"}
347+
348+
349+
def test_parse_format_string4():
    """An item lookup on the nested field does not leak into its keyword."""
    keywords = parse_format_string("{a:{b[2]}}")
    assert keywords == {"a", "b"}
351+
352+
353+
def test_parse_format_string5():
    """Chained attribute/item lookups reduce to the leading keyword."""
    template = "{a.xyz[somekey].abc:{b[a][b].d[0]}}"
    keywords = parse_format_string(template)
    assert keywords == {"a", "b"}
355+
356+
357+
def test_parse_format_string6():
    """Literal spec text and a key containing a space around the nested field are tolerated."""
    template = "{a:05{b[a 2][b].e}}"
    keywords = parse_format_string(template)
    assert keywords == {"a", "b"}
359+
360+
361+
def test_parse_format_string7():
    """Several fields mixed with literal text are all collected."""
    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
    expected = {"a1_field", "b2_field", "c3_field", "d4_field"}
    assert parse_format_string(template) == expected

pydra/engine/tests/test_specs.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def test_input_file_hash_1(tmp_path):
140140
fields = [("in_file", File)]
141141
input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,))
142142
inputs = make_klass(input_spec)
143-
assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce"
143+
assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf"
144144

145145

146146
def test_input_file_hash_2(tmp_path):
@@ -154,7 +154,7 @@ def test_input_file_hash_2(tmp_path):
154154

155155
# checking specific hash value
156156
hash1 = inputs(in_file=file).hash
157-
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
157+
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"
158158

159159
# checking if different name doesn't affect the hash
160160
file_diffname = tmp_path / "in_file_2.txt"
@@ -185,7 +185,7 @@ def test_input_file_hash_2a(tmp_path):
185185

186186
# checking specific hash value
187187
hash1 = inputs(in_file=file).hash
188-
assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
188+
assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"
189189

190190
# checking if different name doesn't affect the hash
191191
file_diffname = tmp_path / "in_file_2.txt"
@@ -204,7 +204,7 @@ def test_input_file_hash_2a(tmp_path):
204204

205205
# checking if string is also accepted
206206
hash4 = inputs(in_file=str(file)).hash
207-
assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67"
207+
assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5"
208208

209209

210210
def test_input_file_hash_3(tmp_path):
@@ -278,7 +278,7 @@ def test_input_file_hash_4(tmp_path):
278278

279279
# checking specific hash value
280280
hash1 = inputs(in_file=[[file, 3]]).hash
281-
assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091"
281+
assert hash1 == "0693adbfac9f675af87e900065b1de00"
282282

283283
# the same file, but int field changes
284284
hash1a = inputs(in_file=[[file, 5]]).hash
@@ -315,7 +315,7 @@ def test_input_file_hash_5(tmp_path):
315315

316316
# checking specific hash value
317317
hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash
318-
assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c"
318+
assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480"
319319

320320
# the same file, but int field changes
321321
hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash

0 commit comments

Comments
 (0)