Merge pull request #754 from nipype/enh/getitem-formatters

effigies · web-flow · commit 00b92ec32d78 · 2024-05-29T19:34:51.000-04:00
ENH: Add more complete format string implementation for argstrings
diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py
@@ -649,12 +649,9 @@ def argstr_formatting(argstr, inputs, value_updates=None):
     if value_updates:
         inputs_dict.update(value_updates)
     # getting all fields that should be formatted, i.e. {field_name}, ...
-    inp_fields = re.findall(r"{\w+}", argstr)
-    inp_fields_float = re.findall(r"{\w+:[0-9.]+f}", argstr)
-    inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_float]
+    inp_fields = parse_format_string(argstr)
     val_dict = {}
-    for fld in inp_fields:
-        fld_name = fld[1:-1]  # extracting the name form {field_name}
+    for fld_name in inp_fields:
         fld_value = inputs_dict[fld_name]
         fld_attr = getattr(attrs.fields(type(inputs)), fld_name)
         if fld_value is attr.NOTHING or (
@@ -738,3 +735,22 @@ def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.
             f"Unrecognised type for collation copyfile metadata of {fld}, {collation}"
         )
     return mode, collation
+
+
+def parse_format_string(fmtstr):
+    """Parse an argstr format string and return all keywords used in it.
+
+    A keyword is the leading identifier of a replacement field, i.e. ``var`` in
+    ``{var.attr[key][0]!r:>10}``.  Keywords of replacement fields nested inside
+    a format spec (``{value:{width}.{precision}f}``) are returned as well.
+
+    Parameters
+    ----------
+    fmtstr : str
+        The format string to scan.
+
+    Returns
+    -------
+    set of str
+        Names of all keywords referenced; empty if there are none.
+
+    Notes
+    -----
+    Positional fields (``{}``, ``{0}``) are ignored.  Doubled braces are not
+    treated as escapes, so ``{{name}}`` still reports ``name``.
+    """
+    identifier = r"[a-zA-Z_]\w*"
+    # Lookups chained onto the keyword; an index may hold anything but "]" or braces
+    lookups = r"(?:\." + identifier + r"|\[[^\]{}]+\])*"
+    conversion = r"(?:![rsa])?"
+    # Example: 0{pads[hex]}x (capture "pads"); only the keyword has to be well formed
+    nested_field = r"\{(" + identifier + r")[^{}]*\}"
+    # A spec is literal text interleaved with any number of one-level nested fields
+    fmtspec = r"(?::((?:[^{}]|\{[^{}]*\})*))?"
+    # Example: var.attr[key][0].attr2 (capture "var" and the raw format spec)
+    full_field = r"\{(" + identifier + ")" + lookups + conversion + fmtspec + r"\}"
+
+    keywords = set()
+    for keyword, spec in re.findall(full_field, fmtstr):
+        keywords.add(keyword)
+        # The spec is scanned separately because a repeated capture group would
+        # only remember the last nested field.
+        keywords.update(re.findall(nested_field, spec))
+    return keywords
diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py
@@ -3,7 +3,9 @@
 from pathlib import Path
 import random
 import platform
+import typing as ty
 import pytest
+import attrs
 import cloudpickle as cp
 from unittest.mock import Mock
 from fileformats.generic import Directory, File
@@ -15,9 +17,10 @@
     load_and_run,
     position_sort,
     parse_copyfile,
+    argstr_formatting,
+    parse_format_string,
 )
 from ...utils.hash import hash_function
-from .. import helpers_file
 from ..core import Workflow
 
 
@@ -50,7 +53,7 @@ def test_hash_file(tmpdir):
     with open(outdir / "test.file", "w") as fp:
         fp.write("test")
     assert (
-        hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299"
+        hash_function(File(outdir / "test.file")) == "f32ab20c4a86616e32bf2504e1ac5a22"
     )
 
 
@@ -311,3 +314,51 @@ def mock_field(copyfile):
         parse_copyfile(mock_field((1, 2)))
     with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"):
         parse_copyfile(mock_field((Mode.copy, 2)))
+
+
+def test_argstr_formatting():
+    """Plain, float-spec, dict-key and list-index fields are all substituted."""
+
+    @attrs.define
+    class Inputs:
+        a1_field: str
+        b2_field: float
+        c3_field: ty.Dict[str, str]
+        d4_field: ty.List[str]
+
+    template = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
+    spec = Inputs(a1_field="1", b2_field=2.0, c3_field={"c": "3"}, d4_field=["4"])
+    formatted = argstr_formatting(template, spec)
+    assert formatted == "1 2.000000 -test 3 -me 4"
+
+
+def test_parse_format_string1():
+    """A lone single-letter field yields exactly that keyword."""
+    keywords = parse_format_string("{a}")
+    assert keywords == {"a"}
+
+
+def test_parse_format_string2():
+    """A lone multi-letter field yields exactly that keyword."""
+    keywords = parse_format_string("{abc}")
+    assert keywords == {"abc"}
+
+
+def test_parse_format_string3():
+    """A field nested in the format spec is reported alongside the outer one."""
+    expected = {"a", "b"}
+    assert parse_format_string("{a:{b}}") == expected
+
+
+def test_parse_format_string4():
+    """An index lookup on the nested field does not hide its keyword."""
+    expected = {"a", "b"}
+    assert parse_format_string("{a:{b[2]}}") == expected
+
+
+def test_parse_format_string5():
+    """Chained attribute/item lookups reduce to the leading keywords only."""
+    fmtstr = "{a.xyz[somekey].abc:{b[a][b].d[0]}}"
+    keywords = parse_format_string(fmtstr)
+    assert keywords == {"a", "b"}
+
+
+def test_parse_format_string6():
+    """Literal spec text and a key containing a space are both tolerated."""
+    fmtstr = "{a:05{b[a 2][b].e}}"
+    keywords = parse_format_string(fmtstr)
+    assert keywords == {"a", "b"}
+
+
+def test_parse_format_string7():
+    """Every field of a multi-field argstr is reported."""
+    argstr = "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}"
+    expected = {"a1_field", "b2_field", "c3_field", "d4_field"}
+    assert parse_format_string(argstr) == expected
diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py
@@ -140,7 +140,7 @@ def test_input_file_hash_1(tmp_path):
     fields = [("in_file", File)]
     input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,))
     inputs = make_klass(input_spec)
-    assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce"
+    assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf"
 
 
 def test_input_file_hash_2(tmp_path):
@@ -154,7 +154,7 @@ def test_input_file_hash_2(tmp_path):
 
     # checking specific hash value
     hash1 = inputs(in_file=file).hash
-    assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
+    assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"
 
     # checking if different name doesn't affect the hash
     file_diffname = tmp_path / "in_file_2.txt"
@@ -185,7 +185,7 @@ def test_input_file_hash_2a(tmp_path):
 
     # checking specific hash value
     hash1 = inputs(in_file=file).hash
-    assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb"
+    assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3"
 
     # checking if different name doesn't affect the hash
     file_diffname = tmp_path / "in_file_2.txt"
@@ -204,7 +204,7 @@ def test_input_file_hash_2a(tmp_path):
 
     # checking if string is also accepted
     hash4 = inputs(in_file=str(file)).hash
-    assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67"
+    assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5"
 
 
 def test_input_file_hash_3(tmp_path):
@@ -278,7 +278,7 @@ def test_input_file_hash_4(tmp_path):
 
     # checking specific hash value
     hash1 = inputs(in_file=[[file, 3]]).hash
-    assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091"
+    assert hash1 == "0693adbfac9f675af87e900065b1de00"
 
     # the same file, but int field changes
     hash1a = inputs(in_file=[[file, 5]]).hash
@@ -315,7 +315,7 @@ def test_input_file_hash_5(tmp_path):
 
     # checking specific hash value
     hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash
-    assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c"
+    assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480"
 
     # the same file, but int field changes
     hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash