Skip to content

Commit b2f4e04

Browse files
authored
Merge pull request #792 from nipype/task-def-dict-util
Serializing Task classes to/from dictionaries
2 parents da37abb + a490a01 commit b2f4e04

30 files changed

+627
-197
lines changed

README.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ It is also useful to install pre-commit:
106106

107107
.. |codecov| image:: https://codecov.io/gh/nipype/pydra/branch/main/graph/badge.svg
108108
:alt: codecov
109+
:target: https://codecov.io/gh/nipype/pydra
109110

110111
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/pydra.svg
111112
:alt: Supported Python versions

docs/source/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ workflows,
8585
* :ref:`Python-tasks`
8686
* :ref:`Shell-tasks`
8787
* :ref:`Workflows`
88-
* :ref:`Canonical task form`
88+
* :ref:`Canonical form and serialisation`
8989

9090
Examples
9191
~~~~~~~~

docs/source/tutorial/5-shell.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@
162162
"metadata": {},
163163
"outputs": [],
164164
"source": [
165-
"from pydra.utils import task_fields\n",
165+
"from pydra.utils import get_fields\n",
166166
"\n",
167167
"Cp = shell.define(\n",
168168
" \"cp <in_fs_objects:fs-object+> <out|out_dir:directory> \"\n",
@@ -172,7 +172,7 @@
172172
" \"--tuple-arg <tuple_arg:int,str=(1,'bar')> \"\n",
173173
")\n",
174174
"\n",
175-
"print(f\"'--int-arg' default: {task_fields(Cp).int_arg.default}\")"
175+
"print(f\"'--int-arg' default: {get_fields(Cp).int_arg.default}\")"
176176
]
177177
},
178178
{

docs/source/tutorial/7-canonical-form.ipynb

Lines changed: 132 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"# Canonical task form\n",
7+
"# Canonical form and serialisation\n",
8+
"\n",
9+
"## Canonical task form\n",
810
"\n",
911
"Under the hood, all Python, shell and workflow tasks generated by the\n",
1012
"`pydra.compose.*.define` decorators/functions are translated to\n",
@@ -149,33 +151,32 @@
149151
"from pathlib import Path\n",
150152
"from fileformats import generic\n",
151153
"from pydra.compose import shell\n",
152-
"from pydra.utils.typing import MultiInputObj\n",
154+
"\n",
155+
"\n",
156+
"# the \"copied\" output is magically passed to this function because the name matches\n",
157+
"def get_file_size(copied: Path) -> int:\n",
158+
" \"\"\"Calculate the file size\"\"\"\n",
159+
" result = os.stat(copied)\n",
160+
" return result.st_size\n",
153161
"\n",
154162
"\n",
155163
"@shell.define\n",
156-
"class CpWithSize(shell.Task[\"CpWithSize.Outputs\"]):\n",
164+
"class CpFileWithSize(shell.Task[\"CpFileWithSize.Outputs\"]):\n",
157165
"\n",
158166
" executable = \"cp\"\n",
159167
"\n",
160-
" in_fs_objects: MultiInputObj[generic.FsObject]\n",
161-
" recursive: bool = shell.arg(argstr=\"-R\")\n",
162-
" text_arg: str = shell.arg(argstr=\"--text-arg\")\n",
163-
" int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n",
164-
" tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n",
168+
" in_file: generic.File # = shell.arg() is assumed\n",
169+
" archive_mode: bool = shell.arg(argstr=\"-a\", default=False)\n",
165170
"\n",
166171
" class Outputs(shell.Outputs):\n",
167172
"\n",
168-
" @staticmethod\n",
169-
" def get_file_size(out_file: Path) -> int:\n",
170-
" \"\"\"Calculate the file size\"\"\"\n",
171-
" result = os.stat(out_file)\n",
172-
" return result.st_size\n",
173+
" copied: generic.File = shell.outarg(\n",
174+
" position=-1, path_template=\"{in_file}_copied\"\n",
175+
" )\n",
176+
" file_size: int = shell.out(callable=get_file_size)\n",
173177
"\n",
174-
" copied: generic.FsObject = shell.outarg(path_template=\"copied\")\n",
175-
" out_file_size: int = shell.out(callable=get_file_size)\n",
176178
"\n",
177-
"\n",
178-
"print_help(CpWithSize)"
179+
"print_help(CpFileWithSize)"
179180
]
180181
},
181182
{
@@ -197,7 +198,6 @@
197198
"import typing as ty\n",
198199
"import re\n",
199200
"from pydra.compose import python, workflow\n",
200-
"from pydra.compose.base import is_set\n",
201201
"from pydra.utils import print_help, show_workflow\n",
202202
"\n",
203203
"\n",
@@ -237,6 +237,120 @@
237237
"print_help(CanonicalWorkflowTask)\n",
238238
"show_workflow(CanonicalWorkflowTask)"
239239
]
240+
},
241+
{
242+
"cell_type": "markdown",
243+
"metadata": {},
244+
"source": [
245+
"## Serialisation\n",
246+
"\n",
247+
"As well as the dataclass-like canonical form, it is also possible to represent all tasks\n",
248+
"in a nested dictionary form, which could be written to a static file (e.g. in JSON or\n",
249+
"YAML format). The dictionary form of a class can be generated by the `pydra.utils.unstructure`\n",
250+
"function. For example, the following shell command"
251+
]
252+
},
253+
{
254+
"cell_type": "code",
255+
"execution_count": null,
256+
"metadata": {},
257+
"outputs": [],
258+
"source": [
259+
"MyCmd = shell.define(\n",
260+
" \"my-cmd <in_file> <out|out_file> --an-arg <an_arg?> \"\n",
261+
" \"--a-flag<a_flag> --arg-with-default <arg_with_default:int=3>\"\n",
262+
")\n",
263+
"\n",
264+
"print_help(MyCmd)"
265+
]
266+
},
267+
{
268+
"cell_type": "markdown",
269+
"metadata": {},
270+
"source": [
271+
"can be converted into a serialised dictionary form:"
272+
]
273+
},
274+
{
275+
"cell_type": "code",
276+
"execution_count": null,
277+
"metadata": {},
278+
"outputs": [],
279+
"source": [
280+
"from pprint import pprint\n",
281+
"from pydra.utils import unstructure\n",
282+
"\n",
283+
"my_cmd_dict = unstructure(MyCmd)\n",
284+
"\n",
285+
"pprint(my_cmd_dict)"
286+
]
287+
},
288+
{
289+
"cell_type": "markdown",
290+
"metadata": {},
291+
"source": [
292+
"Note that a little more work still has to be done to serialise some Python\n",
293+
"objects, e.g. classes used in field types and functions that are run in Python and\n",
294+
"construct workflows in workflow tasks, before the serialised form can be written to JSON/YAML."
295+
]
296+
},
297+
{
298+
"cell_type": "code",
299+
"execution_count": null,
300+
"metadata": {},
301+
"outputs": [],
302+
"source": [
303+
"cp_with_size_dict = unstructure(CpFileWithSize)\n",
304+
"\n",
305+
"pprint(cp_with_size_dict)"
306+
]
307+
},
308+
{
309+
"cell_type": "markdown",
310+
"metadata": {},
311+
"source": [
312+
"To deserialise the general dictionary form back into a Task class, you can use the\n",
313+
"`pydra.utils.structure` function"
314+
]
315+
},
316+
{
317+
"cell_type": "code",
318+
"execution_count": null,
319+
"metadata": {},
320+
"outputs": [],
321+
"source": [
322+
"from pydra.utils import structure\n",
323+
"\n",
324+
"ReloadedCpFileWithSize = structure(cp_with_size_dict)"
325+
]
326+
},
327+
{
328+
"cell_type": "markdown",
329+
"metadata": {},
330+
"source": [
331+
"which should run just as before"
332+
]
333+
},
334+
{
335+
"cell_type": "code",
336+
"execution_count": null,
337+
"metadata": {},
338+
"outputs": [],
339+
"source": [
340+
"from pathlib import Path\n",
341+
"import tempfile\n",
342+
"from pydra.utils import asdict\n",
343+
"\n",
344+
"tmp_dir = Path(tempfile.mkdtemp())\n",
345+
"\n",
346+
"a_file = tmp_dir / \"hello-world.txt\"\n",
347+
"a_file.write_text(\"Hello world\")\n",
348+
"\n",
349+
"cp_file_with_size = ReloadedCpFileWithSize(in_file=a_file)\n",
350+
"outputs = cp_file_with_size(cache_root=tmp_dir / \"cache\")\n",
351+
"\n",
352+
"pprint(asdict(outputs))"
353+
]
240354
}
241355
],
242356
"metadata": {

pydra/compose/base/field.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from fileformats.core import to_mime
77
from fileformats.generic import File, FileSet
88
from pydra.utils.typing import TypeParser, is_optional, is_type, is_union
9-
from pydra.utils.general import task_fields, wrap_text
9+
from pydra.utils.general import get_fields, wrap_text
1010
import attrs
1111

1212
if ty.TYPE_CHECKING:
@@ -66,7 +66,7 @@ class Requirement:
6666
def satisfied(self, inputs: "Task") -> bool:
6767
"""Check if the requirement is satisfied by the inputs"""
6868
value = getattr(inputs, self.name)
69-
field = {f.name: f for f in task_fields(inputs)}[self.name]
69+
field = {f.name: f for f in get_fields(inputs)}[self.name]
7070
if value is None or field.type is bool and value is False:
7171
return False
7272
if self.allowed_values is None:
@@ -326,7 +326,7 @@ class Arg(Field):
326326
it is False
327327
"""
328328

329-
allowed_values: frozenset = attrs.field(default=(), converter=frozenset)
329+
allowed_values: frozenset = attrs.field(factory=frozenset, converter=frozenset)
330330
copy_mode: File.CopyMode = File.CopyMode.any
331331
copy_collation: File.CopyCollation = File.CopyCollation.any
332332
copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single

pydra/compose/base/helpers.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
from copy import copy
66
from pydra.utils.typing import is_type, is_optional
7-
from pydra.utils.general import task_fields
7+
from pydra.utils.general import get_fields
88
from .field import Field, Arg, Out, NO_DEFAULT
99

1010

@@ -114,7 +114,13 @@ def ensure_field_objects(
114114
out_kwds = copy(out)
115115
if "help" not in out_kwds:
116116
out_kwds["help"] = output_helps.get(output_name, "")
117-
outputs[output_name] = out_type(
117+
if "path_template" in out_kwds:
118+
from pydra.compose.shell import outarg
119+
120+
out_type_ = outarg
121+
else:
122+
out_type_ = out_type
123+
outputs[output_name] = out_type_(
118124
name=output_name,
119125
**out_kwds,
120126
)
@@ -217,6 +223,8 @@ def extract_function_inputs_and_outputs(
217223
inpt.default = default
218224
elif inspect.isclass(inpt) or ty.get_origin(inpt):
219225
inputs[inpt_name] = arg_type(type=inpt, default=default)
226+
elif isinstance(inpt, dict):
227+
inputs[inpt_name] = arg_type(**inpt)
220228
else:
221229
raise ValueError(
222230
f"Unrecognised input type ({inpt}) for input {inpt_name} with default "
@@ -406,11 +414,11 @@ def extract_fields_from_class(
406414

407415
input_helps, _ = parse_doc_string(klass.__doc__)
408416

409-
def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]:
417+
def extract_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]:
410418
"""Get the fields from a class"""
411419
fields_dict = {}
412420
# Get fields defined in base classes if present
413-
for field in task_fields(klass):
421+
for field in get_fields(klass):
414422
if field.name not in skip_fields:
415423
fields_dict[field.name] = field
416424
type_hints = ty.get_type_hints(klass)
@@ -460,7 +468,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]:
460468
f"tasks, {klass} must inherit from {spec_type}"
461469
)
462470

463-
inputs = get_fields(klass, arg_type, auto_attribs, input_helps)
471+
inputs = extract_fields(klass, arg_type, auto_attribs, input_helps)
464472

465473
try:
466474
outputs_klass = klass.Outputs
@@ -475,7 +483,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]:
475483
)
476484

477485
output_helps, _ = parse_doc_string(outputs_klass.__doc__)
478-
outputs = get_fields(outputs_klass, out_type, auto_attribs, output_helps)
486+
outputs = extract_fields(outputs_klass, out_type, auto_attribs, output_helps)
479487

480488
return inputs, outputs
481489

0 commit comments

Comments
 (0)