|
11 | 11 | "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", |
12 | 12 | "some of the available options for executing a task.\n", |
13 | 13 | "\n", |
14 | | - "[](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" |
| 14 | + "[](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)\n", |
| 15 | + "\n", |
| 16 | + "Remember that before attempting to run multi-process code in Jupyter notebooks, the\n", |
| 17 | + "following snippet must be called" |
15 | 18 | ] |
16 | 19 | }, |
17 | 20 | { |
|
30 | 33 | "source": [ |
31 | 34 | "## Submitter\n", |
32 | 35 | "\n", |
33 | | - "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" |
34 | | - ] |
35 | | - }, |
36 | | - { |
37 | | - "cell_type": "code", |
38 | | - "execution_count": null, |
39 | | - "metadata": {}, |
40 | | - "outputs": [], |
41 | | - "source": [ |
42 | | - "from pydra.design import python\n", |
43 | | - "\n", |
44 | | - "@python.define\n", |
45 | | - "def TenToThePower(p: int) -> int:\n", |
46 | | - " return 10 ** p" |
| 36 | + "If you want to access a richer `Result` object you can use a Submitter object to initiate\n", |
| 37 | + "the task execution. For example, using the `TenToThePower` task from the testing package" |
47 | 38 | ] |
48 | 39 | }, |
49 | 40 | { |
|
53 | 44 | "outputs": [], |
54 | 45 | "source": [ |
55 | 46 | "from pydra.engine.submitter import Submitter\n", |
| 47 | + "from pydra.tasks.testing import TenToThePower\n", |
| 48 | + "\n", |
56 | 49 | "\n", |
57 | 50 | "ten_to_the_power = TenToThePower(p=3)\n", |
58 | 51 | "\n", |
|
110 | 103 | "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", |
111 | 104 | "arguments to the execution call. For example, if we wanted to run five tasks using the\n", |
112 | 105 | "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", |
113 | | - "call." |
| 106 | + "call.\n", |
| 107 | + "\n", |
| 108 | + "Remember that when calling multi-process code in a top level script the call must be\n", |
| 109 | + "enclosed within a `if __name__ == \"__main__\"` block to allow the worker processes to\n", |
| 110 | + "import the module without re-executing it." |
114 | 111 | ] |
115 | 112 | }, |
116 | 113 | { |
|
119 | 116 | "metadata": {}, |
120 | 117 | "outputs": [], |
121 | 118 | "source": [ |
122 | | - "from pydra.design import python\n", |
| 119 | + "import tempfile\n", |
| 120 | + "\n", |
| 121 | + "cache_root = tempfile.mkdtemp()\n", |
123 | 122 | "\n", |
124 | 123 | "if __name__ == \"__main__\":\n", |
125 | 124 | "\n", |
126 | 125 | " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", |
127 | 126 | "\n", |
128 | 127 | " # Run the 5 tasks in parallel split across 3 processes\n", |
129 | | - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", |
| 128 | + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3, cache_dir=cache_root)\n", |
130 | 129 | "\n", |
131 | 130 | " p1, p2, p3, p4, p5 = outputs.out\n", |
132 | 131 | "\n", |
|
168 | 167 | "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", |
169 | 168 | "go through some of the practicalities of this caching and hashing (see\n", |
170 | 169 | "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", |
171 | | - "to consider)." |
| 170 | + "to consider).\n", |
| 171 | + "\n", |
| 172 | + "First we import the functions and classes we need andcreate some sample NIfTI files to work with" |
172 | 173 | ] |
173 | 174 | }, |
174 | 175 | { |
|
179 | 180 | "source": [ |
180 | 181 | "from pathlib import Path\n", |
181 | 182 | "import tempfile\n", |
| 183 | + "from pprint import pprint\n", |
182 | 184 | "from fileformats.medimage import Nifti1\n", |
183 | 185 | "from pydra.engine.submitter import Submitter\n", |
184 | 186 | "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", |
185 | 187 | "\n", |
186 | | - "# Make directory filled with nifti files\n", |
| 188 | + "# Make a temporary directory\n", |
187 | 189 | "test_dir = Path(tempfile.mkdtemp())\n", |
188 | 190 | "nifti_dir = test_dir / \"nifti\"\n", |
189 | 191 | "nifti_dir.mkdir()\n", |
190 | | - "for i in range(10):\n", |
191 | | - " Nifti1.sample(nifti_dir, seed=i)\n", |
192 | | - "\n", |
193 | | - "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", |
194 | | - "# by splitting the \"input\" input field over all files in the directory\n", |
195 | | - "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", |
196 | | - " in_file=nifti_dir.iterdir()\n", |
197 | | - ")\n", |
198 | | - "\n", |
199 | | - "# Run the task to resample all NIfTI files\n", |
200 | | - "outputs = mrgrid()\n", |
201 | | - "\n", |
202 | | - "# Create a new custom directory\n", |
203 | | - "cache_dir = test_dir / \"cache\"\n", |
204 | | - "cache_dir.mkdir()\n", |
205 | | - "\n", |
206 | | - "submitter = Submitter(cache_dir=cache_dir)\n", |
207 | | - "\n", |
208 | | - "# Run the task to resample all NIfTI files with different voxel sizes\n", |
209 | | - "with submitter:\n", |
210 | | - " result1 = submitter(mrgrid)\n", |
211 | 192 | "\n", |
212 | | - "print(result1)" |
| 193 | + "# Generate some random NIfTI files to work with\n", |
| 194 | + "nifti_files = [Nifti1.sample(nifti_dir, seed=i) for i in range(10)]" |
213 | 195 | ] |
214 | 196 | }, |
215 | 197 | { |
|
243 | 225 | "\n", |
244 | 226 | "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", |
245 | 227 | " (\"in_file\", \"voxel\"),\n", |
246 | | - " in_file=nifti_dir.iterdir(),\n", |
| 228 | + " in_file=nifti_files,\n", |
247 | 229 | " voxel=VOX_SIZES,\n", |
248 | 230 | ")\n", |
249 | 231 | "\n", |
250 | 232 | "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", |
251 | 233 | "\n", |
252 | 234 | "\n", |
253 | | - "# Result from previous run is reused as the task and inputs are identical\n", |
254 | 235 | "with submitter:\n", |
255 | 236 | " result1 = submitter(mrgrid_varying_vox)\n", |
256 | 237 | "\n", |
257 | 238 | "\n", |
258 | 239 | "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", |
259 | 240 | " (\"in_file\", \"voxel\"),\n", |
260 | | - " in_file=nifti_dir.iterdir(),\n", |
| 241 | + " in_file=nifti_files,\n", |
261 | 242 | " voxel=copy(VOX_SIZES),\n", |
262 | 243 | ")\n", |
263 | 244 | "\n", |
|
298 | 279 | "outputs": [], |
299 | 280 | "source": [ |
300 | 281 | "# Rename a NIfTI file within the test directory\n", |
301 | | - "first_file = next(nifti_dir.iterdir())\n", |
302 | | - "new_name = first_file.with_name(\"first.nii\")\n", |
303 | | - "first_file.rename(new_name)\n", |
| 282 | + "nifti_files[0] = Nifti1(\n", |
| 283 | + " nifti_files[0].fspath.rename(nifti_files[0].fspath.with_name(\"first.nii\"))\n", |
| 284 | + ")\n", |
304 | 285 | "\n", |
305 | 286 | "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", |
306 | 287 | " (\"in_file\", \"voxel\"),\n", |
307 | | - " in_file=nifti_dir.iterdir(),\n", |
| 288 | + " in_file=nifti_files,\n", |
308 | 289 | " voxel=VOX_SIZES,\n", |
309 | 290 | ")\n", |
310 | 291 | "\n", |
311 | | - "# Result from previous run is reused as the task and inputs are identical\n", |
| 292 | + "# Result from previous run is reused as contents of the files have not changed, despite\n", |
| 293 | + "# the file names changing\n", |
312 | 294 | "with submitter:\n", |
313 | | - " result3 = submitter(mrgrid_varying_vox3)\n", |
| 295 | + " result4 = submitter(mrgrid_varying_vox3)\n", |
314 | 296 | "\n", |
315 | | - "assert result3.output_dir == result1.output_dir\n", |
| 297 | + "assert result4.output_dir == result1.output_dir\n", |
316 | 298 | "\n", |
317 | 299 | "# Replace the first NIfTI file with a new file\n", |
318 | | - "new_name.unlink()\n", |
319 | | - "Nifti1.sample(nifti_dir, seed=100)\n", |
| 300 | + "nifti_files[0] = Nifti1.sample(nifti_dir, seed=100)\n", |
320 | 301 | "\n", |
321 | 302 | "# Update the in_file input field to include the new file\n", |
322 | 303 | "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", |
323 | 304 | " (\"in_file\", \"voxel\"),\n", |
324 | | - " in_file=nifti_dir.iterdir(),\n", |
| 305 | + " in_file=nifti_files,\n", |
325 | 306 | " voxel=VOX_SIZES,\n", |
326 | 307 | ")\n", |
327 | 308 | "\n", |
|
333 | 314 | "assert result4.output_dir != result1.output_dir" |
334 | 315 | ] |
335 | 316 | }, |
336 | | - { |
337 | | - "cell_type": "markdown", |
338 | | - "metadata": {}, |
339 | | - "source": [] |
340 | | - }, |
341 | 317 | { |
342 | 318 | "cell_type": "markdown", |
343 | 319 | "metadata": {}, |
344 | 320 | "source": [ |
345 | | - "## Environments\n", |
| 321 | + "## Environments and hooks\n", |
346 | 322 | "\n", |
347 | 323 | "For shell tasks, it is possible to specify that the command runs within a specific\n", |
348 | | - "software environment, such as those provided by software containers (e.g. Docker or Apptainer).\n", |
| 324 | + "software environment, such as those provided by software containers (e.g. Docker or Singularity/Apptainer).\n", |
349 | 325 | "This is down by providing the environment to the submitter/execution call," |
350 | 326 | ] |
351 | 327 | }, |
|
371 | 347 | "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", |
372 | 348 | "\n", |
373 | 349 | "# Print the locations of the output files\n", |
374 | | - "print(\"\\n\".join(str(p) for p in outputs.out_file))" |
| 350 | + "pprint(outputs.out_file)" |
375 | 351 | ] |
376 | 352 | }, |
377 | 353 | { |
|
381 | 357 | "Of course for this to work Docker needs to work and be configured for\n", |
382 | 358 | "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", |
383 | 359 | "See [Containers and Environments](../explanation/environments.rst) for more details on\n", |
384 | | - "how to utilise containers and add support for other software environments." |
| 360 | + "how to utilise containers and add support for other software environments.\n", |
| 361 | + "\n", |
| 362 | + "It is also possible to specify functions to run at hooks that are immediately before and after\n", |
| 363 | + "the task is executed by passing a `pydra.engine.spec.TaskHooks` object to the `hooks`\n", |
| 364 | + "keyword arg. The callable should take the `pydra.engine.core.Task` object as its only\n", |
| 365 | + "argument and return None. The available hooks to attach functions are:\n", |
| 366 | + "\n", |
| 367 | + "* pre_run: before the task cache directory is created\n", |
| 368 | + "* pre_run_task: after the cache directory has been created and the inputs resolved but before the task is executed\n", |
| 369 | + "* post_run_task: after the task has been run and the outputs collected\n", |
| 370 | + "* post_run: after the cache directory has been finalised\n", |
| 371 | + "\n", |
| 372 | + "\n", |
| 373 | + "QUESTION: What are these hooks intended for? Should the post_run_task hook be run before the outputs have been\n", |
| 374 | + "collected?" |
385 | 375 | ] |
386 | 376 | }, |
387 | 377 | { |
388 | | - "cell_type": "markdown", |
| 378 | + "cell_type": "code", |
| 379 | + "execution_count": null, |
389 | 380 | "metadata": {}, |
| 381 | + "outputs": [], |
390 | 382 | "source": [ |
391 | | - "## Provenance and auditing\n", |
| 383 | + "from pydra.engine.core import Task\n", |
| 384 | + "from pydra.engine.specs import TaskHooks, Result\n", |
| 385 | + "import os\n", |
| 386 | + "import platform\n", |
| 387 | + "\n", |
| 388 | + "def notify_task_completion(task: Task, result: Result):\n", |
| 389 | + " # Print a message to the terminal\n", |
| 390 | + " print(f\"Task completed! Results are stored in {str(task.output_dir)!r}\")\n", |
| 391 | + "\n", |
| 392 | + " # Platform-specific notifications\n", |
| 393 | + " if platform.system() == \"Darwin\": # macOS\n", |
| 394 | + " os.system('osascript -e \\'display notification \"Task has completed successfully!\" with title \"Task Notification\"\\'')\n", |
| 395 | + " elif platform.system() == \"Linux\": # Linux\n", |
| 396 | + " os.system('notify-send \"Task Notification\" \"Task has completed successfully!\"')\n", |
| 397 | + " elif platform.system() == \"Windows\": # Windows\n", |
| 398 | + " os.system('msg * \"Task has completed successfully!\"')\n", |
392 | 399 | "\n", |
393 | | - "Work in progress..." |
| 400 | + "# Run the task to resample all NIfTI files\n", |
| 401 | + "outputs = mrgrid(hooks=TaskHooks(post_run=notify_task_completion), cache_dir=tempfile.mkdtemp())\n", |
| 402 | + "\n", |
| 403 | + "# Print the locations of the output files\n", |
| 404 | + "pprint(outputs.out_file)" |
394 | 405 | ] |
395 | 406 | }, |
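| | + {
| | + "cell_type": "markdown",
| | + "metadata": {},
| | + "source": [
| | + "As an illustration of combining hooks, the sketch below times the execution of each task in\n",
| | + "the `mrgrid` split by recording the start time in a `pre_run` hook and reporting the elapsed\n",
| | + "time in a `post_run` hook. This is only a sketch: it assumes that `TaskHooks` accepts a\n",
| | + "`pre_run` keyword matching the hook list above, that the `pre_run` callable receives only the\n",
| | + "`Task` object, and that the same `Task` object is passed to both hooks in the same process."
| | + ]
| | + },
| | + {
| | + "cell_type": "code",
| | + "execution_count": null,
| | + "metadata": {},
| | + "outputs": [],
| | + "source": [
| | + "import time\n",
| | + "\n",
| | + "# Start times keyed by the id() of the Task object (assumes both hooks receive the same\n",
| | + "# Task instance and run in the same process as the notebook)\n",
| | + "start_times = {}\n",
| | + "\n",
| | + "def record_start(task: Task):\n",
| | + "    # pre_run hook: called before the task's cache directory is created\n",
| | + "    start_times[id(task)] = time.time()\n",
| | + "\n",
| | + "def report_duration(task: Task, result: Result):\n",
| | + "    # post_run hook: called after the cache directory has been finalised\n",
| | + "    elapsed = time.time() - start_times.pop(id(task))\n",
| | + "    print(f\"Task with outputs in {str(task.output_dir)!r} took {elapsed:.3f} s\")\n",
| | + "\n",
| | + "outputs = mrgrid(\n",
| | + "    hooks=TaskHooks(pre_run=record_start, post_run=report_duration),\n",
| | + "    cache_dir=tempfile.mkdtemp(),\n",
| | + ")"
| | + ]
| | + },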
396 | 407 | { |
397 | 408 | "cell_type": "markdown", |
398 | 409 | "metadata": {}, |
399 | 410 | "source": [ |
400 | | - "## Hooks\n", |
| 411 | + "## Provenance and auditing\n", |
401 | 412 | "\n", |
402 | 413 | "Work in progress..." |
403 | 414 | ] |
404 | | - }, |
405 | | - { |
406 | | - "cell_type": "markdown", |
407 | | - "metadata": {}, |
408 | | - "source": [] |
409 | 415 | } |
410 | 416 | ], |
411 | 417 | "metadata": { |
|