|
27 | 27 | "source": [ |
28 | 28 | "## Caching results\n", |
29 | 29 | "\n", |
30 | | - "See [Caching and hashing](../explanation/hashing-caching.html) for more details." |
 | 30 | + "When a task runs, a unique hash is generated from the combination of all the inputs to the\n", |
 | 31 | + "task and the operation to be performed. This hash is used to name the output directory for\n", |
 | 32 | + "the task within the specified cache directory. Therefore, if you use the same cache\n", |
 | 33 | + "directory between runs and, in a subsequent run, the same task is executed with the same\n", |
 | 34 | + "inputs, then the location of its output directory will also be the same and the outputs\n", |
 | 35 | + "generated by the previous run are reused.\n", |
| 36 | + "\n", |
 | 37 | + "For example, using the MrGrid example from the [Getting Started Tutorial](./getting-started.html):\n" |
| 38 | + ] |
| 39 | + }, |
| 40 | + { |
| 41 | + "cell_type": "code", |
| 42 | + "execution_count": 2, |
| 43 | + "metadata": {}, |
| 44 | + "outputs": [ |
| 45 | + { |
| 46 | + "ename": "ImportError", |
| 47 | + "evalue": "cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' (/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)", |
| 48 | + "output_type": "error", |
| 49 | + "traceback": [ |
| 50 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 51 | + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", |
| 52 | + "Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", |
| 53 | + "\u001b[0;31mImportError\u001b[0m: cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' (/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)" |
| 54 | + ] |
| 55 | + } |
| 56 | + ], |
| 57 | + "source": [ |
| 58 | + "from pathlib import Path\n", |
| 59 | + "import tempfile\n", |
| 60 | + "from fileformats.medimage import Nifti\n", |
| 61 | + "from pydra.engine.submitter import Submitter\n", |
| 62 | + "from pydra.tasks.mrtrix3 import MrGrid\n", |
| 63 | + "\n", |
| 64 | + "# Make directory filled with nifti files\n", |
| 65 | + "test_dir = Path(tempfile.mkdtemp())\n", |
| 66 | + "nifti_dir = test_dir / \"nifti\"\n", |
| 67 | + "nifti_dir.mkdir()\n", |
| 68 | + "for i in range(10):\n", |
| 69 | + " Nifti.sample(nifti_dir, seed=i)\n", |
| 70 | + "\n", |
| 71 | + "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", |
| 72 | + "\n", |
| 73 | + "mrgrid_varying_vox_sizes = MrGrid().split(\n", |
| 74 | + " (\"input\", \"voxel\"),\n", |
| 75 | + " input=nifti_dir.iterdir(),\n", |
| 76 | + " voxel=VOXEL_SIZES\n", |
| 77 | + ")\n", |
| 78 | + "\n", |
| 79 | + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", |
| 80 | + "\n", |
| 81 | + "# Run the task to resample all NIfTI files with different voxel sizes\n", |
| 82 | + "with submitter:\n", |
| 83 | + " result1 = submitter(mrgrid_varying_vox_sizes)" |
| 84 | + ] |
| 85 | + }, |
| 86 | + { |
| 87 | + "cell_type": "markdown", |
| 88 | + "metadata": {}, |
| 89 | + "source": [ |
 | 90 | + "If we attempt to run the same task with the same parameterisation, the cache directory\n", |
 | 91 | + "will point to the same location and the results will be reused." |
| 92 | + ] |
| 93 | + }, |
| 94 | + { |
| 95 | + "cell_type": "code", |
| 96 | + "execution_count": 1, |
| 97 | + "metadata": {}, |
| 98 | + "outputs": [ |
| 99 | + { |
| 100 | + "ename": "NameError", |
| 101 | + "evalue": "name 'MrGrid' is not defined", |
| 102 | + "output_type": "error", |
| 103 | + "traceback": [ |
| 104 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 105 | + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
| 106 | + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mrgrid_varying_vox_sizes2 \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 2\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 4\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m submitter \u001b[38;5;241m=\u001b[39m Submitter(cache_dir\u001b[38;5;241m=\u001b[39mtest_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Result from previous run is reused as the task and inputs are identical\u001b[39;00m\n", |
| 107 | + "\u001b[0;31mNameError\u001b[0m: name 'MrGrid' is not defined" |
| 108 | + ] |
| 109 | + } |
| 110 | + ], |
| 111 | + "source": [ |
| 112 | + "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", |
| 113 | + " (\"input\", \"voxel\"),\n", |
| 114 | + " input=nifti_dir.iterdir(),\n", |
| 115 | + " voxel=VOXEL_SIZES\n", |
| 116 | + ")\n", |
| 117 | + "\n", |
| 118 | + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", |
| 119 | + "\n", |
| 120 | + "# Result from previous run is reused as the task and inputs are identical\n", |
| 121 | + "with submitter:\n", |
| 122 | + " result2 = submitter(mrgrid_varying_vox_sizes2)\n", |
| 123 | + "\n", |
| 124 | + "\n", |
| 125 | + "# Check that the output directory is the same for both runs\n", |
| 126 | + "assert result2.output_dir == result1.output_dir\n", |
| 127 | + "\n", |
 | 128 | + "# Change the voxel size that one of the NIfTI files is resampled to\n", |
| 129 | + "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", |
| 130 | + "\n", |
 | 131 | + "# The cached result is not reused this time as one of the inputs has changed\n", |
| 132 | + "with submitter:\n", |
| 133 | + " result3 = submitter(mrgrid_varying_vox_sizes2)\n", |
| 134 | + "\n", |
| 135 | + "# The output directory will be different as the inputs are now different\n", |
| 136 | + "assert result3.output_dir != result1.output_dir" |
| 137 | + ] |
| 138 | + }, |
| 139 | + { |
| 140 | + "cell_type": "markdown", |
| 141 | + "metadata": {}, |
| 142 | + "source": [ |
 | 143 | + "Note that for file objects, the contents of the files are used to calculate the hash,\n", |
| 144 | + "not their paths. Therefore, when inputting large files there might be some additional\n", |
| 145 | + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", |
| 146 | + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", |
| 147 | + "hashes invariant to file-system movement. For example, changing the name of one of the\n", |
| 148 | + "files in the nifti directory won't invalidate the hash." |
| 149 | + ] |
| 150 | + }, |
| 151 | + { |
| 152 | + "cell_type": "code", |
| 153 | + "execution_count": 3, |
| 154 | + "metadata": {}, |
| 155 | + "outputs": [ |
| 156 | + { |
| 157 | + "ename": "NameError", |
| 158 | + "evalue": "name 'nifti_dir' is not defined", |
| 159 | + "output_type": "error", |
| 160 | + "traceback": [ |
| 161 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
| 162 | + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", |
| 163 | + "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Rename a NIfTI file within the test directory\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m first_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 3\u001b[0m first_file\u001b[38;5;241m.\u001b[39mrename(first_file\u001b[38;5;241m.\u001b[39mwith_name(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst.nii.gz\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 5\u001b[0m mrgrid_varying_vox_sizes3 \u001b[38;5;241m=\u001b[39m MrGrid()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 6\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 8\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 9\u001b[0m )\n", |
| 164 | + "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" |
| 165 | + ] |
| 166 | + } |
| 167 | + ], |
| 168 | + "source": [ |
| 169 | + "# Rename a NIfTI file within the test directory\n", |
| 170 | + "first_file = next(nifti_dir.iterdir())\n", |
| 171 | + "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", |
| 172 | + "\n", |
| 173 | + "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", |
| 174 | + " (\"input\", \"voxel\"),\n", |
| 175 | + " input=nifti_dir.iterdir(),\n", |
| 176 | + " voxel=VOXEL_SIZES\n", |
| 177 | + ")\n", |
| 178 | + "\n", |
| 179 | + "# Result from previous run is reused as the task and inputs are identical\n", |
| 180 | + "with submitter:\n", |
| 181 | + " result4 = submitter(mrgrid_varying_vox_sizes2)\n", |
| 182 | + "\n", |
| 183 | + "# Check that the output directory is the same for both runs\n", |
| 184 | + "assert result4.output_dir == result1.output_dir" |
| 185 | + ] |
| 186 | + }, |
| 187 | + { |
| 188 | + "cell_type": "markdown", |
| 189 | + "metadata": {}, |
| 190 | + "source": [ |
| 191 | + "See [Caching and hashing](../explanation/hashing-caching.html) for more details on how inputs\n", |
| 192 | + "are hashed for caching and issues to consider." |
31 | 193 | ] |
32 | 194 | }, |
33 | 195 | { |
|
36 | 198 | "source": [ |
37 | 199 | "## Environments (containers)\n", |
38 | 200 | "\n", |
| 201 | + "Work in progress...\n", |
| 202 | + "\n", |
39 | 203 | "See [Containers and Environments](../explanation/environments.rst) for more details." |
40 | 204 | ] |
41 | 205 | }, |
42 | 206 | { |
43 | 207 | "cell_type": "markdown", |
44 | 208 | "metadata": {}, |
45 | 209 | "source": [ |
46 | | - "## Provenance" |
| 210 | + "## Provenance\n", |
| 211 | + "\n", |
| 212 | + "Work in progress..." |
47 | 213 | ] |
48 | 214 | }, |
49 | 215 | { |
|
53 | 219 | } |
54 | 220 | ], |
55 | 221 | "metadata": { |
| 222 | + "kernelspec": { |
| 223 | + "display_name": "wf12", |
| 224 | + "language": "python", |
| 225 | + "name": "python3" |
| 226 | + }, |
56 | 227 | "language_info": { |
57 | | - "name": "python" |
| 228 | + "codemirror_mode": { |
| 229 | + "name": "ipython", |
| 230 | + "version": 3 |
| 231 | + }, |
| 232 | + "file_extension": ".py", |
| 233 | + "mimetype": "text/x-python", |
| 234 | + "name": "python", |
| 235 | + "nbconvert_exporter": "python", |
| 236 | + "pygments_lexer": "ipython3", |
| 237 | + "version": "3.12.5" |
58 | 238 | } |
59 | 239 | }, |
60 | 240 | "nbformat": 4, |
|
0 commit comments