@@ -69,7 +69,16 @@ def location_converter(path: ty.Union[Path, str, None]) -> Path:
69
69
@attrs .define
70
70
class PersistentCache :
71
71
"""Persistent cache in which to store computationally expensive hashes between nodes
72
- and workflow/task runs
72
+ and workflow/task runs. It does this via the `get_or_calculate_hash` method, which
73
+ takes a locally unique key (e.g. file-system path + mtime) and a function to
74
+ calculate the hash if it isn't present in the persistent store.
75
+
76
+ The locally unique key is hashed (cheaply) using a hashlib cryptographic hash and this
77
+ "local hash" is used to name the entry of the (potentially expensive) hash of the
78
+ object itself (e.g. the contents of a file). This entry is saved as a text file
79
+ within a user-specific cache directory (see `platformdirs.user_cache_dir`), with
80
+ the name of the file being the "local hash" of the key and the contents of the
81
+ file being the "globally unique hash" of the object itself.
73
82
74
83
Parameters
75
84
----------
@@ -166,6 +175,17 @@ def from_path(
166
175
167
176
@attrs .define
168
177
class Cache :
178
+ """Cache for hashing objects, used to avoid infinite recursion caused by circular
179
+ references between objects, and to store hashes of objects that have already been
180
+ hashed to avoid recomputation.
181
+
182
+ This concept is extended to persistent caching of hashes for certain object types,
183
+ for which calculating the hash is a potentially expensive operation (e.g.
184
+ File/Directory types). For these classes the `bytes_repr` override function yields a
185
+ "locally unique cache key" (e.g. file-system path + mtime) as the first item of its
186
+ iterator.
187
+ """
188
+
169
189
persistent : ty .Optional [PersistentCache ] = attrs .field (
170
190
default = None ,
171
191
converter = PersistentCache .from_path , # type: ignore[misc]
0 commit comments