Skip to content

Commit f2f70a6

Browse files
committed
added comments and doc strings to explain the use of the persistent cache
1 parent a031ea5 commit f2f70a6

File tree

2 files changed

+40
-3
lines changed

2 files changed

+40
-3
lines changed

pydra/utils/hash.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,32 @@ def hash_single(obj: object, cache: Cache) -> Hash:
224224
# chunk
225225

226226
def calc_hash(first: ty.Optional[bytes] = None) -> Hash:
227+
"""
228+
Calculate the hash of the object
229+
230+
Parameters
231+
----------
232+
first : ty.Optional[bytes]
233+
the first bytes chunk from the bytes_repr iterator, passed if the first
234+
chunk wasn't a local cache key
235+
"""
227236
h = blake2b(digest_size=16, person=b"pydra-hash")
237+
# Feed in the first chunk, which was already popped to check for a cache-key,
238+
# if present
228239
if first is not None:
229240
h.update(first)
230241
for chunk in bytes_it: # Note that `bytes_it` is in outer scope
231242
h.update(chunk)
232243
return Hash(h.digest())
233244

245+
# Read the first chunk of the bytes_repr iterator, check to see whether it returns
246+
# a "cache-key" tuple instead of a bytes chunk for the type of the object to cache
247+
# (i.e. file objects). If it does, use that key to check the persistent cache for
248+
# a precomputed hash; otherwise, calculate the hash and store it in the
249+
# persistent cache with that key.
250+
251+
# If the first chunk is a bytes chunk (i.e. the object type doesn't have an associated
252+
# 'cache-key'), then simply calculate the hash of the object.
234253
first = next(bytes_it)
235254
if isinstance(first, tuple):
236255
tp = type(obj)

pydra/utils/tests/test_hash.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,11 +323,18 @@ def text_file(tmp_path):
323323

324324

325325
def test_persistent_hash_cache(cache_path, text_file):
326+
"""
327+
Test the persistent hash cache with a text file
328+
329+
The cache is used to store the hash of the text file, and the hash is
330+
retrieved from the cache when the file is unchanged.
331+
"""
326332
# Test hash is stable between calls
327333
hsh = hash_object(text_file, persistent_cache=cache_path)
328334
assert hsh == hash_object(text_file, persistent_cache=cache_path)
329335

330-
# Test that cached hash has been used
336+
# Test that the cached hash is used by explicitly modifying it and checking that the
337+
# hash is the same as the modified hash
331338
cache_files = list(cache_path.iterdir())
332339
assert len(cache_files) == 1
333340
modified_hash = "modified".encode()
@@ -342,6 +349,10 @@ def test_persistent_hash_cache(cache_path, text_file):
342349

343350

344351
def test_persistent_hash_cache_cleanup1(cache_path, text_file):
352+
"""
353+
Test the persistent hash cache is cleaned up after use if the period between cleanups
354+
is greater than the environment variable PYDRA_HASH_CACHE_CLEANUP_PERIOD
355+
"""
345356
with mock.patch.dict(
346357
os.environ,
347358
{
@@ -350,21 +361,28 @@ def test_persistent_hash_cache_cleanup1(cache_path, text_file):
350361
},
351362
):
352363
persistent_cache = PersistentCache()
353-
hsh = hash_object(text_file, persistent_cache=persistent_cache)
364+
hash_object(text_file, persistent_cache=persistent_cache)
354365
assert len(list(cache_path.iterdir())) == 1
355366
persistent_cache.clean_up()
356367
assert len(list(cache_path.iterdir())) == 0
357368

358369

359370
def test_persistent_hash_cache_cleanup2(cache_path, text_file):
371+
"""
372+
Test the persistent hash cache is cleaned up after use if the period between cleanups
373+
is greater than the explicitly provided cleanup_period
374+
"""
360375
persistent_cache = PersistentCache(cache_path, cleanup_period=-100)
361-
hsh = hash_object(text_file, persistent_cache=persistent_cache)
376+
hash_object(text_file, persistent_cache=persistent_cache)
362377
assert len(list(cache_path.iterdir())) == 1
363378
time.sleep(2)
364379
persistent_cache.clean_up()
365380
assert len(list(cache_path.iterdir())) == 0
366381

367382

368383
def test_persistent_hash_cache_not_dir(text_file):
384+
"""
385+
Test that an error is raised if the provided cache path is not a directory
386+
"""
369387
with pytest.raises(ValueError, match="is not a directory"):
370388
PersistentCache(text_file.fspath)

0 commit comments

Comments
 (0)