Skip to content

Commit 9df85a4

Browse files
committed
ENH: Add DeterministicGzipFile wrapper class
1 parent 9e938dd commit 9df85a4

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

nibabel/openers.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@
4040
HAVE_INDEXED_GZIP = False
4141

4242

43+
class DeterministicGzipFile(gzip.GzipFile):
    """ Deterministic variant of GzipFile

    This writer does not add filename information to the header, and defaults
    to a modification time (``mtime``) of 0 seconds.

    Parameters
    ----------
    filename : str or path-like, optional
        Path of the file to open. It is only used to open the underlying
        file when ``fileobj`` is not supplied; it is never passed on to
        ``GzipFile``, which always receives ``filename=""``.
    mode : str, optional
        File mode. ``'b'`` is appended if it is missing. If None, a file
        opened from ``filename`` is opened with ``'rb'`` and ``None`` is
        passed through to ``GzipFile`` unchanged.
    compresslevel : int, optional
        Compression level, passed through to ``GzipFile``.
    fileobj : file-like, optional
        Already opened binary stream to read from or write to. When given,
        it takes precedence over ``filename`` and no file is opened here.
    mtime : int or float, optional
        Modification time passed through to ``GzipFile``. Defaults to 0.

    Raises
    ------
    TypeError
        If neither ``filename`` nor ``fileobj`` is provided.
    """
    def __init__(self, filename=None, mode=None, compresslevel=9, fileobj=None, mtime=0):
        if mode and 'b' not in mode:
            mode += 'b'

        opened = None
        # Only open ``filename`` ourselves when the caller did not supply a
        # stream; otherwise we would truncate/create an unrelated file and
        # silently ignore the caller's ``fileobj``.
        if fileobj is None:
            if filename is None:
                raise TypeError('Must define either fileobj or filename')
            # Storing the handle in ``myfileobj`` makes GzipFile.close()
            # responsible for closing it.
            fileobj = self.myfileobj = opened = open(filename, mode or 'rb')

        try:
            # An empty filename keeps the original name out of the header.
            super().__init__(filename="", mode=mode, compresslevel=compresslevel,
                             fileobj=fileobj, mtime=mtime)
        except Exception:
            # Do not leak the handle we opened if GzipFile rejects the
            # arguments (close() will never be reached in that case).
            if opened is not None:
                opened.close()
            raise
56+
57+
4358
def _gzip_open(filename, mode='rb', compresslevel=9, keep_open=False):
4459

4560
# use indexed_gzip if possible for faster read access. If keep_open ==

nibabel/tests/test_openers.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import time
1717

1818
from numpy.compat.py3k import asstr, asbytes
19-
from ..openers import Opener, ImageOpener, HAVE_INDEXED_GZIP, BZ2File
19+
from ..openers import Opener, ImageOpener, HAVE_INDEXED_GZIP, BZ2File, DeterministicGzipFile
2020
from ..tmpdirs import InTemporaryDirectory
2121
from ..volumeutils import BinOpener
2222

@@ -356,6 +356,66 @@ def md5sum(fname):
356356
return hashlib.md5(fobj.read()).hexdigest()
357357

358358

359+
def test_DeterministicGzipFile():
    """ Compare DeterministicGzipFile output against plain GzipFile output

    Checks that the default output matches a GzipFile written with an empty
    filename and ``mtime=0``, that an explicit ``mtime`` is honored, and that
    the output does not depend on the name of the file being written.
    """
    with InTemporaryDirectory():
        payload = b"Hello, I'd like to have an argument."

        def reference_checksum(**gz_kwargs):
            # Write ``payload`` to ref.gz through a plain GzipFile with an
            # empty header filename, then return the md5 of the result.
            with open("ref.gz", "wb") as raw, \
                    GzipFile(filename="", mode="wb", fileobj=raw, **gz_kwargs) as ref:
                ref.write(payload)
            return md5sum("ref.gz")

        # No filename, no mtime
        anon_chksum = reference_checksum(mtime=0)

        with DeterministicGzipFile("default.gz", "wb") as gzfile:
            inner_handle = gzfile.myfileobj
            gzfile.write(payload)
        # Check that myfileobj is being closed by GzipFile.close()
        # This is in case GzipFile changes its internal implementation
        assert inner_handle.closed

        assert md5sum("default.gz") == anon_chksum

        # No filename, current mtime
        now = time.time()
        now_chksum = reference_checksum(mtime=now)

        with DeterministicGzipFile("now.gz", "wb", mtime=now) as gzfile:
            gzfile.write(payload)

        assert md5sum("now.gz") == now_chksum

        # Change in default behavior
        with mock.patch("time.time", return_value=now):
            # GzipFile will use time.time()
            assert reference_checksum() == now_chksum

            # DeterministicGzipFile will use 0
            with DeterministicGzipFile("now.gz", "wb") as gzfile:
                gzfile.write(payload)
            assert md5sum("now.gz") == anon_chksum

        # GzipFile is filename dependent, DeterministicGzipFile is independent
        with GzipFile("filenameA.gz", mode="wb", mtime=0) as named:
            named.write(payload)
        fnameA_chksum = md5sum("filenameA.gz")
        assert fnameA_chksum != anon_chksum

        with DeterministicGzipFile("filenameA.gz", "wb") as gzfile:
            gzfile.write(payload)

        # But the contents are the same with different filenames
        assert md5sum("filenameA.gz") == anon_chksum
416+
417+
418+
359419
def test_bitwise_determinism():
360420
with InTemporaryDirectory():
361421
msg = b"Hello, I'd like to have an argument."

0 commit comments

Comments
 (0)