Skip to content

Commit e7a0f91

Browse files
committed
[ENH] Do not save the original name and time stamp of gzip files (#295)
* [ENH] Do not save the original name and time stamp of gzip files

  Fixes nipreps/fmriprep#1480
* fix indentation
* add new test
* do not hardlink gzipped files
1 parent dd5bd37 commit e7a0f91

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

niworkflows/utils/misc.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def _copy_any(src, dst):
140140

141141
src_isgz = src.endswith('.gz')
142142
dst_isgz = dst.endswith('.gz')
143-
if src_isgz == dst_isgz:
143+
if not src_isgz and not dst_isgz:
144144
copyfile(src, dst, copy=True, use_hardlink=True)
145145
return False # Make sure we do not reuse the hardlink later
146146

@@ -149,10 +149,16 @@ def _copy_any(src, dst):
149149
os.unlink(dst)
150150

151151
src_open = gzip.open if src_isgz else open
152-
dst_open = gzip.open if dst_isgz else open
153152
with src_open(src, 'rb') as f_in:
154-
with dst_open(dst, 'wb') as f_out:
155-
copyfileobj(f_in, f_out)
153+
with open(dst, 'wb') as f_out:
154+
if dst_isgz:
155+
# Remove FNAME header from gzip (poldracklab/fmriprep#1480)
156+
gz_out = gzip.GzipFile('', 'wb', 9, f_out, 0.)
157+
copyfileobj(f_in, gz_out)
158+
gz_out.close()
159+
else:
160+
copyfileobj(f_in, f_out)
161+
156162
return True
157163

158164

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Test utils"""
2+
from pathlib import Path
3+
from tempfile import TemporaryDirectory
4+
from subprocess import check_call
5+
from niworkflows.utils.misc import _copy_any
6+
7+
8+
def test_copy_gzip():
9+
with TemporaryDirectory() as tmpdir:
10+
tmppath = Path(tmpdir)
11+
filepath = tmppath / 'name1.txt'
12+
filepath2 = tmppath / 'name2.txt'
13+
assert not filepath2.exists()
14+
open(str(filepath), 'w').close()
15+
check_call(['gzip', '-N', str(filepath)])
16+
assert not filepath.exists()
17+
18+
gzpath1 = '%s/%s' % (tmppath, 'name1.txt.gz')
19+
gzpath2 = '%s/%s' % (tmppath, 'name2.txt.gz')
20+
_copy_any(gzpath1, gzpath2)
21+
assert Path(gzpath2).exists()
22+
check_call(['gunzip', '-N', '-f', gzpath2])
23+
assert not filepath.exists()
24+
assert filepath2.exists()

0 commit comments

Comments
 (0)