Skip to content

Commit c1c09b4

Browse files
committed
Do specify filter="tar" when extracting tars
The situation is complicated by the fact that, when extracting a .zip, shutil.unpack_archive would also pass the filter argument on and would crash, so we need to pass it only when extracting a tar. In addition, that keyword option was added in 3.12, started to be enforced in 3.13, and would be required in 3.14.
1 parent af9a6e3 commit c1c09b4

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

heudiconv/parser.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os.path as op
1010
import re
1111
import shutil
12+
import sys
1213
from types import ModuleType
1314
from typing import Optional
1415

@@ -22,7 +23,18 @@
2223

2324
_VCS_REGEX = r"%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)" % (op.sep, op.sep)
2425

25-
_UNPACK_FORMATS = tuple(sum((x[1] for x in shutil.get_unpack_formats()), []))
26+
27+
def _get_unpack_formats() -> dict[str, bool]:
    """Map every unpackable file extension to whether it is a tar format.

    The decision is based on the description ``shutil`` reports for each
    registered unpack format: an extension is flagged as tar when the
    lower-cased description contains ``tar`` as a separate word.

    Returns
    -------
    dict[str, bool]
        Keys are the extensions reported by ``shutil.get_unpack_formats()``;
        values are True for tar-based formats and False otherwise.
    """
    tar_word = re.compile(r"\btar\b")
    return {
        extension: tar_word.search(description.lower()) is not None
        for _name, extensions, description in shutil.get_unpack_formats()
        for extension in extensions
    }
34+
35+
36+
# Every extension shutil can unpack, mapped to whether it denotes a tar archive.
_UNPACK_FORMATS = _get_unpack_formats()
# Just the tar extensions, kept as a tuple so it can be handed to str.endswith().
_TAR_UNPACK_FORMATS = tuple(
    extension for extension, tar_flag in _UNPACK_FORMATS.items() if tar_flag
)
2638

2739

2840
@docstring_parameter(_VCS_REGEX)
@@ -114,7 +126,7 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
114126

115127
# needs sorting to keep the generated "session" label deterministic
116128
for _, t in enumerate(sorted(fl)):
117-
if not t.endswith(_UNPACK_FORMATS):
129+
if not t.endswith(tuple(_UNPACK_FORMATS)):
118130
sessions[None].append(t)
119131
continue
120132

@@ -127,7 +139,14 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
127139

128140
# check content and sanitize permission bits before extraction
129141
os.chmod(tmpdir, mode=0o700)
130-
shutil.unpack_archive(t, extract_dir=tmpdir)
142+
# For tar (only!) starting with 3.12 we should provide filter
143+
# (enforced in 3.14) on how to filter/safe-guard filenames.
144+
kws = {}
145+
if sys.version_info >= (3, 12) and t.endswith(_TAR_UNPACK_FORMATS):
146+
# Allow for a user-workaround if would be desired
147+
# see e.g. https://docs.python.org/3.12/library/tarfile.html#extraction-filters
148+
kws["filter"] = os.environ.get("HEUDICONV_TAR_FILTER", "tar")
149+
shutil.unpack_archive(t, extract_dir=tmpdir, **kws)
131150

132151
archive_content = list(find_files(regex=".*", topdir=tmpdir))
133152

0 commit comments

Comments
 (0)