Skip to content

Commit faaf765

Browse files
authored
Merge pull request #796 from yarikoptic/enh-filter
Do specify filter="tar" when extracting tars
2 parents af9a6e3 + bf404c8 commit faaf765

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

heudiconv/parser.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os.path as op
1010
import re
1111
import shutil
12+
import sys
1213
from types import ModuleType
1314
from typing import Optional
1415

@@ -22,7 +23,18 @@
2223

2324
# Regex matching a path component that belongs to a version control system
# (.git, .gitattributes, .svn, .bzr, .hg), either followed by another path
# separator or ending the string.
# The separator is passed through re.escape() so that the pattern stays
# correct when os.sep is a regex metacharacter (e.g. the backslash on
# Windows); on POSIX the resulting pattern is unchanged.
_VCS_REGEX = r"%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)" % (
    re.escape(op.sep),
    re.escape(op.sep),
)
2425

25-
_UNPACK_FORMATS = tuple(sum((x[1] for x in shutil.get_unpack_formats()), []))
26+
27+
def _get_unpack_formats() -> dict[str, bool]:
    """Map each unpackable file extension to whether it denotes a tar archive.

    The extensions and descriptions come from ``shutil.get_unpack_formats()``.
    An extension is flagged ``True`` when the lower-cased description of its
    format contains the standalone word "tar".
    """
    tar_word = re.compile(r"\btar\b")
    return {
        extension: tar_word.search(description.lower()) is not None
        for _name, extensions, description in shutil.get_unpack_formats()
        for extension in extensions
    }
34+
35+
36+
# Lookup of every extension shutil can unpack -> whether it is a tar archive.
_UNPACK_FORMATS = _get_unpack_formats()
# Only the extensions flagged as tar, kept as a tuple so that it can be
# handed directly to str.endswith().
_TAR_UNPACK_FORMATS = tuple(
    ext for ext, flagged in _UNPACK_FORMATS.items() if flagged
)
2638

2739

2840
@docstring_parameter(_VCS_REGEX)
@@ -114,7 +126,7 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
114126

115127
# needs sorting to keep the generated "session" label deterministic
116128
for _, t in enumerate(sorted(fl)):
117-
if not t.endswith(_UNPACK_FORMATS):
129+
if not t.endswith(tuple(_UNPACK_FORMATS)):
118130
sessions[None].append(t)
119131
continue
120132

@@ -127,7 +139,14 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
127139

128140
# check content and sanitize permission bits before extraction
129141
os.chmod(tmpdir, mode=0o700)
130-
shutil.unpack_archive(t, extract_dir=tmpdir)
142+
# For tar archives only: starting with Python 3.12 we should provide a filter
143+
# (enforced in 3.14) on how to filter/safe-guard filenames.
144+
kws: dict[str, str] = {}
145+
if sys.version_info >= (3, 12) and t.endswith(_TAR_UNPACK_FORMATS):
146+
# Allow for a user workaround, should one be desired;
147+
# see e.g. https://docs.python.org/3.12/library/tarfile.html#extraction-filters
148+
kws["filter"] = os.environ.get("HEUDICONV_TAR_FILTER", "tar")
149+
shutil.unpack_archive(t, extract_dir=tmpdir, **kws) # type: ignore[arg-type]
131150

132151
archive_content = list(find_files(regex=".*", topdir=tmpdir))
133152

0 commit comments

Comments
 (0)