9
9
import os .path as op
10
10
import re
11
11
import shutil
12
+ import sys
12
13
from types import ModuleType
13
14
from typing import Optional
14
15
22
23
23
24
# Regex matching a path that contains a version-control entry (.git,
# .gitattributes, .svn, .bzr or .hg) as a full path component, i.e. preceded
# by a path separator and followed by a separator or the end of the string.
# The separator is passed through re.escape() so that a backslash separator
# is matched literally instead of being read as a regex escape; for "/" the
# resulting pattern is unchanged.
_VCS_REGEX = r"%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)" % (
    re.escape(op.sep),
    re.escape(op.sep),
)
24
25
25
- _UNPACK_FORMATS = tuple (sum ((x [1 ] for x in shutil .get_unpack_formats ()), []))
26
+
27
def _get_unpack_formats() -> dict[str, bool]:
    """Map every archive extension ``shutil`` can unpack to an "is tar" flag.

    Each entry reported by ``shutil.get_unpack_formats()`` carries a list of
    file extensions and a free-text description.  An extension is flagged as
    tar when the lower-cased description contains ``tar`` as a separate word.

    Returns
    -------
    dict
        ``{extension: True if the format is tar-based, else False}``
    """
    tar_word = re.compile(r"\btar\b")
    return {
        extension: bool(tar_word.search(description.lower()))
        for _, extensions, description in shutil.get_unpack_formats()
        for extension in extensions
    }
34
+
35
+
36
# Every extension shutil is able to unpack, mapped to whether it is tar-based
_UNPACK_FORMATS = _get_unpack_formats()
# Just the tar-based extensions; kept as a tuple so it can be handed
# directly to str.endswith()
_TAR_UNPACK_FORMATS = tuple(ext for ext in _UNPACK_FORMATS if _UNPACK_FORMATS[ext])
26
38
27
39
28
40
@docstring_parameter (_VCS_REGEX )
@@ -114,7 +126,7 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
114
126
115
127
# needs sorting to keep the generated "session" label deterministic
116
128
for _ , t in enumerate (sorted (fl )):
117
- if not t .endswith (_UNPACK_FORMATS ):
129
+ if not t .endswith (tuple ( _UNPACK_FORMATS ) ):
118
130
sessions [None ].append (t )
119
131
continue
120
132
@@ -127,7 +139,14 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str
127
139
128
140
# check content and sanitize permission bits before extraction
129
141
os .chmod (tmpdir , mode = 0o700 )
130
- shutil .unpack_archive (t , extract_dir = tmpdir )
142
+ # For tar (only!) starting with 3.12 we should provide filter
143
+ # (enforced in 3.14) on how to filter/safe-guard filenames.
144
+ kws : dict [str , str ] = {}
145
+ if sys .version_info >= (3 , 12 ) and t .endswith (_TAR_UNPACK_FORMATS ):
146
+ # Allow a user workaround, if desired, via the environment variable below
147
+ # see e.g. https://docs.python.org/3.12/library/tarfile.html#extraction-filters
148
+ kws ["filter" ] = os .environ .get ("HEUDICONV_TAR_FILTER" , "tar" )
149
+ shutil .unpack_archive (t , extract_dir = tmpdir , ** kws ) # type: ignore[arg-type]
131
150
132
151
archive_content = list (find_files (regex = ".*" , topdir = tmpdir ))
133
152
0 commit comments