Skip to content

Commit 9efe641

Browse files
committed
add proper error handling to tar file processing
1 parent f0983d0 commit 9efe641

File tree

1 file changed

+16
-7
lines changed

1 file changed

+16
-7
lines changed

biothings/utils/common.py

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
import random
2222
import string
2323
import sys
24+
import tarfile
2425
import time
2526
import types
2627
import urllib.parse
@@ -172,15 +173,23 @@ def anyfile(infile, mode="r"):
172173
rawfile = os.path.splitext(infile)[0]
173174
filetype = os.path.splitext(infile)[1].lower()
174175

175-
# check for tarball before other formats
176-
root = os.path.splitext(infile)[0]
177-
secondary_filetype = os.path.splitext(root)[1].lower()
178176

179-
# this is to match both plain ".tar" or compressed tarballs like ".tar.gz" and ".tar.xz"
180-
if filetype == ".tar" or secondary_filetype == ".tar":
181-
import tarfile
177+
# use tarfile built-in method to check for tar file before anything else
178+
if tarfile.is_tarfile(infile):
182179
tar_file = tarfile.open(infile, mode)
183-
return io.TextIOWrapper(tar_file.extractfile(rawfile))
180+
try:
181+
extracted = tar_file.extractfile(rawfile)
182+
except KeyError:
183+
# provided rawfile does not appear in the tarball
184+
tar_file.close()
185+
raise Exception("target member does not contain the provided tar file.")
186+
187+
# extracted member is not a regular file or link
188+
if extracted is None:
189+
tar_file.close()
190+
raise Exception("invalid target file: must be a regular file or a link")
191+
192+
return io.TextIOWrapper(extracted)
184193

185194
if filetype == ".gz":
186195
# import gzip

0 commit comments

Comments
 (0)