Skip to content

Commit b2b1e83

Browse files
authored
Merge pull request #424 from biothings/tar-file-handling
Tar file handling
2 parents 5407535 + 9efe641 commit b2b1e83

File tree

1 file changed

+21
-1
lines changed

1 file changed

+21
-1
lines changed

biothings/utils/common.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import random
2222
import string
2323
import sys
24+
import tarfile
2425
import time
2526
import types
2627
import urllib.parse
@@ -160,7 +161,7 @@ def safewfile(filename, prompt=True, default="C", mode="w"):
160161

161162
def anyfile(infile, mode="r"):
162163
"""
163-
return a file handler with the support for gzip/zip comppressed files.
164+
return a file handler with the support for gzip/zip compressed files.
164165
if infile is a two value tuple, then first one is the compressed file;
165166
the second one is the actual filename in the compressed file.
166167
e.g., ('a.zip', 'aa.txt')
@@ -171,6 +172,25 @@ def anyfile(infile, mode="r"):
171172
else:
172173
rawfile = os.path.splitext(infile)[0]
173174
filetype = os.path.splitext(infile)[1].lower()
175+
176+
177+
# use tarfile built-in method to check for tar file before anything else
178+
if tarfile.is_tarfile(infile):
179+
tar_file = tarfile.open(infile, mode)
180+
try:
181+
extracted = tar_file.extractfile(rawfile)
182+
except KeyError:
183+
# provided rawfile does not appear in the tarball
184+
tar_file.close()
185+
raise Exception("target member does not contain the provided tar file.")
186+
187+
# extracted member is not a regular file or link
188+
if extracted is None:
189+
tar_file.close()
190+
raise Exception("invalid target file: must be a regular file or a link")
191+
192+
return io.TextIOWrapper(extracted)
193+
174194
if filetype == ".gz":
175195
# import gzip
176196
in_f = io.TextIOWrapper(gzip.GzipFile(infile, mode))

0 commit comments

Comments
 (0)