@@ -10,7 +10,6 @@ import contextlib
1010import datetime
1111import gzip
1212import hashlib
13- import io
1413import json
1514import lzma
1615import multiprocessing
@@ -333,41 +332,21 @@ def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data:
333332 subprocess .run (["gpgconf" , "--kill" , "gpg-agent" ], env = env )
334333
335334
class ArchiveTypeNotSupported(Exception):
    """Raised when a path cannot be identified as a supported archive.

    The offending path is kept on the instance as ``path`` so callers can
    inspect it without parsing the message.
    """

    def __init__(self, path: pathlib.Path):
        # Zero-argument super() follows the MRO from this class. The previous
        # ``super(Exception, self)`` form started the lookup *after*
        # Exception, bypassing it entirely.
        super().__init__("Archive type not supported for %s" % path)
        self.path = path
339-
340-
def open_stream(path: pathlib.Path):
    """Identify an extractable archive by its content and open it for reading.

    The first six bytes of the file are compared against known magic
    numbers. A zip file is returned as the raw file handle. Anything else
    is wrapped in the matching decompressor (gzip, bzip2, xz or zstd) when
    one is recognised, and the resulting stream is then checked for a tar
    header.

    Returns a ``(kind, fh)`` tuple where ``kind`` is ``"zip"`` or ``"tar"``
    and ``fh`` is a binary file object positioned at the start of the
    archive data. The caller owns ``fh`` and is responsible for closing it.

    Raises ArchiveTypeNotSupported when the content is neither a zip nor a
    (possibly compressed) tar. Every handle opened here is closed before
    the exception propagates.
    """
    raw = path.open(mode="rb")
    magic = raw.read(6)
    raw.seek(0)
    if magic[:2] == b"PK":
        return "zip", raw

    fh = raw
    if magic[:2] == b"\x1f\x8b":
        fh = gzip.GzipFile(fileobj=raw)
    elif magic[:3] == b"BZh":
        fh = bz2.BZ2File(raw)
    elif magic == b"\xfd7zXZ\x00":
        fh = lzma.LZMAFile(raw)
    elif magic[:4] == b"\x28\xb5\x2f\xfd":
        fh = ZstdDecompressor().stream_reader(raw)
    fh = io.BufferedReader(fh)
    try:
        # A full tar info header is 512 bytes.
        headers = fh.peek(512)
        # 257 is the offset of the ustar magic.
        magic = headers[257 : 257 + 8]
        # For older unix tar, rely on TarInfo.frombuf's checksum check
        if magic in (b"ustar\x0000", b"ustar  \x00") or tarfile.TarInfo.frombuf(
            headers[:512], tarfile.ENCODING, "surrogateescape"
        ):
            return "tar", fh
    except Exception:
        # Content sniffing is best-effort: a decompression or header-parsing
        # failure simply means "not a tar" and is reported as such below.
        pass

    # Not an archive we understand. The stdlib decompressor wrappers do not
    # close a file object that was handed to them, so close the raw handle
    # explicitly as well to avoid leaking a descriptor.
    try:
        fh.close()
    finally:
        raw.close()
    raise ArchiveTypeNotSupported(path)
def open_tar_stream(path: pathlib.Path):
    """Open a tar archive, possibly compressed, as a binary read stream.

    The decompressor is selected from the final suffix of ``path`` only;
    the file content is not inspected.

    Recognised suffixes are ``.bz2``, ``.gz``, ``.tgz``, ``.xz``, ``.zst``
    and ``.tar`` (opened as-is, without decompression).

    Returns a binary file object opened for reading.

    Raises ValueError when the suffix is not one of the recognised ones.
    """
    # Each opener is a thunk so that only the selected backend is touched.
    openers = {
        ".bz2": lambda: bz2.open(str(path), "rb"),
        ".gz": lambda: gzip.open(str(path), "rb"),
        ".tgz": lambda: gzip.open(str(path), "rb"),
        ".xz": lambda: lzma.open(str(path), "rb"),
        ".zst": lambda: ZstdDecompressor().stream_reader(path.open("rb")),
        ".tar": lambda: path.open("rb"),
    }
    opener = openers.get(path.suffix)
    if opener is None:
        raise ValueError("unknown archive format for tar file: %s" % path)
    return opener()
371350
372351
373352def archive_type (path : pathlib .Path ):
@@ -380,7 +359,7 @@ def archive_type(path: pathlib.Path):
380359 return None
381360
382361
383- def extract_archive (path , dest_dir ):
362+ def extract_archive (path , dest_dir , typ ):
384363 """Extract an archive to a destination directory."""
385364
386365 # Resolve paths to absolute variants.
@@ -392,8 +371,8 @@ def extract_archive(path, dest_dir):
392371
393372 # We pipe input to the decompressor program so that we can apply
394373 # custom decompressors that the program may not know about.
395- typ , ifh = open_stream (path )
396374 if typ == "tar" :
375+ ifh = open_tar_stream (path )
397376 # On Windows, the tar program doesn't support things like symbolic
398377 # links, while Windows actually support them. The tarfile module in
399378 # python does. So use that. But since it's significantly slower than
@@ -440,8 +419,10 @@ def repack_archive(
440419):
441420 assert orig != dest
442421 log ("Repacking as %s" % dest )
443- orig_typ , ifh = open_stream (orig )
422+ orig_typ = archive_type (orig )
444423 typ = archive_type (dest )
424+ if not orig_typ :
425+ raise Exception ("Archive type not supported for %s" % orig .name )
445426 if not typ :
446427 raise Exception ("Archive type not supported for %s" % dest .name )
447428
@@ -467,7 +448,7 @@ def repack_archive(
467448 ctx = ZstdCompressor ()
468449 if orig_typ == "zip" :
469450 assert typ == "tar"
470- zip = zipfile .ZipFile (ifh )
451+ zip = zipfile .ZipFile (orig )
471452 # Convert the zip stream to a tar on the fly.
472453 with ctx .stream_writer (fh ) as compressor , tarfile .open (
473454 fileobj = compressor , mode = "w:"
@@ -509,6 +490,7 @@ def repack_archive(
509490 raise Exception ("Repacking a tar to zip is not supported" )
510491 assert typ == "tar"
511492
493+ ifh = open_tar_stream (orig )
512494 if filter :
513495 # To apply the filter, we need to open the tar stream and
514496 # tweak it.
@@ -551,12 +533,11 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
551533 if not extract :
552534 return
553535
554- try :
555- extract_archive (dest_path , dest_dir )
536+ typ = archive_type (dest_path )
537+ if typ :
538+ extract_archive (dest_path , dest_dir , typ )
556539 log ("Removing %s" % dest_path )
557540 dest_path .unlink ()
558- except ArchiveTypeNotSupported :
559- pass
560541
561542
562543def fetch_urls (downloads ):
0 commit comments