Skip to content

Commit 4974ef2

Browse files
authored
Revert "Bug 1853082 - Don't rely on file extension to guess archive type in fetch-content. r=taskgraph-reviewers,bhearsum" (#543)
This reverts commit 02f110d. That change is not backwards-compatible: it causes files that previously would not have been extracted to be detected as archives (e.g. a Python wheel is detected as a zip archive). Reverting for now; we can re-apply it with a major version bump later.
1 parent 030f753 commit 4974ef2

File tree

1 file changed

+25
-44
lines changed

1 file changed

+25
-44
lines changed

src/taskgraph/run-task/fetch-content

Lines changed: 25 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import contextlib
1010
import datetime
1111
import gzip
1212
import hashlib
13-
import io
1413
import json
1514
import lzma
1615
import multiprocessing
@@ -333,41 +332,21 @@ def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data:
333332
subprocess.run(["gpgconf", "--kill", "gpg-agent"], env=env)
334333

335334

336-
class ArchiveTypeNotSupported(Exception):
337-
def __init__(self, path: pathlib.Path):
338-
super(Exception, self).__init__("Archive type not supported for %s" % path)
339-
340-
341-
def open_stream(path: pathlib.Path):
342-
"""Attempt to identify a path as an extractable archive by looking at its
343-
content."""
344-
fh = path.open(mode="rb")
345-
magic = fh.read(6)
346-
fh.seek(0)
347-
if magic[:2] == b"PK":
348-
return "zip", fh
349-
if magic[:2] == b"\x1f\x8b":
350-
fh = gzip.GzipFile(fileobj=fh)
351-
elif magic[:3] == b"BZh":
352-
fh = bz2.BZ2File(fh)
353-
elif magic == b"\xfd7zXZ\x00":
354-
fh = lzma.LZMAFile(fh)
355-
elif magic[:4] == b"\x28\xb5\x2f\xfd":
356-
fh = ZstdDecompressor().stream_reader(fh)
357-
fh = io.BufferedReader(fh)
358-
try:
359-
# A full tar info header is 512 bytes.
360-
headers = fh.peek(512)
361-
# 257 is the offset of the ustar magic.
362-
magic = headers[257 : 257 + 8]
363-
# For older unix tar, rely on TarInfo.frombuf's checksum check
364-
if magic in (b"ustar\x0000", b"ustar \x00") or tarfile.TarInfo.frombuf(
365-
headers[:512], tarfile.ENCODING, "surrogateescape"
366-
):
367-
return "tar", fh
368-
except Exception as e:
369-
pass
370-
raise ArchiveTypeNotSupported(path)
335+
def open_tar_stream(path: pathlib.Path):
    """Open a tar file, decompressing it if needed, as a binary stream.

    The compression format is chosen solely from the final suffix of
    ``path``; the file content is never inspected.

    Supported suffixes:

    * ``.bz2``          -- opened with ``bz2.open``
    * ``.gz`` / ``.tgz`` -- opened with ``gzip.open``
    * ``.xz``           -- opened with ``lzma.open``
    * ``.zst``          -- wrapped in a ``ZstdDecompressor`` stream reader
    * ``.tar``          -- opened as-is (no decompression)

    Args:
        path: Location of the tar archive on disk.

    Returns:
        A file-like object opened for binary reading that yields the
        tar data with any compression layer removed.

    Raises:
        ValueError: If the suffix of ``path`` is not one of the supported
            ones listed above.
    """
    suffix = path.suffix

    if suffix == ".bz2":
        return bz2.open(str(path), "rb")
    if suffix in (".gz", ".tgz"):
        return gzip.open(str(path), "rb")
    if suffix == ".xz":
        return lzma.open(str(path), "rb")
    if suffix == ".zst":
        dctx = ZstdDecompressor()
        raw = path.open("rb")
        try:
            return dctx.stream_reader(raw)
        except Exception:
            # Don't leak the underlying file handle if the decompressor
            # could not be attached to it.
            raw.close()
            raise
    if suffix == ".tar":
        return path.open("rb")

    raise ValueError("unknown archive format for tar file: %s" % path)
371350

372351

373352
def archive_type(path: pathlib.Path):
@@ -380,7 +359,7 @@ def archive_type(path: pathlib.Path):
380359
return None
381360

382361

383-
def extract_archive(path, dest_dir):
362+
def extract_archive(path, dest_dir, typ):
384363
"""Extract an archive to a destination directory."""
385364

386365
# Resolve paths to absolute variants.
@@ -392,8 +371,8 @@ def extract_archive(path, dest_dir):
392371

393372
# We pipe input to the decompressor program so that we can apply
394373
# custom decompressors that the program may not know about.
395-
typ, ifh = open_stream(path)
396374
if typ == "tar":
375+
ifh = open_tar_stream(path)
397376
# On Windows, the tar program doesn't support things like symbolic
398377
# links, while Windows actually support them. The tarfile module in
399378
# python does. So use that. But since it's significantly slower than
@@ -440,8 +419,10 @@ def repack_archive(
440419
):
441420
assert orig != dest
442421
log("Repacking as %s" % dest)
443-
orig_typ, ifh = open_stream(orig)
422+
orig_typ = archive_type(orig)
444423
typ = archive_type(dest)
424+
if not orig_typ:
425+
raise Exception("Archive type not supported for %s" % orig.name)
445426
if not typ:
446427
raise Exception("Archive type not supported for %s" % dest.name)
447428

@@ -467,7 +448,7 @@ def repack_archive(
467448
ctx = ZstdCompressor()
468449
if orig_typ == "zip":
469450
assert typ == "tar"
470-
zip = zipfile.ZipFile(ifh)
451+
zip = zipfile.ZipFile(orig)
471452
# Convert the zip stream to a tar on the fly.
472453
with ctx.stream_writer(fh) as compressor, tarfile.open(
473454
fileobj=compressor, mode="w:"
@@ -509,6 +490,7 @@ def repack_archive(
509490
raise Exception("Repacking a tar to zip is not supported")
510491
assert typ == "tar"
511492

493+
ifh = open_tar_stream(orig)
512494
if filter:
513495
# To apply the filter, we need to open the tar stream and
514496
# tweak it.
@@ -551,12 +533,11 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
551533
if not extract:
552534
return
553535

554-
try:
555-
extract_archive(dest_path, dest_dir)
536+
typ = archive_type(dest_path)
537+
if typ:
538+
extract_archive(dest_path, dest_dir, typ)
556539
log("Removing %s" % dest_path)
557540
dest_path.unlink()
558-
except ArchiveTypeNotSupported:
559-
pass
560541

561542

562543
def fetch_urls(downloads):

0 commit comments

Comments
 (0)