44
55module PlatformEngines
66
7- using SHA, Downloads, Tar
7+ using SHA, Downloads, Tar, Dates, Printf
88import ... Pkg: Pkg, TOML, pkg_server, depots1, can_fancyprint, stderr_f, atomic_toml_write
99using .. MiniProgressBars
10- using Base. BinaryPlatforms, p7zip_jll
10+ using Base. BinaryPlatforms, p7zip_jll, Zstd_jll
1111
12- export verify, unpack, package, download_verify_unpack
12+ export verify, unpack, package, download_verify_unpack, get_extract_cmd, detect_archive_format
1313
1414const EXE7Z_LOCK = ReentrantLock ()
1515const EXE7Z = Ref {String} ()
16+ const EXEZSTD_LOCK = ReentrantLock ()
17+ const EXEZSTD = Ref {String} ()
1618
1719function exe7z ()
1820 # If the JLL is available, use the wrapper function defined in there
@@ -28,6 +30,20 @@ function exe7z()
2830 end
2931end
3032
# Return a `Cmd` that invokes the zstd executable.
#
# The Zstd_jll wrapper is used whenever the JLL reports itself available.
# Otherwise the path found by `findzstd()` is stored in `EXEZSTD` on first use
# (guarded by `EXEZSTD_LOCK`) and reused on later calls.
function exezstd()
    Zstd_jll.is_available() && return Zstd_jll.zstd()

    lock(EXEZSTD_LOCK)
    try
        # Locate the binary only once; later calls reuse the stored path.
        isassigned(EXEZSTD) || (EXEZSTD[] = findzstd())
        return Cmd([EXEZSTD[]])
    finally
        unlock(EXEZSTD_LOCK)
    end
end
46+
3147function find7z ()
3248 name = " 7z"
3349 Sys. iswindows () && (name = " $name .exe" )
@@ -40,6 +56,18 @@ function find7z()
4056 error (" 7z binary not found" )
4157end
4258
# Locate a zstd executable and return its path.
#
# Search order: `../libexec` relative to `Sys.BINDIR`, then `Sys.BINDIR` itself,
# then whatever `Sys.which` finds. Throws an error if none of them yields a binary.
function findzstd()
    exe = Sys.iswindows() ? "zstd.exe" : "zstd"
    bindir = Sys.BINDIR::String
    for subdir in (joinpath("..", "libexec"), ".")
        candidate = normpath(bindir, subdir, exe)
        if isfile(candidate)
            return candidate
        end
    end
    found = Sys.which(exe)
    found === nothing && error("zstd binary not found")
    return found
end
70+
4371is_secure_url (url:: AbstractString ) =
4472 occursin (r" ^(https://|\w +://(127\. 0\. 0\. 1|localhost)(:\d +)?($|/))" i , url)
4573
@@ -232,6 +260,13 @@ function get_metadata_headers(url::AbstractString)
232260 end
233261 push! (headers, " Julia-CI-Variables" => join (ci_info, ' ;' ))
234262 push! (headers, " Julia-Interactive" => string (isinteractive ()))
263+
264+ # Add Accept-Encoding header only for compressed archive resources
265+ # (registries, packages, artifacts - not for metadata endpoints like /registries or /meta)
266+ if occursin (r" /(registry|package|artifact)/" , url)
267+ push! (headers, " Accept-Encoding" => " zstd, gzip" )
268+ end
269+
235270 for (key, val) in ENV
236271 m = match (r" ^JULIA_PKG_SERVER_([A-Z0-9_]+)$" i , key)
237272 m === nothing && continue
@@ -403,22 +438,89 @@ function copy_symlinks()
403438 lowercase (var) in (" false" , " f" , " no" , " n" , " 0" ) ? false : nothing
404439end
405440
"""
    detect_archive_format(tarball_path::AbstractString)

Detect the archive/compression format of the file at `tarball_path` by reading its
magic bytes.

Returns one of: "zstd", "gzip", "bzip2", "xz", "lz4", "tar", or "unknown".

Compression signatures are matched at the start of the file. An uncompressed tar
archive is recognised by the `ustar` magic at byte offset 257 of its first header
block; tar files without that magic are reported as "unknown".

Throws an `ErrorException` if the file is empty.

Note: This is used both for determining file extensions after download
and for selecting the appropriate decompression tool.
"""
function detect_archive_format(tarball_path::AbstractString)
    file_size = filesize(tarball_path)

    if file_size == 0
        error("cannot detect compression format: $tarball_path is empty")
    end

    # 262 bytes cover every compression signature (at most 6 bytes at offset 0)
    # plus the 5-byte tar magic that sits at offset 257.
    header = open(tarball_path, "r") do io
        read(io, min(262, file_size))
    end

    # True when `sig` occurs in `header` starting `offset` bytes into the file.
    function has_signature(sig, offset = 0)
        length(header) >= offset + length(sig) || return false
        return all(header[offset + i] == byte for (i, byte) in enumerate(sig))
    end

    # Zstd: 0x28 0xB5 0x2F 0xFD
    has_signature((0x28, 0xB5, 0x2F, 0xFD)) && return "zstd"
    # Gzip: 0x1F 0x8B
    has_signature((0x1F, 0x8B)) && return "gzip"
    # Bzip2: 0x42 0x5A 0x68 ("BZh")
    has_signature((0x42, 0x5A, 0x68)) && return "bzip2"
    # XZ: 0xFD 0x37 0x7A 0x58 0x5A 0x00
    has_signature((0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00)) && return "xz"
    # LZ4: 0x04 0x22 0x4D 0x18
    has_signature((0x04, 0x22, 0x4D, 0x18)) && return "lz4"
    # Tar is checked last so that every file previously classified as a
    # compressed format keeps exactly the same answer.
    has_signature(b"ustar", 257) && return "tar"
    return "unknown"
end
484+
"""
    get_extract_cmd(tarball_path::AbstractString)

Build the command that decompresses `tarball_path` to standard output.

The format is taken from `detect_archive_format`: zstd archives are handled by the
zstd executable, every other result is handed to 7z.
"""
function get_extract_cmd(tarball_path::AbstractString)
    is_zstd = detect_archive_format(tarball_path) == "zstd"
    is_zstd && return `$(exezstd()) -d -c $tarball_path`
    return `$(exe7z()) x $tarball_path -so`
end
498+
# Extract the archive at `tarball_path` into the directory `dest`.
#
# The decompression command comes from `get_extract_cmd` and its output is fed to
# `Tar.extract`. The `verbose` keyword is accepted but not used by this body.
function unpack(tarball_path::AbstractString, dest::AbstractString; verbose::Bool = false)
    decompress_cmd = get_extract_cmd(tarball_path)
    return Tar.extract(decompress_cmd, dest; copy_symlinks = copy_symlinks())
end
413506
"""
    package(src_dir::AbstractString, tarball_path::AbstractString; io = stderr_f())

Compress `src_dir` into a tarball located at `tarball_path`.

The compressor is chosen from the file extension (case-insensitive): paths ending in
`.zst` (which includes `.tar.zst`) are written with zstd, everything else with gzip
via 7z. Any existing file at `tarball_path` is removed first. The compressor's
standard error is sent to `io`.
"""
function package(src_dir::AbstractString, tarball_path::AbstractString; io = stderr_f())
    rm(tarball_path, force = true)
    # ".tar.zst" also ends in ".zst", so one suffix test covers both spellings.
    if endswith(lowercase(tarball_path), ".zst")
        # Level 19 for good compression; -T0 asks zstd to pick the worker count
        # itself. The tar stream arrives on stdin and the result goes to the -o
        # target, so no -c (write to stdout) flag is passed.
        cmd = `$(exezstd()) -19 -T0 -o $tarball_path`
    else
        # Use gzip compression (default)
        cmd = `$(exe7z()) a -si -tgzip -mx9 $tarball_path`
    end
    # `tar_io` is the compressor's stdin; named distinctly so it does not shadow
    # the `io` keyword used for stderr above.
    return open(pipeline(cmd, stdout = devnull, stderr = io), write = true) do tar_io
        Tar.create(src_dir, tar_io)
    end
end
@@ -497,7 +599,7 @@ function download_verify_unpack(
497599
498600 # If extension of url contains a recognized extension, use it, otherwise use ".gz"
499601 ext = url_ext (url)
500- if ! (ext in [" tar" , " gz" , " tgz" , " bz2" , " xz" ])
602+ if ! (ext in [" tar" , " gz" , " tgz" , " bz2" , " xz" , " zst " ])
501603 ext = " gz"
502604 end
503605
@@ -538,7 +640,7 @@ function download_verify_unpack(
538640 @info (" Unpacking $(tarball_path) into $(dest) ..." )
539641 end
540642 isnothing (progress) || progress (10000 , 10000 ; status = " unpacking" )
541- open (` $( exe7z ()) x $ tarball_path -so ` ) do io
643+ open (get_extract_cmd ( tarball_path) ) do io
542644 Tar. extract (io, dest, copy_symlinks = copy_symlinks ())
543645 end
544646 finally
@@ -685,12 +787,12 @@ function verify(
685787end
686788
687789# Verify the git-tree-sha1 hash of a compressed archive.
688- function verify_archive_tree_hash (tar_gz :: AbstractString , expected_hash:: Base.SHA1 )
790+ function verify_archive_tree_hash (compressed_tar :: AbstractString , expected_hash:: Base.SHA1 )
689791 # This can fail because unlike sha256 verification of the downloaded
690792 # tarball, tree hash verification requires that the file can i) be
691793 # decompressed and ii) is a proper archive.
692794 calc_hash = try
693- Base. SHA1 (open (Tar. tree_hash, ` $( exe7z ()) x $tar_gz -so ` ))
795+ Base. SHA1 (open (Tar. tree_hash, get_extract_cmd (compressed_tar) ))
694796 catch err
695797 @warn " unable to decompress and read archive" exception = err
696798 return false
0 commit comments