Skip to content

Commit 18683f9

Browse files
author
ripley
committed
Add zstd support to tar() and untar()
Other tar-related updates. git-svn-id: https://svn.r-project.org/R/trunk@87605 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent e539753 commit 18683f9

File tree

6 files changed

+140
-54
lines changed

6 files changed

+140
-54
lines changed

doc/NEWS.Rd

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,10 @@
172172
\item New function \code{use()} to use packages in R scripts with
173173
full control over what gets added to the search path. (Actually
174174
already available since \R 4.4.0.)
175+
176+
\item There is some support for \command{zstd} compression of
177+
tarballs in \code{tar()} and \code{untar()}. (This depends on OS
178+
support for \code{libzstd} or on support by \command{tar}.)
175179
}
176180
}
177181
@@ -358,8 +362,11 @@
358362
359363
\item \code{R CMD check} with a true value for environment variable
360364
\env{_R_CHECK_BASHISMS_} checks more thoroughly, including for
361-
\command{bash} scripts and components of
365+
\command{bash} scripts and bashisms in components of
362366
\command{autoconf}-generated \command{configure} scripts.
367+
368+
\item \code{R CMD build} now supports \option{--compression=zstd}
369+
on platforms with sufficient support for \command{zstd}.
363370
}
364371
}
365372

src/library/tools/R/build.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ inRbuildignore <- function(files, pkgdir) {
153153
' "no" (default), "qpdf", "gs", "gs+qpdf", "both"',
154154
" --compact-vignettes same as --compact-vignettes=qpdf",
155155
" --compression= type of compression to be used on tarball:",
156-
' "gzip" (default), "none", "bzip2", "xz"',
156+
' "gzip" (default), "none", "bzip2", "xz", "zstd"',
157157
" --md5 add MD5 sums",
158158
" --log log to file 'pkg-00build.log' when processing ",
159159
" the pkgdir with basename 'pkg'",
@@ -930,7 +930,7 @@ inRbuildignore <- function(files, pkgdir) {
930930
install_dependencies <- "most"
931931
} else if (substr(a, 1, 14) == "--compression=") {
932932
compression <- match.arg(substr(a, 15, 1000),
933-
c("none", "gzip", "bzip2", "xz"))
933+
c("none", "gzip", "bzip2", "xz", "zstd"))
934934
} else if (substr(a, 1, 7) == "--user=") {
935935
user <- substr(a, 8, 64)
936936
} else if (startsWith(a, "-")) {
@@ -1206,7 +1206,8 @@ inRbuildignore <- function(files, pkgdir) {
12061206

12071207
## Finalize
12081208
ext <- switch(compression,
1209-
"none"="", "gzip"= ".gz", "bzip2" = ".bz2", "xz" = ".xz")
1209+
"none"="", "gzip"= ".gz", "bzip2" = ".bz2",
1210+
"xz" = ".xz", "zstd" = ".zst")
12101211
filename <- paste0(pkgname, "_", desc["Version"], ".tar", ext)
12111212
filepath <- file.path(startdir, filename)
12121213
## NB: ../../../../tests/reg-packages.R relies on this exact format!

src/library/utils/R/tar.R

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# File src/library/utils/R/tar.R
22
# Part of the R package, https://www.R-project.org
33
#
4-
# Copyright (C) 1995-2023 The R Core Team
4+
# Copyright (C) 1995-2025 The R Core Team
55
#
66
# This program is free software; you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by
@@ -50,8 +50,9 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
5050
if (!missing(compressed))
5151
warning("untar(compressed=) is deprecated", call. = FALSE, domain = NA)
5252
if (is.character(compressed)) {
53-
cflag <- switch(match.arg(compressed, c("gzip", "bzip2", "xz")),
54-
"gzip" = "z", "bzip2" = "j", "xz" = "J")
53+
cflag <- switch(match.arg(compressed, c("gzip", "bzip2", "xz", "zstd")),
54+
"gzip" = "z", "bzip2" = "j", "xz" = "J",
55+
"zstd" = "-zstd")
5556
} else if (is.logical(compressed)) {
5657
if (is.na(compressed) && support_old_tars) {
5758
magic <- readBin(tarfile, "raw", n = 6L)
@@ -60,6 +61,7 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
6061
else if(rawToChar(magic[1:3]) == "BZh") cflag <- "j"
6162
## (https://tukaani.org/xz/xz-file-format.txt)
6263
else if(all(magic[1:6] == c(0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00))) cflag <- "J"
64+
else if(all(magic[1:4] == c(0x28, 0xb5, 0x2f, 0xfd))) cflag <- "-zstd"
6365
} else if (isTRUE(compressed)) cflag <- "z"
6466
} else stop("'compressed' must be logical or character")
6567

@@ -82,6 +84,12 @@ untar <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
8284
tarfile <- "-"
8385
cflag <- ""
8486
} else stop(sprintf("No %s command found", sQuote("xz")))
87+
if (cflag == "-zstd")
88+
if (nzchar(Sys.which("zstd"))) {
89+
TAR <- paste("zstd -dc", shQuote(tarfile), "|", TAR)
90+
tarfile <- "-"
91+
cflag <- ""
92+
} else stop(sprintf("No %s command found", sQuote("zstd")))
8593
}
8694

8795
if (list) {
@@ -368,7 +376,7 @@ untar2 <- function(tarfile, files = NULL, list = FALSE, exdir = ".",
368376
}
369377

370378
tar <- function(tarfile, files = NULL,
371-
compression = c("none", "gzip", "bzip2", "xz"),
379+
compression = c("none", "gzip", "bzip2", "xz", "zstd"),
372380
compression_level = 6, tar = Sys.getenv("tar"),
373381
extra_flags = "")
374382
{
@@ -391,7 +399,8 @@ tar <- function(tarfile, files = NULL,
391399
"none" = "-cf",
392400
"gzip" = "-zcf",
393401
"bzip2" = "-jcf",
394-
"xz" = "-Jcf")
402+
"xz" = "-Jcf",
403+
"zstd" = "--zstd -cf")
395404

396405
if (grepl("darwin", R.version$os)) {
397406
## Precaution for macOS to omit resource forks
@@ -430,7 +439,8 @@ tar <- function(tarfile, files = NULL,
430439
"none" = file(tarfile, "wb"),
431440
"gzip" = gzfile(tarfile, "wb", compression = compression_level),
432441
"bzip2" = bzfile(tarfile, "wb", compression = compression_level),
433-
"xz" = xzfile(tarfile, "wb", compression = compression_level))
442+
"xz" = xzfile(tarfile, "wb", compression = compression_level),
443+
"zstd" = zstdfile(tarfile, "wb", compression = compression_level))
434444
on.exit(close(con))
435445
} else if(inherits(tarfile, "connection")) con <- tarfile
436446
else stop("'tarfile' must be a character string or a connection")

src/library/utils/man/tar.Rd

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
}
1414
\usage{
1515
tar(tarfile, files = NULL,
16-
compression = c("none", "gzip", "bzip2", "xz"),
16+
compression = c("none", "gzip", "bzip2", "xz", "zstd"),
1717
compression_level = 6, tar = Sys.getenv("tar"),
1818
extra_flags = "")
1919
}
@@ -30,7 +30,8 @@ tar(tarfile, files = NULL,
3030
be used (default none). Can be abbreviated.}
3131

3232
\item{compression_level}{integer: the level of compression. Only used
33-
for the internal method.}
33+
for the internal method: see the help for \code{\link{gzfile}} for
34+
possible values.}
3435

3536
\item{tar}{character string: the path to the command to be used. If
3637
the command itself contains spaces it needs to be quoted (e.g., by
@@ -66,7 +67,7 @@ tar(tarfile, files = NULL,
6667
For GNU \command{tar},
6768
\option{--format=ustar} forces a more portable format. (The default is
6869
set at compilation and will be shown at the end of the output from
69-
\command{tar --help}: for version 1.34 \sQuote{out-of-the-box} it is
70+
\command{tar --help}: for version 1.35 \sQuote{out-of-the-box} it is
7071
\option{--format=gnu}, but the manual says the intention is to change
7172
%% https://www.gnu.org/software/tar/manual/tar.html#Formats
7273
to \option{--format=posix} which is the same as \code{pax} --
@@ -78,7 +79,7 @@ tar(tarfile, files = NULL,
7879

7980
%% This uses -T, not supported by Solaris nor Heirloom Toolchest.
8081
One issue which can cause an external command to fail is a command
81-
line too long for the system shell: as from \R 3.5.0 this is worked
82+
line too long for the system shell: this is worked
8283
around if the external command is detected to be GNU \command{tar} or
8384
\I{libarchive} \command{tar} (aka \command{bsdtar}).
8485

@@ -169,25 +170,35 @@ tar(tarfile, files = NULL,
169170
\section{Compression}{
170171
When an external \command{tar} command is used, compressing the tar
171172
archive requires that \command{tar} supports the \option{-z},
172-
\option{-j} or \option{-J} flag, and may require the appropriate
173-
command (\command{gzip}, \command{bzip2} or \command{xz}) to be
174-
available. For GNU \command{tar}, further compression programs can be
175-
specified by e.g.\sspace{}\code{extra_flags = "-I lz4"} or
176-
\code{"--zstd"}, \code{"--lzip"} or \code{"--lzop"} in argument
177-
\code{extra_flags}. Some versions of \command{bsdtar} accept options
178-
such as \option{--zstd}, \option{--lz4}, \option{--lzop} and
179-
\option{--lrzip} or an external compressor \emph{via}
180-
\option{--use-compress-program lz4}: these could be supplied in
181-
\code{extra_flags}.
173+
\option{-j}, \option{-J} or \option{--zstd} flag, and may require the
174+
appropriate command (\command{gzip}, \command{bzip2}, \command{xz} or
175+
\command{zstd}) to be available. For GNU \command{tar}, further
176+
compression programs can be specified by
177+
e.g.\sspace{}\code{extra_flags = "-I lz4"} or \code{"--lzip"} or
178+
\code{"--lzop"} in argument \code{extra_flags}. Some versions of
179+
\command{bsdtar} accept options such as \option{--lz4},
180+
\option{--lzop} and \option{--lrzip} or an external compressor
181+
\emph{via} \option{--use-compress-program lz4}: these could be
182+
supplied in \code{extra_flags}.
182183

183184
\I{NetBSD} prior to 8.0 used flag \option{--xz} rather than \option{-J},
184185
so this should be used \emph{via} \code{extra_flags = "--xz"} rather
185186
than \code{compression = "xz"}. The commands from \I{OpenBSD} and the
186-
\I{Heirloom Toolchest} are not documented to support \command{xz}.
187+
\I{Heirloom Toolchest} are not documented to support \command{xz} nor
188+
\command{zstd}.
189+
190+
The \command{tar} program in recent macOS (e.g.\sspace{}15.2) does
191+
support \command{zstd} compression \emph{via} an
192+
external command, but Apple does not supply one.
187193

188194
The \command{tar} programs in commercial Unixen such as \I{AIX} and
189195
Solaris do not support compression.
190196

197+
GNU \command{tar} added support in version 1.22 for \command{xz}
198+
compression and in version 1.31 for \command{zstd} compression.
199+
\command{bsdtar} added support for \command{xz} in 2019 and for
200+
\command{zstd} in 2020.
201+
191202
Neither the internal nor the known external \command{tar} commands
192203
support parallel compression --- but this function can be used to write
193204
an uncompressed tarball which can then be compressed in parallel, for

src/library/utils/man/untar.Rd

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/utils/man/untar.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 2009-2019 R Core Team
3+
% Copyright 2009-2025 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{untar}
@@ -40,13 +40,14 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
4040

4141
\item{compressed}{(Deprecated in favour of auto-detection, used only
4242
for an external \command{tar} command.) Logical or character
43-
string. Values \code{"gzip"}, \code{"bzip2"} and \code{"xz"} select
44-
that form of compression (and may be abbreviated to the first
45-
letter). \code{TRUE} indicates \command{gzip} compression,
46-
\code{FALSE} no known compression, and \code{NA} (the default)
47-
indicates that the type is to be inferred from the file header.
48-
49-
The external command may ignore the selected compression type but
43+
string. Values \code{"gzip"}, \code{"bzip2"}, \code{"xz"} and
44+
\code{"zstd"} select that form of compression (and may be
45+
abbreviated to the first letter). \code{TRUE} indicates
46+
\command{gzip} compression, \code{FALSE} no known compression, and
47+
\code{NA} (the default) indicates that the type is to be inferred
48+
from the file header.
49+
50+
The external command may ignore the selected compression type and
5051
detect a type automagically.
5152
}
5253

@@ -85,9 +86,9 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
8586
}
8687

8788
\item{tar}{character string: the path to the command to be used or
88-
\code{"internal"}. If the command itself contains spaces it needs
89-
to be quoted -- but \code{tar} can also contain flags separated from
90-
the command by spaces.}
89+
\code{"internal"} or \code{""}. If the command itself contains
90+
spaces it needs to be quoted -- but \code{tar} can also contain
91+
flags separated from the command by spaces.}
9192
}
9293

9394
\details{
@@ -107,16 +108,24 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
107108
\item{GNU tar:}{Modern GNU \command{tar} versions support
108109
compressed archives and since 1.15 are able to detect the type of
109110
compression automatically: version 1.22 added support for
110-
\command{xz} compression.
111+
\command{xz} compression and version 1.31 for \command{zstd}
112+
compression.
111113

112114
On a Unix-alike, \command{configure} will set environment variable
113115
\env{TAR}, preferring GNU tar if found.}
114116

115117
%% bsdtar had it in FreeBSD 5.3 (2004)
116118
\item{\code{bsdtar}:}{macOS 10.6 and later (and FreeBSD and some
117-
other OSes) have a \command{tar}
118-
from the \I{libarchive} project which detects all three forms
119-
of compression automagically (even if undocumented in macOS).}
119+
other OSes) have a \command{tar} from the \I{libarchive} project
120+
which detects known-to-it forms of compression automagically.
121+
However, this may rely on an external command being available: macOS
122+
has a tar which knows about \code{zstd} compression, but relies
123+
on a \command{zstd} command which it does not supply.
124+
125+
This added support for \command{xz} in 2019 and for \command{zstd}
126+
in 2020 (if the appropriate library or external program is
127+
available).
128+
}
120129

121130
\item{NetBSD:}{It is undocumented if \I{NetBSD}'s \command{tar} can
122131
detect compression automagically: for versions before 8 the flag
@@ -132,22 +141,23 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
132141

133142
\item{Heirloom Toolchest:}{This \command{tar} does automagically
134143
detect \command{gzip} and \command{bzip2} compression (undocumented)
135-
but has no support for \command{xz} compression.}
144+
but had no support for \command{xz} nor \command{zstd} compression.}
136145

137146
\item{Older support:}{Environment variable \env{R_GZIPCMD} gives the
138147
command to decompress \command{gzip} files, and
139148
\env{R_BZIPCMD} for \command{bzip2} files. (On Unix-alikes
140-
these are set at installation if found.) \command{xz} is used if
141-
available: if not decompression is expected to fail.}
149+
these are set at installation if found.) An external program called
150+
\command{xz} or \command{zstd} is used if available: if not
151+
decompression is expected to fail.}
142152
}
143153

144154
Arguments \code{compressed}, \code{extras} and \code{verbose} are only
145155
used when an external \command{tar} is used.
146156

147157
Some external \command{tar} commands will detect some of
148-
\command{lrzip}, \command{lzma}, \command{lz4}, \command{lzop} and
149-
\command{zstd} compression in addition to \command{gzip},
150-
\command{bzip2} and \command{xz}. (For some external \command{tar}
158+
\command{lrzip}, \command{lzma}, \command{lz4} and \command{lzop}
159+
compression in addition to \command{gzip}, \command{bzip2},
160+
\command{xz} and \command{zstd}. (For some external \command{tar}
151161
commands, compressed tarfiles can only be read if the appropriate
152162
utility program is available.) For GNU \command{tar}, further
153163
(de)compression programs can be specified by e.g.\sspace{}\code{extras
@@ -162,14 +172,14 @@ untar(tarfile, files = NULL, list = FALSE, exdir = ".",
162172
linking operation fails (as it may on a FAT file system), a file copy
163173
is tried. Since it uses \code{\link{gzfile}} to read a file it can
164174
handle files compressed by any of the methods that function can
165-
handle: at least \command{compress}, \command{gzip}, \command{bzip2}
166-
and \command{xz} compression, and some types of \command{lzma}
167-
compression. It does not guard against restoring absolute file paths,
168-
as some \command{tar} implementations do. It will create the parent
169-
directories for directories or files in the archive if necessary. It
170-
handles the \I{USTAR}/POSIX, GNU and \command{pax} ways of handling file
171-
paths of more than 100 bytes, and the GNU way of handling link targets
172-
of more than 100 bytes.
175+
handle: at least \command{compress}, \command{gzip}, \command{bzip2},
176+
\command{xz} and \command{zstd} compression, and some types of
177+
\command{lzma} compression. It does not guard against restoring
178+
absolute file paths, as some \command{tar} implementations do. It
179+
will create the parent directories for directories or files in the
180+
archive if necessary. It handles the \I{USTAR}/POSIX, GNU and
181+
\command{pax} ways of handling file paths of more than 100 bytes, and
182+
the GNU way of handling link targets of more than 100 bytes.
173183

174184
You may see warnings from the internal implementation such
175185
as \preformatted{ unsupported entry type 'x'}

src/library/utils/tests/tar.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
### tests of compression on (un)tar
2+
3+
options(warn = 1)
4+
old <- getwd()
5+
6+
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
7+
{
8+
setwd(R.home('library'))
9+
z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
10+
zz <- file.path(old, z)
11+
message("making ", z)
12+
## zstd support is optional
13+
y <- try(tar(zz, "utils", f))
14+
if(inherits(y, "try-error")) next
15+
print(file.size(zz))
16+
setwd(old)
17+
print(head(untar(zz, list = TRUE, tar = "internal")))
18+
untar(zz, tar = "internal")
19+
}
20+
21+
## Now try external untar
22+
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
23+
{
24+
z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
25+
if (!file.exists(z)) next
26+
message("unpacking ", z)
27+
y <- try(untar(z))  # wrap in try(): the next line tests for "try-error", and a missing decompressor must not abort the script
28+
if(inherits(y, "try-error")) next
29+
print(head(dir("utils"), 5))
30+
}
31+
32+
## and external tar
33+
TAR <- Sys.getenv("TAR", "tar")
34+
for(f in c("none", "gzip", "bzip2", "xz", "zstd"))
35+
{
36+
setwd(R.home('library'))
37+
z <- if (f=="none") "utils.tar" else paste0("utils.tar.", f)
38+
zz <- file.path(old, z)
39+
message("making ", z)
40+
y <- try(tar(zz, "utils", f, tar = TAR))
41+
if(inherits(y, "try-error")) next
42+
print(file.size(zz))
43+
setwd(old)
44+
}
45+
46+
unlink("utils", recursive = TRUE)
47+

0 commit comments

Comments
 (0)