Skip to content

Commit 80fe2d9

Browse files
willstranton and copybara-github
authored and committed
Add support for decompressing bz2, gz, xz, zst
Bazel already supports decompressing tar files compressed with bz2, gz, xz, and zst. This change adds support for decompressing individual files (not in a tar bundle) that are compressed with those compression algorithms. In other words, support for decompression is: before: .tar.bz2, .tar.gz, .tar.xz, .tar.zst; after: .tar.bz2, .bz2, .tar.gz, .gz, .tar.xz, .xz, .tar.zst, .zst. Fixes: #20125. Closes #27413. PiperOrigin-RevId: 839740791. Change-Id: I82595436f6ecab23374db7c50a5027a4bb279578
1 parent 6e5b6d7 commit 80fe2d9

File tree

13 files changed

+655
-28
lines changed

13 files changed

+655
-28
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright 2025 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.devtools.build.lib.bazel.repository.decompressor;
16+
17+
import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor;
18+
import java.io.BufferedInputStream;
19+
import java.io.IOException;
20+
import java.io.InputStream;
21+
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
22+
import org.apache.commons.compress.compressors.bzip2.BZip2Utils;
23+
24+
/** Decompresses a bzip2 compressed file. */
25+
public class Bz2Function extends CompressedFunction {
26+
public static final Decompressor INSTANCE = new Bz2Function();
27+
28+
@Override
29+
protected InputStream getDecompressorStream(BufferedInputStream compressedInputStream)
30+
throws IOException {
31+
return new BZip2CompressorInputStream(compressedInputStream, true);
32+
}
33+
34+
@Override
35+
protected String getUncompressedFileName(InputStream in, String compressedFileName) {
36+
return BZip2Utils.getUncompressedFileName(compressedFileName);
37+
}
38+
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright 2025 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.devtools.build.lib.bazel.repository.decompressor;
16+
17+
import com.google.common.collect.ImmutableMap;
18+
import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor;
19+
import com.google.devtools.build.lib.vfs.Path;
20+
import java.io.BufferedInputStream;
21+
import java.io.IOException;
22+
import java.io.InputStream;
23+
import java.io.OutputStream;
24+
25+
/**
26+
* Common code for decompressing a single compressed file (compressor formats).
27+
*
28+
* <p>Apache Commons Compress calls all formats that compress a single stream of data compressor
29+
* formats while all formats that collect multiple entries inside a single (potentially compressed)
30+
* archive are archiver formats. This class handles the former, compressor formats.
31+
*
32+
* <p>It ignores the {@link DecompressorDescriptor#prefix()} setting because compressed files cannot
33+
* contain directories.
34+
*/
35+
public abstract class CompressedFunction implements Decompressor {
36+
37+
protected abstract InputStream getDecompressorStream(BufferedInputStream compressedInputStream)
38+
throws IOException;
39+
40+
/**
41+
* Returns the uncompressed file name (eg. file.gz -> file). Some compressors have metadata that
42+
* stores the original name. If that's the case, the original name is used (eg. file.gz ->
43+
* originalName). Only a basename + ext should be passed in for the compressedFileName.
44+
*/
45+
protected abstract String getUncompressedFileName(
46+
InputStream in, final String compressedFileName);
47+
48+
/**
49+
* Set custom file attributes, like last modified time, on the extracted file. Only certain
50+
* compressors support this.
51+
*/
52+
protected void setFileAttributes(InputStream in, Path uncompressedFile) throws IOException {}
53+
54+
// This is the same value as picked for .tar files, which appears to have worked well.
55+
private static final int BUFFER_SIZE = 32 * 1024;
56+
57+
@Override
58+
public Path decompress(DecompressorDescriptor descriptor)
59+
throws InterruptedException, IOException {
60+
if (Thread.interrupted()) {
61+
throw new InterruptedException();
62+
}
63+
64+
ImmutableMap<String, String> renameFiles = descriptor.renameFiles();
65+
try (InputStream decompressorStream =
66+
getDecompressorStream(
67+
new BufferedInputStream(descriptor.archivePath().getInputStream(), BUFFER_SIZE))) {
68+
String entryName =
69+
getUncompressedFileName(decompressorStream, descriptor.archivePath().getBaseName());
70+
entryName = renameFiles.getOrDefault(entryName, entryName);
71+
Path filePath = descriptor.destinationPath().getRelative(entryName);
72+
filePath.getParentDirectory().createDirectoryAndParents();
73+
try (OutputStream out = filePath.getOutputStream()) {
74+
decompressorStream.transferTo(out);
75+
}
76+
setFileAttributes(decompressorStream, filePath);
77+
if (Thread.interrupted()) {
78+
throw new InterruptedException();
79+
}
80+
}
81+
return descriptor.destinationPath();
82+
}
83+
}

src/main/java/com/google/devtools/build/lib/bazel/repository/decompressor/DecompressorValue.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,20 @@ static Decompressor getDecompressor(Path archivePath) throws RepositoryFunctionE
104104
return TarFunction.INSTANCE;
105105
} else if (baseName.endsWith(".tar.gz") || baseName.endsWith(".tgz")) {
106106
return TarGzFunction.INSTANCE;
107+
} else if (baseName.endsWith(".gz")) { // Must be after .tar.gz.
108+
return GzFunction.INSTANCE;
107109
} else if (baseName.endsWith(".tar.xz") || baseName.endsWith(".txz")) {
108110
return TarXzFunction.INSTANCE;
111+
} else if (baseName.endsWith(".xz")) { // Must be after .tar.xz.
112+
return XzFunction.INSTANCE;
109113
} else if (baseName.endsWith(".tar.zst") || baseName.endsWith(".tzst")) {
110114
return TarZstFunction.INSTANCE;
115+
} else if (baseName.endsWith(".zst")) { // Must be after .tar.zst.
116+
return ZstFunction.INSTANCE;
111117
} else if (baseName.endsWith(".tar.bz2") || baseName.endsWith(".tbz")) {
112118
return TarBz2Function.INSTANCE;
119+
} else if (baseName.endsWith(".bz2")) { // Must be after .tar.bz2.
120+
return Bz2Function.INSTANCE;
113121
} else if (baseName.endsWith(".ar") || baseName.endsWith(".deb")) {
114122
return ArFunction.INSTANCE;
115123
} else if (baseName.endsWith(".7z")) {
@@ -118,7 +126,8 @@ static Decompressor getDecompressor(Path archivePath) throws RepositoryFunctionE
118126
throw new RepositoryFunctionException(
119127
Starlark.errorf(
120128
"Expected a file with a .zip, .jar, .war, .aar, .nupkg, .whl, .tar, .tar.gz, .tgz,"
121-
+ " .tar.xz, , .tar.zst, .tzst, .tar.bz2, .tbz, .ar, .deb or .7z suffix (got %s)",
129+
+ " .gz, .tar.xz, .txz, .xz, .tar.zst, .tzst, .zst, .tar.bz2, .tbz, .bz2, .ar,"
130+
+ " .deb or .7z suffix (got %s)",
122131
archivePath),
123132
Transience.PERSISTENT);
124133
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright 2025 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.devtools.build.lib.bazel.repository.decompressor;
16+
17+
import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor;
18+
import com.google.devtools.build.lib.vfs.Path;
19+
import com.google.devtools.build.lib.vfs.PathFragment;
20+
import java.io.BufferedInputStream;
21+
import java.io.IOException;
22+
import java.io.InputStream;
23+
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
24+
import org.apache.commons.compress.compressors.gzip.GzipParameters;
25+
import org.apache.commons.compress.compressors.gzip.GzipUtils;
26+
27+
/** Decompresses a gzip compressed file. */
28+
public class GzFunction extends CompressedFunction {
29+
public static final Decompressor INSTANCE = new GzFunction();
30+
31+
@Override
32+
protected InputStream getDecompressorStream(BufferedInputStream compressedInputStream)
33+
throws IOException {
34+
return new GzipCompressorInputStream(compressedInputStream, true);
35+
}
36+
37+
@Override
38+
protected String getUncompressedFileName(InputStream in, String compressedFileName) {
39+
String fileName = ((GzipCompressorInputStream) in).getMetaData().getFileName();
40+
if (fileName != null && !fileName.isBlank()) {
41+
// filename should be the simple basename + ext, but convert to a PathFragment and run
42+
// getBaseName to ensure that any path separators and uplevel references are dropped.
43+
return PathFragment.create(fileName).getBaseName();
44+
}
45+
return GzipUtils.getUncompressedFileName(compressedFileName);
46+
}
47+
48+
@Override
49+
protected void setFileAttributes(InputStream in, Path uncompressedFile) throws IOException {
50+
GzipParameters metaData = ((GzipCompressorInputStream) in).getMetaData();
51+
uncompressedFile.setLastModifiedTime(metaData.getModificationTime() * 1000);
52+
}
53+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Copyright 2025 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.devtools.build.lib.bazel.repository.decompressor;
16+
17+
import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor;
18+
import java.io.BufferedInputStream;
19+
import java.io.IOException;
20+
import java.io.InputStream;
21+
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
22+
import org.apache.commons.compress.compressors.xz.XZUtils;
23+
24+
/** Decompresses an xz (LZMA) compressed file. */
25+
public class XzFunction extends CompressedFunction {
26+
public static final Decompressor INSTANCE = new XzFunction();
27+
28+
/**
29+
* Uses {@link XZCompressorInputStream} from Apache Commons Compress to decompress.
30+
*
31+
* <p>Why not use {@link org.tukaani.xz.XZInputStream} which is used in {@link TarXzFunction}? The
32+
* Apache Commons Compress libraries are wrappers around org.tukaani.xz.XZInputStream, so they
33+
* should be the same. Since we also use {@link
34+
* org.apache.commons.compress.compressors.xz.XZUtils}, we keep consistency and use the Apache
35+
* wrapper consistently in this class.
36+
*
37+
* @see <a
38+
* href="https://commons.apache.org/proper/commons-compress/apidocs/org/apache/commons/compress/compressors/xz/package-summary.html">javadoc</a>
39+
*/
40+
@Override
41+
protected InputStream getDecompressorStream(BufferedInputStream compressedInputStream)
42+
throws IOException {
43+
return new XZCompressorInputStream(compressedInputStream);
44+
}
45+
46+
@Override
47+
protected String getUncompressedFileName(InputStream in, String compressedFileName) {
48+
return XZUtils.getUncompressedFileName(compressedFileName);
49+
}
50+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright 2025 The Bazel Authors. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.devtools.build.lib.bazel.repository.decompressor;
16+
17+
import com.github.luben.zstd.ZstdInputStreamNoFinalizer;
18+
import com.google.common.collect.ImmutableMap;
19+
import com.google.devtools.build.lib.bazel.repository.decompressor.DecompressorValue.Decompressor;
20+
import java.io.BufferedInputStream;
21+
import java.io.IOException;
22+
import java.io.InputStream;
23+
import org.apache.commons.compress.compressors.FileNameUtil;
24+
25+
/** Decompresses a Zstandard compressed file. */
26+
public class ZstFunction extends CompressedFunction {
27+
public static final Decompressor INSTANCE = new ZstFunction();
28+
// Apache Commons Compress does not provide a readily available mapping of compressed ->
29+
// uncompressed filenames for Zst, so we make our own.
30+
static final FileNameUtil fileNameUtil = new FileNameUtil(ImmutableMap.of(".zst", ""), ".zst");
31+
32+
@Override
33+
protected InputStream getDecompressorStream(BufferedInputStream compressedInputStream)
34+
throws IOException {
35+
return new ZstdInputStreamNoFinalizer(compressedInputStream);
36+
}
37+
38+
@Override
39+
protected String getUncompressedFileName(InputStream in, String compressedFileName) {
40+
return fileNameUtil.getUncompressedFileName(compressedFileName);
41+
}
42+
}

src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -912,8 +912,8 @@ public Object download(
912912
The archive type of the downloaded file. By default, the archive type is \
913913
determined from the file extension of the URL. If the file has no \
914914
extension, you can explicitly specify either "zip", "jar", "war", \
915-
"aar", "nupkg", "whl", "tar", "tar.gz", "tgz", "tar.xz", "txz", ".tar.zst", \
916-
".tzst", "tar.bz2", ".tbz", ".ar", ".deb", or ".7z" here.
915+
"aar", "nupkg", "whl", "tar", "tar.gz", "tgz", "gz", "tar.xz", "txz", "xz", "tar.zst", \
916+
"tzst", "zst", "tar.bz2", "tbz", "bz2", "ar", "deb", or "7z" here.
917917
"""),
918918
@Param(
919919
name = "strip_prefix",

src/test/java/com/google/devtools/build/lib/bazel/repository/decompressor/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ java_library(
4040
"//third_party:junit4",
4141
"//third_party:truth",
4242
"@rules_java//java/runfiles",
43+
"@zstd-jni",
4344
],
4445
)
4546

0 commit comments

Comments
 (0)