From ca960ca540effd9b233ba67ef76a5365fc78c86b Mon Sep 17 00:00:00 2001 From: Tyler French Date: Fri, 23 Jan 2026 14:54:53 -0500 Subject: [PATCH] PROTOTYPE: support remote cache chunking using FastCDC --- .../google/devtools/build/lib/remote/BUILD | 4 + .../lib/remote/ChunkedBlobDownloader.java | 73 +++++ .../build/lib/remote/ChunkedBlobUploader.java | 178 +++++++++++ .../build/lib/remote/CombinedCache.java | 41 +++ .../build/lib/remote/GrpcCacheClient.java | 113 ++++++- .../lib/remote/RemoteExecutionCache.java | 1 + .../build/lib/remote/RemoteModule.java | 15 +- .../lib/remote/RemoteServerCapabilities.java | 23 ++ .../devtools/build/lib/remote/chunking/BUILD | 26 ++ .../lib/remote/chunking/ChunkingConfig.java | 87 +++++ .../lib/remote/chunking/FastCDCChunker.java | 296 ++++++++++++++++++ .../lib/remote/common/RemoteCacheClient.java | 21 ++ .../remote/logging/LoggingInterceptor.java | 4 + .../lib/remote/logging/SpliceBlobHandler.java | 44 +++ .../lib/remote/logging/SplitBlobHandler.java | 45 +++ .../lib/remote/options/RemoteOptions.java | 15 + .../build/lib/remote/util/DigestUtil.java | 12 + src/main/protobuf/BUILD | 1 + src/main/protobuf/remote_execution_log.proto | 28 ++ ...eStreamBuildEventArtifactUploaderTest.java | 3 +- .../build/lib/remote/GrpcCacheClientTest.java | 3 +- ...SpawnRunnerWithGrpcRemoteExecutorTest.java | 3 +- .../execution/v2/remote_execution.proto | 236 +++++++++++++- 23 files changed, 1264 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobDownloader.java create mode 100644 src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobUploader.java create mode 100644 src/main/java/com/google/devtools/build/lib/remote/chunking/BUILD create mode 100644 src/main/java/com/google/devtools/build/lib/remote/chunking/ChunkingConfig.java create mode 100644 src/main/java/com/google/devtools/build/lib/remote/chunking/FastCDCChunker.java create mode 100644 src/main/java/com/google/devtools/build/lib/remote/logging/SpliceBlobHandler.java create mode 100644 src/main/java/com/google/devtools/build/lib/remote/logging/SplitBlobHandler.java diff --git a/src/main/java/com/google/devtools/build/lib/remote/BUILD b/src/main/java/com/google/devtools/build/lib/remote/BUILD index 8f048d05f72a42..5babf9605d6ae2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/BUILD @@ -28,7 +28,9 @@ java_library( srcs = glob( ["*.java"], exclude = [ + "ChunkingConfig.java", "ExecutionStatusException.java", + "FastCDCChunker.java", "ReferenceCountedChannel.java", "ChannelConnectionWithServerCapabilitiesFactory.java", "RemoteRetrier.java", @@ -53,6 +55,7 @@ java_library( ":Retrier", ":abstract_action_input_prefetcher", ":lease_service", + "//src/main/java/com/google/devtools/build/lib/concurrent:task_deduplicator", ":remote_important_output_handler", ":remote_output_checker", ":scrubber", @@ -97,6 +100,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/exec/local", "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/profiler", + "//src/main/java/com/google/devtools/build/lib/remote/chunking", "//src/main/java/com/google/devtools/build/lib/remote/circuitbreaker", "//src/main/java/com/google/devtools/build/lib/remote/common", "//src/main/java/com/google/devtools/build/lib/remote/common:bulk_transfer_exception", diff --git a/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobDownloader.java 
b/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobDownloader.java new file mode 100644 index 00000000000000..a94ec3cfdfd833 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobDownloader.java @@ -0,0 +1,73 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote; + +import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; +import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.SplitBlobResponse; +import com.google.common.flogger.GoogleLogger; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.devtools.build.lib.remote.common.CacheNotFoundException; +import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.ExecutionException; +import java.util.List; + +/** + * Downloads blobs by sequentially fetching chunks via the SplitBlob API. + */ +public class ChunkedBlobDownloader { + private static final GoogleLogger logger = GoogleLogger.forEnclosingClass(); + + private final GrpcCacheClient grpcCacheClient; + + public ChunkedBlobDownloader(GrpcCacheClient grpcCacheClient) { + this.grpcCacheClient = grpcCacheClient; + } + + /** + * Downloads a blob using chunked download via the SplitBlob API. This should be + * called with virtual threads. + */ + public void downloadChunked( + RemoteActionExecutionContext context, Digest blobDigest, OutputStream out) + throws CacheNotFoundException, InterruptedException { + try { + doDownloadChunked(context, blobDigest, out); + } catch (IOException e) { + logger.atWarning().withCause(e).log("Chunked download failed for %s", blobDigest.getHash()); + throw new CacheNotFoundException(blobDigest); + } + } + + private void doDownloadChunked( + RemoteActionExecutionContext context, Digest blobDigest, OutputStream out) + throws IOException, InterruptedException { + ListenableFuture splitResponseFuture = grpcCacheClient.splitBlob(context, blobDigest); + if (splitResponseFuture == null) { + throw new CacheNotFoundException(blobDigest); + } + downloadAndReassembleChunks(context, getFromFuture(splitResponseFuture).getChunkDigestsList(), out); + } + + private void downloadAndReassembleChunks( + RemoteActionExecutionContext context, List chunkDigests, OutputStream out) + throws IOException, InterruptedException { + for (Digest chunkDigest : chunkDigests) { + getFromFuture(grpcCacheClient.downloadBlob(context, chunkDigest, out)); + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobUploader.java new file mode 100644 index 00000000000000..fc47d7cb7906e1 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/ChunkedBlobUploader.java @@ -0,0 +1,178 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote; + +import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; +import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.SplitBlobResponse; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.devtools.build.lib.concurrent.TaskDeduplicator; +import com.google.devtools.build.lib.remote.chunking.ChunkingConfig; +import com.google.devtools.build.lib.remote.chunking.FastCDCChunker; +import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; +import com.google.devtools.build.lib.remote.util.DigestUtil; +import com.google.devtools.build.lib.vfs.Path; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; + +/** + * Uploads blobs in chunks using Content-Defined Chunking with FastCDC 2020. + * + *

+ * <p>Upload flow for blobs above the chunking threshold (illustrated by the sketch below):
+ *
+ * <ol>
+ *   <li>Chunk the file with FastCDC
+ *   <li>Call findMissingDigests on the chunk digests
+ *   <li>Upload only the missing chunks
+ *   <li>Call SpliceBlob to register the blob as the concatenation of the chunks
+ * </ol>
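+ *
+ * <p>Illustrative caller-side sketch (names as used elsewhere in this prototype; the real
+ * entry point is {@code GrpcCacheClient#uploadFile}):
+ *
+ * <pre>{@code
+ * ChunkedBlobUploader uploader = new ChunkedBlobUploader(grpcCacheClient, digestUtil);
+ * if (digest.getSizeBytes() > uploader.getChunkingThreshold()) {
+ *   uploader.uploadChunked(context, digest, file);
+ * }
+ * }</pre>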
+ */ +public class ChunkedBlobUploader { + private final GrpcCacheClient grpcCacheClient; + private final FastCDCChunker chunker; + private final long chunkingThreshold; + private final TaskDeduplicator chunkUploadDeduplicator = new TaskDeduplicator<>(); + + public ChunkedBlobUploader(GrpcCacheClient grpcCacheClient, DigestUtil digestUtil) { + this(grpcCacheClient, ChunkingConfig.defaults(), digestUtil); + } + + public ChunkedBlobUploader( + GrpcCacheClient grpcCacheClient, ChunkingConfig config, DigestUtil digestUtil) { + this.grpcCacheClient = grpcCacheClient; + this.chunker = new FastCDCChunker(config, digestUtil); + this.chunkingThreshold = config.chunkingThreshold(); + } + + public long getChunkingThreshold() { + return chunkingThreshold; + } + + public void uploadChunked(RemoteActionExecutionContext context, Digest blobDigest, Path file) + throws IOException, InterruptedException { + if (isAlreadyChunkedOnServer(context, blobDigest)) { + return; + } + doChunkedUpload(context, blobDigest, file); + } + + private boolean isAlreadyChunkedOnServer( + RemoteActionExecutionContext context, Digest blobDigest) throws InterruptedException { + ListenableFuture splitFuture = grpcCacheClient.splitBlob(context, blobDigest); + if (splitFuture == null) { + return false; + } + try { + SplitBlobResponse response = splitFuture.get(); + return isTrulyChunked(response, blobDigest); + } catch (ExecutionException e) { + return false; + } + } + + // TODO(https://github.com/bazelbuild/remote-apis/pull/358): should make this check unnecessary. + private static boolean isTrulyChunked(SplitBlobResponse response, Digest blobDigest) { + if (response == null || response.getChunkDigestsCount() == 0) { + return false; + } + if (response.getChunkDigestsCount() == 1 && response.getChunkDigests(0).equals(blobDigest)) { + return false; + } + return true; + } + + private void doChunkedUpload(RemoteActionExecutionContext context, Digest blobDigest, Path file) + throws IOException, InterruptedException { + List chunkDigests; + try (InputStream input = file.getInputStream()) { + chunkDigests = chunker.chunkToDigests(input); + } + if (chunkDigests.isEmpty()) { + return; + } + + ImmutableSet missingDigests; + try { + missingDigests = grpcCacheClient.findMissingDigests(context, chunkDigests).get(); + } catch (ExecutionException e) { + throw new IOException("Failed to find missing digests", e.getCause()); + } + + uploadMissingChunks(context, missingDigests, chunkDigests, file); + + try { + grpcCacheClient.spliceBlob(context, blobDigest, chunkDigests).get(); + } catch (ExecutionException e) { + throw new IOException("Failed to splice blob", e.getCause()); + } + } + + private void uploadMissingChunks( + RemoteActionExecutionContext context, + ImmutableSet missingDigests, + List chunkDigests, + Path file) + throws IOException, InterruptedException { + if (missingDigests.isEmpty()) { + return; + } + + // Rather than keeping the offsets of the chunks, + // We can just use the size from the digests of the prev + // chunks to compute just the offsets we need. 
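+    // For example (illustrative sizes): chunks of 5, 7, and 3 bytes start at offsets 0, 5,
+    // and 12 respectively; only the offsets of missing chunks need to be recorded.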
+ Map digestToOffset = new HashMap<>(); + long offset = 0; + for (Digest digest : chunkDigests) { + if (missingDigests.contains(digest)) { + digestToOffset.put(digest, offset); + } + offset += digest.getSizeBytes(); + } + + for (Digest chunkDigest : missingDigests) { + long chunkOffset = digestToOffset.get(chunkDigest); + getFromFuture(chunkUploadDeduplicator.executeIfNew(chunkDigest, + () -> uploadChunk(context, chunkDigest, chunkOffset, file))); + } + } + + private ListenableFuture uploadChunk(RemoteActionExecutionContext context, Digest digest, long offset, + Path file) { + try { + byte[] data = readChunkData(file, offset, (int) digest.getSizeBytes()); + return grpcCacheClient.uploadBlob(context, digest, () -> new ByteArrayInputStream(data)); + } catch (IOException e) { + return Futures.immediateFailedFuture(e); + } + + } + + private byte[] readChunkData(Path file, long offset, int length) throws IOException { + try (InputStream input = file.getInputStream()) { + input.skipNBytes(offset); + return input.readNBytes(length); + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java index 26cca8fe703b5d..928a053d60645e 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/CombinedCache.java @@ -32,9 +32,12 @@ import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; import com.google.devtools.build.lib.concurrent.ThreadSafety; import com.google.devtools.build.lib.exec.SpawnCheckingCacheEvent; import com.google.devtools.build.lib.exec.SpawnProgressEvent; +import com.google.devtools.build.lib.remote.chunking.ChunkingConfig; import com.google.devtools.build.lib.remote.common.CacheNotFoundException; import com.google.devtools.build.lib.remote.common.LazyFileOutputStream; import com.google.devtools.build.lib.remote.common.OutputDigestMismatchException; @@ -64,6 +67,7 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -93,6 +97,11 @@ public class CombinedCache extends AbstractReferenceCounted { private final CountDownLatch closeCountDownLatch = new CountDownLatch(1); protected final AsyncTaskCache.NoResult casUploadCache = AsyncTaskCache.NoResult.create(); + @SuppressWarnings("AllowVirtualThreads") + private final ListeningExecutorService virtualThreadExecutor = + MoreExecutors.listeningDecorator( + Executors.newThreadPerTaskExecutor(Thread.ofVirtual().name("combined-cache-", 0).factory())); + @Nullable protected final RemoteCacheClient remoteCacheClient; @Nullable protected final DiskCacheClient diskCacheClient; @Nullable protected final String symlinkTemplate; @@ -130,6 +139,15 @@ public ServerCapabilities getRemoteServerCapabilities() throws IOException { return remoteCacheClient.getServerCapabilities(); } + @Nullable + public ChunkingConfig getChunkingConfig() { + try { + return ChunkingConfig.fromServerCapabilities(getRemoteServerCapabilities()); + } catch (IOException e) { + return null; + } + } + /** * Class to keep track of which cache (disk or remote) a given [cached] ActionResult comes 
from. */ @@ -440,6 +458,29 @@ private ListenableFuture downloadBlobFromRemote( RemoteActionExecutionContext context, Digest digest, OutputStream out) { checkState(remoteCacheClient != null && context.getReadCachePolicy().allowRemoteCache()); + if (remoteCacheClient instanceof GrpcCacheClient grpcClient) { + ChunkedBlobDownloader chunkedDownloader = grpcClient.getChunkedDownloader(); + if (chunkedDownloader != null && digest.getSizeBytes() > grpcClient.getChunkingThreshold()) { + ListenableFuture chunkedDownloadFuture = + virtualThreadExecutor.submit(() -> { + chunkedDownloader.downloadChunked(context, digest, out); + return null; + }); + return Futures.catchingAsync( + chunkedDownloadFuture, + CacheNotFoundException.class, + (e) -> regularDownloadBlobFromRemote(context, digest, out), + directExecutor()); + } + } + + return regularDownloadBlobFromRemote(context, digest, out); + } + + private ListenableFuture regularDownloadBlobFromRemote( + RemoteActionExecutionContext context, Digest digest, OutputStream out) { + checkState(remoteCacheClient != null && context.getReadCachePolicy().allowRemoteCache()); + if (diskCacheClient != null && context.getWriteCachePolicy().allowDiskCache()) { Path tempPath = diskCacheClient.getTempPath(); LazyFileOutputStream tempOut = new LazyFileOutputStream(tempPath); diff --git a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java index 345744ce70cc6d..b37b69a09bedef 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java +++ b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.remote; import static com.google.common.base.Strings.isNullOrEmpty; +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.google.devtools.build.lib.remote.util.DigestUtil.isOldStyleDigestFunction; import build.bazel.remote.execution.v2.ActionCacheGrpc; @@ -29,6 +30,9 @@ import build.bazel.remote.execution.v2.GetActionResultRequest; import build.bazel.remote.execution.v2.RequestMetadata; import build.bazel.remote.execution.v2.ServerCapabilities; +import build.bazel.remote.execution.v2.SpliceBlobRequest; +import build.bazel.remote.execution.v2.SplitBlobRequest; +import build.bazel.remote.execution.v2.SplitBlobResponse; import build.bazel.remote.execution.v2.UpdateActionResultRequest; import com.google.bytestream.ByteStreamGrpc; import com.google.bytestream.ByteStreamGrpc.ByteStreamStub; @@ -40,15 +44,18 @@ import com.google.common.base.VerifyException; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.flogger.GoogleLogger; import com.google.common.io.CountingOutputStream; import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.SettableFuture; import com.google.devtools.build.lib.authandtls.CallCredentialsProvider; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.remote.RemoteRetrier.ProgressiveBackoff; +import com.google.devtools.build.lib.remote.chunking.ChunkingConfig; import com.google.devtools.build.lib.remote.common.CacheNotFoundException; import 
com.google.devtools.build.lib.remote.common.MissingDigestsFinder; import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; @@ -59,6 +66,7 @@ import com.google.devtools.build.lib.remote.util.TracingMetadataUtils; import com.google.devtools.build.lib.remote.util.Utils; import com.google.devtools.build.lib.remote.zstd.ZstdDecompressingOutputStream; +import com.google.devtools.build.lib.vfs.Path; import com.google.protobuf.ByteString; import io.grpc.Channel; import io.grpc.Status; @@ -71,6 +79,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Supplier; @@ -88,6 +97,14 @@ public class GrpcCacheClient implements RemoteCacheClient, MissingDigestsFinder private final RemoteRetrier retrier; private final ByteStreamUploader uploader; private final int maxMissingBlobsDigestsPerMessage; + @Nullable private final ChunkedBlobDownloader chunkedDownloader; + @Nullable private final ChunkedBlobUploader chunkedUploader; + @Nullable private final ChunkingConfig chunkingConfig; + + @SuppressWarnings("AllowVirtualThreads") + private final ListeningExecutorService virtualThreadExecutor = + MoreExecutors.listeningDecorator( + Executors.newThreadPerTaskExecutor(Thread.ofVirtual().name("grpc-cache-", 0).factory())); private final AtomicBoolean closed = new AtomicBoolean(); @@ -97,12 +114,14 @@ public GrpcCacheClient( CallCredentialsProvider callCredentialsProvider, RemoteOptions options, RemoteRetrier retrier, - DigestUtil digestUtil) { + DigestUtil digestUtil, + @Nullable ChunkingConfig chunkingConfig) { this.callCredentialsProvider = callCredentialsProvider; this.channel = channel; this.options = options; this.digestUtil = digestUtil; this.retrier = retrier; + this.chunkingConfig = chunkingConfig; this.uploader = new ByteStreamUploader( options.remoteInstanceName, @@ -115,6 +134,23 @@ public GrpcCacheClient( maxMissingBlobsDigestsPerMessage = computeMaxMissingBlobsDigestsPerMessage(); Preconditions.checkState( maxMissingBlobsDigestsPerMessage > 0, "Error: gRPC message size too small."); + + if (options.experimentalRemoteCacheChunking) { + ChunkingConfig config = chunkingConfig != null ? chunkingConfig : ChunkingConfig.defaults(); + this.chunkedDownloader = new ChunkedBlobDownloader(this); + this.chunkedUploader = new ChunkedBlobUploader(this, config, digestUtil); + } else { + this.chunkedDownloader = null; + this.chunkedUploader = null; + } + } + + /** Returns the chunking threshold in bytes, or default if chunking is disabled. 
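+   * <p>When chunking is disabled, the default {@link ChunkingConfig#DEFAULT_CHUNKING_THRESHOLD} (2 MiB) is returned.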
*/ + public long getChunkingThreshold() { + if (chunkedUploader != null) { + return chunkedUploader.getChunkingThreshold(); + } + return ChunkingConfig.DEFAULT_CHUNKING_THRESHOLD; } private int computeMaxMissingBlobsDigestsPerMessage() { @@ -164,6 +200,59 @@ private ActionCacheFutureStub acFutureStub( .withDeadlineAfter(options.remoteTimeout.toSeconds(), TimeUnit.SECONDS); } + @Override + public ListenableFuture spliceBlob( + RemoteActionExecutionContext context, + Digest blobDigest, + List chunkDigests) { + if (!options.experimentalRemoteCacheChunking) { + return null; + } + SpliceBlobRequest request = + SpliceBlobRequest.newBuilder() + .setInstanceName(options.remoteInstanceName) + .setBlobDigest(blobDigest) + .addAllChunkDigests(chunkDigests) + .setDigestFunction(digestUtil.getDigestFunction()) + .build(); + return Futures.transform( + Utils.refreshIfUnauthenticatedAsync( + () -> + retrier.executeAsync( + () -> + channel.withChannelFuture( + ch -> casFutureStub(context, ch).spliceBlob(request))), + callCredentialsProvider), + unused -> null, + directExecutor()); + } + + /** + * Queries the server for chunk information about a blob using the SplitBlob RPC. + * + * @return a future with the split blob response, or null if chunking is not enabled + */ + @Nullable + public ListenableFuture splitBlob( + RemoteActionExecutionContext context, Digest digest) { + if (!options.experimentalRemoteCacheChunking) { + return null; + } + SplitBlobRequest request = + SplitBlobRequest.newBuilder() + .setInstanceName(options.remoteInstanceName) + .setBlobDigest(digest) + .setDigestFunction(digestUtil.getDigestFunction()) + .build(); + return Utils.refreshIfUnauthenticatedAsync( + () -> + retrier.executeAsync( + () -> + channel.withChannelFuture( + ch -> casFutureStub(context, ch).splitBlob(request))), + callCredentialsProvider); + } + @Override public void close() { if (closed.getAndSet(true)) { @@ -172,6 +261,28 @@ public void close() { channel.release(); } + @Nullable + public ChunkedBlobDownloader getChunkedDownloader() { + return chunkedDownloader; + } + + @Nullable + public ChunkedBlobUploader getChunkedUploader() { + return chunkedUploader; + } + + @Override + public ListenableFuture uploadFile( + RemoteActionExecutionContext context, Digest digest, Path file) { + if (chunkedUploader != null && digest.getSizeBytes() > getChunkingThreshold()) { + return virtualThreadExecutor.submit(() -> { + chunkedUploader.uploadChunked(context, digest, file); + return null; + }); + } + return RemoteCacheClient.super.uploadFile(context, digest, file); + } + /** Returns true if 'options.remoteCache' uses 'grpc' or an empty scheme */ public static boolean isRemoteCacheOptions(RemoteOptions options) { if (isNullOrEmpty(options.remoteCache)) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java index 8c91f00256019a..1ab55bdd95280f 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionCache.java @@ -32,6 +32,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.flogger.GoogleLogger; +import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.devtools.build.lib.actions.cache.VirtualActionInput; import com.google.devtools.build.lib.profiler.Profiler; diff --git 
a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java index 61ba8004f0af44..0f21035986c2c0 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteModule.java @@ -18,6 +18,7 @@ import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.DigestFunction; +import build.bazel.remote.execution.v2.ServerCapabilities; import com.github.benmanes.caffeine.cache.Cache; import com.google.auth.Credentials; import com.google.common.annotations.VisibleForTesting; @@ -64,6 +65,7 @@ import com.google.devtools.build.lib.remote.RemoteServerCapabilities.ServerCapabilitiesRequirement; import com.google.devtools.build.lib.remote.Retrier.ResultClassifier; import com.google.devtools.build.lib.remote.Retrier.ResultClassifier.Result; +import com.google.devtools.build.lib.remote.chunking.ChunkingConfig; import com.google.devtools.build.lib.remote.circuitbreaker.CircuitBreakerFactory; import com.google.devtools.build.lib.remote.common.RemoteCacheClient; import com.google.devtools.build.lib.remote.common.RemoteExecutionClient; @@ -714,9 +716,20 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException { } } + ChunkingConfig chunkingConfig = null; + if (remoteOptions.experimentalRemoteCacheChunking) { + try { + ServerCapabilities capabilities = cacheChannel.getServerCapabilities(); + chunkingConfig = ChunkingConfig.fromServerCapabilities(capabilities); + } catch (IOException e) { + chunkingConfig = ChunkingConfig.defaults(); + } + } + RemoteCacheClient remoteCacheClient = new GrpcCacheClient( - cacheChannel.retain(), callCredentialsProvider, remoteOptions, retrier, digestUtil); + cacheChannel.retain(), callCredentialsProvider, remoteOptions, retrier, digestUtil, + chunkingConfig); cacheChannel.release(); DiskCacheClient diskCacheClient = null; diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteServerCapabilities.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteServerCapabilities.java index 6028de5b1d4257..5ca0cf96ff4c1a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteServerCapabilities.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteServerCapabilities.java @@ -17,6 +17,7 @@ import build.bazel.remote.execution.v2.CacheCapabilities; import build.bazel.remote.execution.v2.CapabilitiesGrpc; import build.bazel.remote.execution.v2.CapabilitiesGrpc.CapabilitiesFutureStub; +// import build.bazel.remote.execution.v2.ChunkingFunction; import build.bazel.remote.execution.v2.Compressor; import build.bazel.remote.execution.v2.DigestFunction; import build.bazel.remote.execution.v2.ExecutionCapabilities; @@ -246,6 +247,28 @@ public static ClientServerCompatibilityStatus checkClientServerCompatibility( "--remote_cache_compression requested but remote does not support compression"); } + if (remoteOptions.experimentalRemoteCacheChunking) { + if (!cacheCap.getSplitBlobSupport()) { + result.addError( + "--experimental_remote_cache_chunking requested but remote does not support" + + " SplitBlob"); + } + if (!cacheCap.getSpliceBlobSupport()) { + result.addError( + "--experimental_remote_cache_chunking requested but remote does not support" + + " SpliceBlob"); + } + // TODO(https://github.com/bazelbuild/remote-apis/pull/357): Re-enable once servers + // advertise ChunkingConfiguration with FASTCDC_2020. 
+ // if (!cacheCap.hasChunkingConfiguration() + // || !cacheCap.getChunkingConfiguration().getSupportedChunkingAlgorithmsList() + // .contains(ChunkingFunction.Value.FASTCDC_2020)) { + // result.addError( + // "--experimental_remote_cache_chunking requested but remote does not support" + // + " FASTCDC_2020 chunking algorithm"); + // } + } + // Check result cache priority is in the supported range. checkPriorityInRange( remoteOptions.remoteResultCachePriority, diff --git a/src/main/java/com/google/devtools/build/lib/remote/chunking/BUILD b/src/main/java/com/google/devtools/build/lib/remote/chunking/BUILD new file mode 100644 index 00000000000000..f3eb55bdca3f93 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/chunking/BUILD @@ -0,0 +1,26 @@ +load("@rules_java//java:defs.bzl", "java_library") + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//src:__subpackages__"], +) + +filegroup( + name = "srcs", + srcs = glob(["*"]), + visibility = ["//src:__subpackages__"], +) + +java_library( + name = "chunking", + srcs = [ + "ChunkingConfig.java", + "FastCDCChunker.java" + ], + deps = [ + "//src/main/java/com/google/devtools/build/lib/remote/options", + "//src/main/java/com/google/devtools/build/lib/remote/util:digest_utils", + "//third_party:guava", + "@remoteapis//:build_bazel_remote_execution_v2_remote_execution_java_proto", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/remote/chunking/ChunkingConfig.java b/src/main/java/com/google/devtools/build/lib/remote/chunking/ChunkingConfig.java new file mode 100644 index 00000000000000..3d2cbf4eebafda --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/chunking/ChunkingConfig.java @@ -0,0 +1,87 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote.chunking; + +import build.bazel.remote.execution.v2.CacheCapabilities; +// import build.bazel.remote.execution.v2.ChunkingConfiguration; +// import build.bazel.remote.execution.v2.ChunkingConfiguration.FastCDCParams; +// import build.bazel.remote.execution.v2.ChunkingFunction; +import build.bazel.remote.execution.v2.ServerCapabilities; + +/** Configuration for content-defined chunking. All sizes are in bytes. 
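+ *
+ * <p>For example, with the defaults below (512 KiB average chunk size), {@code minChunkSize()}
+ * is 128 KiB, {@code maxChunkSize()} is 2 MiB, and blobs larger than the 2 MiB threshold are
+ * chunked.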
*/ +public record ChunkingConfig( + long chunkingThreshold, int avgChunkSize, int normalizationLevel, int seed) { + + public static final int DEFAULT_AVG_CHUNK_SIZE = 512 * 1024; + public static final int DEFAULT_NORMALIZATION_LEVEL = 2; + public static final int DEFAULT_SEED = 0; + public static final long DEFAULT_CHUNKING_THRESHOLD = 2 * 1024 * 1024; + + public int minChunkSize() { + return avgChunkSize / 4; + } + + public int maxChunkSize() { + return avgChunkSize * 4; + } + + public static ChunkingConfig defaults() { + return new ChunkingConfig( + DEFAULT_CHUNKING_THRESHOLD, + DEFAULT_AVG_CHUNK_SIZE, + DEFAULT_NORMALIZATION_LEVEL, + DEFAULT_SEED); + } + + public static ChunkingConfig fromServerCapabilities(ServerCapabilities capabilities) { + if (!capabilities.hasCacheCapabilities()) { + return null; + } + CacheCapabilities cacheCap = capabilities.getCacheCapabilities(); + return defaults(); + + // TODO(https://github.com/bazelbuild/remote-apis/pull/357): Enable once servers + // advertise threshold and ChunkingConfiguration with FASTCDC_2020. + + // if (!cacheCap.hasChunkingConfiguration()) { + // return defaults(); + // } + + // ChunkingConfiguration config = cacheCap.getChunkingConfiguration(); + // if (!config.getSupportedChunkingAlgorithmsList().contains(ChunkingFunction.Value.FASTCDC_2020)) { + // return null; + // } + + // long threshold = config.getChunkingThresholdBytes(); + // long threshold = threshold = DEFAULT_CHUNKING_THRESHOLD; + // int avgSize = DEFAULT_AVG_CHUNK_SIZE; + // int normalization = DEFAULT_NORMALIZATION_LEVEL; + // int seed = DEFAULT_SEED; + + // if (config.hasFastcdcParams()) { + // FastCDCParams params = config.getFastcdcParams(); + // long configAvgSize = params.getAvgChunkSizeBytes(); + // if (configAvgSize >= 1024 && configAvgSize <= 1024 * 1024) { + // avgSize = (int) configAvgSize; + // } + // int configNorm = params.getNormalizationLevel(); + // if (configNorm >= 0 && configNorm <= 3) { + // normalization = configNorm; + // } + // seed = params.getSeed(); + // } + // return new ChunkingConfig(threshold, avgSize, normalization, seed); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/chunking/FastCDCChunker.java b/src/main/java/com/google/devtools/build/lib/remote/chunking/FastCDCChunker.java new file mode 100644 index 00000000000000..f1f1de51967604 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/chunking/FastCDCChunker.java @@ -0,0 +1,296 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote.chunking; + +import static com.google.common.base.Preconditions.checkArgument; + +import build.bazel.remote.execution.v2.Digest; +import com.google.devtools.build.lib.remote.util.DigestUtil; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * FastCDC 2020 implementation for splitting large blobs. + * + *

+ * This module implements the canonical FastCDC algorithm as described in the + * [paper](https://ieeexplore.ieee.org/document/9055082) by Wen Xia, et al., in + * 2020. + */ +public final class FastCDCChunker { + + // Masks for each of the desired number of bits, where 0 through 5 are unused. + // The values for sizes 64 bytes through 128 kilo-bytes come from the C + // reference implementation (found in the destor repository) while the extra + // values come from the restic-FastCDC repository. The FastCDC paper claims that + // the deduplication ratio is slightly improved when the mask bits are spread + // relatively evenly, hence these seemingly "magic" values. + // @formatter:off + private static final long[] MASKS = { + 0, // 0: padding + 0, // 1: padding + 0, // 2: padding + 0, // 3: padding + 0, // 4: padding + 0x0000000001804110L, // 5: unused except for NC 3 + 0x0000000001803110L, // 6: 64B + 0x0000000018035100L, // 7: 128B + 0x0000001800035300L, // 8: 256B + 0x0000019000353000L, // 9: 512B + 0x0000590003530000L, // 10: 1KB + 0x0000d90003530000L, // 11: 2KB + 0x0000d90103530000L, // 12: 4KB + 0x0000d90303530000L, // 13: 8KB + 0x0000d90313530000L, // 14: 16KB + 0x0000d90f03530000L, // 15: 32KB + 0x0000d90303537000L, // 16: 64KB + 0x0000d90703537000L, // 17: 128KB + 0x0000d90707537000L, // 18: 256KB + 0x0000d91707537000L, // 19: 512KB + 0x0000d91747537000L, // 20: 1MB + 0x0000d91767537000L, // 21: 2MB + 0x0000d93767537000L, // 22: 4MB + 0x0000d93777537000L, // 23: 8MB + 0x0000d93777577000L, // 24: 16MB + 0x0000db3777577000L, // 25: unused except for NC 3 + }; + + // GEAR contains seemingly random numbers which are created by computing the MD5 digest of values + // from 0 to 255, using only the high 8 bytes of the 16-byte digest. This is the "gear hash" + // referred to in the FastCDC paper. 
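+  // The derived GEAR_LS table below holds these values shifted left by one bit; together with
+  // the left-shifted masks, it lets cut() process two input bytes per loop iteration.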
+ private static final long[] GEAR = { + 0x3b5d3c7d207e37dcL, 0x784d68ba91123086L, 0xcd52880f882e7298L, 0xeacf8e4e19fdcca7L, + 0xc31f385dfbd1632bL, 0x1d5f27001e25abe6L, 0x83130bde3c9ad991L, 0xc4b225676e9b7649L, + 0xaa329b29e08eb499L, 0xb67fcbd21e577d58L, 0x0027baaada2acf6bL, 0xe3ef2d5ac73c2226L, + 0x0890f24d6ed312b7L, 0xa809e036851d7c7eL, 0xf0a6fe5e0013d81bL, 0x1d026304452cec14L, + 0x03864632648e248fL, 0xcdaacf3dcd92b9b4L, 0xf5e012e63c187856L, 0x8862f9d3821c00b6L, + 0xa82f7338750f6f8aL, 0x1e583dc6c1cb0b6fL, 0x7a3145b69743a7f1L, 0xabb20fee404807ebL, + 0xb14b3cfe07b83a5dL, 0xb9dc27898adb9a0fL, 0x3703f5e91baa62beL, 0xcf0bb866815f7d98L, + 0x3d9867c41ea9dcd3L, 0x1be1fa65442bf22cL, 0x14300da4c55631d9L, 0xe698e9cbc6545c99L, + 0x4763107ec64e92a5L, 0xc65821fc65696a24L, 0x76196c064822f0b7L, 0x485be841f3525e01L, + 0xf652bc9c85974ff5L, 0xcad8352face9e3e9L, 0x2a6ed1dceb35e98eL, 0xc6f483badc11680fL, + 0x3cfd8c17e9cf12f1L, 0x89b83c5e2ea56471L, 0xae665cfd24e392a9L, 0xec33c4e504cb8915L, + 0x3fb9b15fc9fe7451L, 0xd7fd1fd1945f2195L, 0x31ade0853443efd8L, 0x255efc9863e1e2d2L, + 0x10eab6008d5642cfL, 0x46f04863257ac804L, 0xa52dc42a789a27d3L, 0xdaaadf9ce77af565L, + 0x6b479cd53d87febbL, 0x6309e2d3f93db72fL, 0xc5738ffbaa1ff9d6L, 0x6bd57f3f25af7968L, + 0x67605486d90d0a4aL, 0xe14d0b9663bfbdaeL, 0xb7bbd8d816eb0414L, 0xdef8a4f16b35a116L, + 0xe7932d85aaaffed6L, 0x08161cbae90cfd48L, 0x855507beb294f08bL, 0x91234ea6ffd399b2L, + 0xad70cf4b2435f302L, 0xd289a97565bc2d27L, 0x8e558437ffca99deL, 0x96d2704b7115c040L, + 0x0889bbcdfc660e41L, 0x5e0d4e67dc92128dL, 0x72a9f8917063ed97L, 0x438b69d409e016e3L, + 0xdf4fed8a5d8a4397L, 0x00f41dcf41d403f7L, 0x4814eb038e52603fL, 0x9dafbacc58e2d651L, + 0xfe2f458e4be170afL, 0x4457ec414df6a940L, 0x06e62f1451123314L, 0xbd1014d173ba92ccL, + 0xdef318e25ed57760L, 0x9fea0de9dfca8525L, 0x459de1e76c20624bL, 0xaeec189617e2d666L, + 0x126a2c06ab5a83cbL, 0xb1321532360f6132L, 0x65421503dbb40123L, 0x2d67c287ea089ab3L, + 0x6c93bff5a56bd6b6L, 0x4ffb2036cab6d98dL, 0xce7b785b1be7ad4fL, 0xedb42ef6189fd163L, + 0xdc905288703988f6L, 0x365f9c1d2c691884L, 0xc640583680d99bfeL, 0x3cd4624c07593ec6L, + 0x7f1ea8d85d7c5805L, 0x014842d480b57149L, 0x0b649bcb5a828688L, 0xbcd5708ed79b18f0L, + 0xe987c862fbd2f2f0L, 0x982731671f0cd82cL, 0xbaf13e8b16d8c063L, 0x8ea3109cbd951bbaL, + 0xd141045bfb385cadL, 0x2acbc1a0af1f7d30L, 0xe6444d89df03bfdfL, 0xa18cc771b8188ff9L, + 0x9834429db01c39bbL, 0x214add07fe086a1fL, 0x8f07c19b1f6b3ff9L, 0x56a297b1bf4ffe55L, + 0x94d558e493c54fc7L, 0x40bfc24c764552cbL, 0x931a706f8a8520cbL, 0x32229d322935bd52L, + 0x2560d0f5dc4fefafL, 0x9dbcc48355969bb6L, 0x0fd81c3985c0b56aL, 0xe03817e1560f2bdaL, + 0xc1bb4f81d892b2d5L, 0xb0c4864f4e28d2d7L, 0x3ecc49f9d9d6c263L, 0x51307e99b52ba65eL, + 0x8af2b688da84a752L, 0xf5d72523b91b20b6L, 0x6d95ff1ff4634806L, 0x562f21555458339aL, + 0xc0ce47f889336346L, 0x487823e5089b40d8L, 0xe4727c7ebc6d9592L, 0x5a8f7277e94970baL, + 0xfca2f406b1c8bb50L, 0x5b1f8a95f1791070L, 0xd304af9fc9028605L, 0x5440ab7fc930e748L, + 0x312d25fbca2ab5a1L, 0x10f4a4b234a4d575L, 0x90301d55047e7473L, 0x3b6372886c61591eL, + 0x293402b77c444e06L, 0x451f34a4d3e97dd7L, 0x3158d814d81bc57bL, 0x034942425b9bda69L, + 0xe2032ff9e532d9bbL, 0x62ae066b8b2179e5L, 0x9545e10c2f8d71d8L, 0x7ff7483eb2d23fc0L, + 0x00945fcebdc98d86L, 0x8764bbbe99b26ca2L, 0x1b1ec62284c0bfc3L, 0x58e0fcc4f0aa362bL, + 0x5f4abefa878d458dL, 0xfd74ac2f9607c519L, 0xa4e3fb37df8cbfa9L, 0xbf697e43cac574e5L, + 0x86f14a3f68f4cd53L, 0x24a23d076f1ce522L, 0xe725cd8048868cc8L, 0xbf3c729eb2464362L, + 0xd8f6cd57b3cc1ed8L, 0x6329e52425541577L, 0x62aa688ad5ae1ac0L, 
0x0a242566269bf845L, + 0x168b1a4753aca74bL, 0xf789afefff2e7e3cL, 0x6c3362093b6fccdbL, 0x4ce8f50bd28c09b2L, + 0x006a2db95ae8aa93L, 0x975b0d623c3d1a8cL, 0x18605d3935338c5bL, 0x5bb6f6136cad3c71L, + 0x0f53a20701f8d8a6L, 0xab8c5ad2e7e93c67L, 0x40b5ac5127acaa29L, 0x8c7bf63c2075895fL, + 0x78bd9f7e014a805cL, 0xb2c9e9f4f9c8c032L, 0xefd6049827eb91f3L, 0x2be459f482c16fbdL, + 0xd92ce0c5745aaa8cL, 0x0aaa8fb298d965b9L, 0x2b37f92c6c803b15L, 0x8c54a5e94e0f0e78L, + 0x95f9b6e90c0a3032L, 0xe7939faa436c7874L, 0xd16bfe8f6a8a40c9L, 0x44982b86263fd2faL, + 0xe285fb39f984e583L, 0x779a8df72d7619d3L, 0xf2d79a8de8d5dd1eL, 0xd1037354d66684e2L, + 0x004c82a4e668a8e5L, 0x31d40a7668b044e6L, 0xd70578538bd02c11L, 0xdb45431078c5f482L, + 0x977121bb7f6a51adL, 0x73d5ccbd34eff8ddL, 0xe437a07d356e17cdL, 0x47b2782043c95627L, + 0x9fb251413e41d49aL, 0xccd70b60652513d3L, 0x1c95b31e8a1b49b2L, 0xcae73dfd1bcb4c1bL, + 0x34d98331b1f5b70fL, 0x784e39f22338d92fL, 0x18613d4a064df420L, 0xf1d8dae25f0bcebeL, + 0x33f77c15ae855efcL, 0x3c88b3b912eb109cL, 0x956a2ec96bafeea5L, 0x1aa005b5e0ad0e87L, + 0x5500d70527c4bb8eL, 0xe36c57196421cc44L, 0x13c4d286cc36ee39L, 0x5654a23d818b2a81L, + 0x77b1dc13d161abdcL, 0x734f44de5f8d5eb5L, 0x60717e174a6c89a2L, 0xd47d9649266a211eL, + 0x5b13a4322bb69e90L, 0xf7669609f8b5fc3cL, 0x21e6ac55bedcdac9L, 0x9b56b62b61166deaL, + 0xf48f66b939797e9cL, 0x35f332f9c0e6ae9aL, 0xcc733f6a9a878db0L, 0x3da161e41cc108c2L, + 0xb7d74ae535914d51L, 0x4d493b0b11d36469L, 0xce264d1dfba9741aL, 0xa9d1f2dc7436dc06L, + 0x70738016604c2a27L, 0x231d36e96e93f3d5L, 0x7666881197838d19L, 0x4a2a83090aaad40cL, + 0xf1e761591668b35dL, 0x7363236497f730a7L, 0x301080e37379dd4dL, 0x502dea2971827042L, + 0xc2c5eb858f32625fL, 0x786afb9edfafbdffL, 0xdaee0d868490b2a4L, 0x617366b3268609f6L, + 0xae0e35a0fe46173eL, 0xd1a07de93e824f11L, 0x079b8b115ea4cca8L, 0x93a99274558faebbL, + 0xfb1e6e22e08a03b3L, 0xea635fdba3698dd0L, 0xcf53659328503a5cL, 0xcde3b31e6fd5d780L, + 0x8e3e4221d3614413L, 0xef14d0d86bf1a22cL, 0xe1d830d3f16c5ddbL, 0xaabd2b2a451504e1L, + }; + // @formatter:on + + private static final long[] GEAR_LS = computeGearLs(); + + private static long[] computeGearLs() { + long[] gearLs = new long[GEAR.length]; + for (int i = 0; i < GEAR.length; i++) { + gearLs[i] = GEAR[i] << 1; + } + return gearLs; + } + + private final int minSize; + private final int maxSize; + private final int avgSize; + private final long maskS; + private final long maskL; + private final long maskSLs; + private final long maskLLs; + private final DigestUtil digestUtil; + + public FastCDCChunker(DigestUtil digestUtil) { + this(ChunkingConfig.defaults(), digestUtil); + } + + public FastCDCChunker(ChunkingConfig config, DigestUtil digestUtil) { + this(config.minChunkSize(), config.avgChunkSize(), config.maxChunkSize(), + config.normalizationLevel(), digestUtil); + } + + public FastCDCChunker( + int minSize, int avgSize, int maxSize, int normalization, DigestUtil digestUtil) { + checkArgument(minSize > 0, "minSize must be positive"); + checkArgument(avgSize >= minSize, "avgSize must be >= minSize"); + checkArgument(maxSize >= avgSize, "maxSize must be >= avgSize"); + checkArgument(avgSize > 0 && (avgSize & (avgSize - 1)) == 0, "avgSize must be a power of 2, got %s", avgSize); + checkArgument(normalization >= 0 && normalization <= 3, "normalization must be 0-3"); + + this.minSize = minSize; + this.avgSize = avgSize; + this.maxSize = maxSize; + this.digestUtil = digestUtil; + + int bits = 31 - Integer.numberOfLeadingZeros(avgSize); + int smallBits = bits + normalization; + int largeBits = bits - 
normalization; + checkArgument(smallBits <= 25 && largeBits >= 5, "normalization level too extreme for avgSize"); + + this.maskS = MASKS[smallBits]; + this.maskL = MASKS[largeBits]; + this.maskSLs = this.maskS << 1; + this.maskLLs = this.maskL << 1; + } + + /** + * Finds the next chunk boundary. + */ + private int cut(byte[] buf, int off, int len) { + if (len <= minSize) { + return len; + } + + int n = Math.min(len, maxSize); + int center = Math.min(n, avgSize); + + // Round down to even boundaries for 2-byte processing so we don't need to + // divide by 2 in the loop. + int minLimit = minSize & ~1; + int centerLimit = center & ~1; + int remainingLimit = n & ~1; + + long hash = 0; + + // Below avgSize: use maskS to discourage early cuts (too small chunks) + for (int a = minLimit; a < centerLimit; a += 2) { + hash = (hash << 2) + GEAR_LS[buf[off + a] & 0xFF]; + if ((hash & maskSLs) == 0) { + return a; + } + hash = hash + GEAR[buf[off + a + 1] & 0xFF]; + if ((hash & maskS) == 0) { + return a + 1; + } + } + + // Above avgSize: use maskL to encourage cuts (too large chunks) + for (int a = centerLimit; a < remainingLimit; a += 2) { + hash = (hash << 2) + GEAR_LS[buf[off + a] & 0xFF]; + if ((hash & maskLLs) == 0) { + return a; + } + hash = hash + GEAR[buf[off + a + 1] & 0xFF]; + if ((hash & maskL) == 0) { + return a + 1; + } + } + + return n; + } + + /** + * Chunks a file and returns metadata (digest, offset, length) for each chunk. + * + *

+ * <p>This method is used when building MerkleTree entries for large files. It streams the
+ * input and returns the digest of each chunk, in order, without retaining the chunk data.
+ *
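+ * <p>Illustrative usage (assuming {@code file} is a seekable {@code Path} and {@code chunker}
+ * is a configured {@link FastCDCChunker}):
+ *
+ * <pre>{@code
+ * try (InputStream in = file.getInputStream()) {
+ *   List<Digest> chunkDigests = chunker.chunkToDigests(in);
+ * }
+ * }</pre>
+ *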

+ * Note: We don't need the raw data here. We can read from the original file + * (seekable) when uploading, similar to how whole blobs work. + * + * @param input the input stream to chunk (should be from a seekable file) + * @return list of ChunkRefs with digest, file offset, and length for each chunk + */ + public List chunkToDigests(InputStream input) throws IOException { + List digests = new ArrayList<>(); + + byte[] buf = new byte[maxSize * 2]; + int cursor = 0; + int end = 0; + boolean eof = false; + + while (true) { + int available = end - cursor; + if (available < maxSize && !eof) { + if (cursor > 0 && available > 0) { + System.arraycopy(buf, cursor, buf, 0, available); + } + cursor = 0; + end = available; + + while (end < buf.length) { + int n = input.read(buf, end, buf.length - end); + if (n == -1) { + eof = true; + break; + } + end += n; + } + available = end - cursor; + } + + if (available == 0) { + break; + } + + int chunkLen = cut(buf, cursor, available); + digests.add(digestUtil.compute(buf, cursor, chunkLen)); + + cursor += chunkLen; + } + + return digests; + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/common/RemoteCacheClient.java b/src/main/java/com/google/devtools/build/lib/remote/common/RemoteCacheClient.java index f5160436969e40..7a25425ff8ee63 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/common/RemoteCacheClient.java +++ b/src/main/java/com/google/devtools/build/lib/remote/common/RemoteCacheClient.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.List; import java.util.Set; import javax.annotation.Nullable; @@ -160,6 +161,26 @@ default ListenableFuture uploadBlob( return uploadBlob(context, digest, data::newInput); } + /** + * Registers a blob as the concatenation of the given chunks via SpliceBlob RPC. + * + *

This is used for CDC (Content-Defined Chunking) uploads. After uploading all chunks, + * SpliceBlob is called to register the blob with the given digest as the concatenation of + * the chunks. + * + * @param context the context for the action. + * @param blobDigest The digest of the complete blob. + * @param chunkDigests The digests of the chunks that make up the blob, in order. + * @return A future representing pending completion of the splice operation, or null if + * SpliceBlob is not supported by this cache client. + */ + default ListenableFuture spliceBlob( + RemoteActionExecutionContext context, + Digest blobDigest, + List chunkDigests) { + return null; + } + /** Close resources associated with the remote cache. */ void close(); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/logging/LoggingInterceptor.java b/src/main/java/com/google/devtools/build/lib/remote/logging/LoggingInterceptor.java index ce1d0fe77b3cfa..875b17e505d0b2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/logging/LoggingInterceptor.java +++ b/src/main/java/com/google/devtools/build/lib/remote/logging/LoggingInterceptor.java @@ -70,6 +70,10 @@ protected LoggingHandler selectHandler(MethodDescriptor } else if (method == ContentAddressableStorageGrpc.getFindMissingBlobsMethod()) { return new FindMissingBlobsHandler(); // + } else if (method == ContentAddressableStorageGrpc.getSplitBlobMethod()) { + return new SplitBlobHandler(); // + } else if (method == ContentAddressableStorageGrpc.getSpliceBlobMethod()) { + return new SpliceBlobHandler(); // } else if (method == ByteStreamGrpc.getReadMethod()) { return new ReadHandler(); // } else if (method == ByteStreamGrpc.getWriteMethod()) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/logging/SpliceBlobHandler.java b/src/main/java/com/google/devtools/build/lib/remote/logging/SpliceBlobHandler.java new file mode 100644 index 00000000000000..988f47a5508155 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/logging/SpliceBlobHandler.java @@ -0,0 +1,44 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote.logging; + +import build.bazel.remote.execution.v2.SpliceBlobRequest; +import build.bazel.remote.execution.v2.SpliceBlobResponse; +import com.google.devtools.build.lib.remote.logging.RemoteExecutionLog.SpliceBlobDetails; +import com.google.devtools.build.lib.remote.logging.RemoteExecutionLog.RpcCallDetails; + +/** + * LoggingHandler for {@link + * google.devtools.remoteexecution.v1test.ContentAddressableStorage.SpliceBlob} gRPC call. 
+ */ +public class SpliceBlobHandler implements LoggingHandler { + + private final SpliceBlobDetails.Builder builder = SpliceBlobDetails.newBuilder(); + + @Override + public void handleReq(SpliceBlobRequest message) { + builder.setRequest(message); + } + + @Override + public void handleResp(SpliceBlobResponse message) { + builder.setResponse(message); + } + + @Override + public RpcCallDetails getDetails() { + return RpcCallDetails.newBuilder().setSpliceBlob(builder).build(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/logging/SplitBlobHandler.java b/src/main/java/com/google/devtools/build/lib/remote/logging/SplitBlobHandler.java new file mode 100644 index 00000000000000..71cef8e9f51467 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/remote/logging/SplitBlobHandler.java @@ -0,0 +1,45 @@ +// Copyright 2026 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.remote.logging; + +import build.bazel.remote.execution.v2.SplitBlobRequest; +import build.bazel.remote.execution.v2.SplitBlobResponse; +import com.google.devtools.build.lib.remote.logging.RemoteExecutionLog.SplitBlobDetails; +import com.google.devtools.build.lib.remote.logging.RemoteExecutionLog.RpcCallDetails; + +/** + * LoggingHandler for {@link + * google.devtools.remoteexecution.v1test.ContentAddressableStorage.SplitBlob} gRPC call. + */ +public class SplitBlobHandler + implements LoggingHandler { + + private final SplitBlobDetails.Builder builder = SplitBlobDetails.newBuilder(); + + @Override + public void handleReq(SplitBlobRequest message) { + builder.setRequest(message); + } + + @Override + public void handleResp(SplitBlobResponse message) { + builder.setResponse(message); + } + + @Override + public RpcCallDetails getDetails() { + return RpcCallDetails.newBuilder().setSplitBlob(builder).build(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java index c38a658bb39c0c..97830e88ba04e0 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java +++ b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java @@ -723,6 +723,21 @@ public RemoteOutputsStrategyConverter() { + " output prefixes).") public Scrubber scrubber; + @Option( + name = "experimental_remote_cache_chunking", + defaultValue = "false", + documentationCategory = OptionDocumentationCategory.REMOTE, + metadataTags = OptionMetadataTag.EXPERIMENTAL, + effectTags = {OptionEffectTag.UNKNOWN}, + help = + "If enabled, Bazel will use Content-Defined Chunking (CDC) for remote cache read and write operations. " + + "Large blobs (>2MB) are split into chunks using the FastCDC 2020 algorithm, enabling deduplication " + + "and parallel transfers. For uploads, only missing chunks are uploaded and " + + "SpliceBlob is called to register the blob to chunk mapping on the remote. 
For downloads, SplitBlob is used to " + + "get chunk information and chunks are downloaded to reassemble the blob. The server must " + + "declare capabilities for SplitBlob and SpliceBlob RPCs for this to work.") + public boolean experimentalRemoteCacheChunking; + @Option( name = "experimental_throttle_remote_action_building", defaultValue = "true", diff --git a/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java b/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java index 5c1f2b3c644bd3..978e9c077e2ba2 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java +++ b/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java @@ -69,6 +69,18 @@ public Digest compute(byte[] blob) { return buildDigest(hashFn.getHashFunction().hashBytes(blob).toString(), blob.length); } + /** + * Computes a digest for a portion of a byte array. This is useful for uploading + * an individual chunk from a larger file. + * + * @param data the byte array + * @param offset the start offset in the array + * @param length the number of bytes to hash + */ + public Digest compute(byte[] data, int offset, int length) { + return buildDigest(hashFn.getHashFunction().hashBytes(data, offset, length).toString(), length); + } + /** * Computes a digest for a file. * diff --git a/src/main/protobuf/BUILD b/src/main/protobuf/BUILD index f8fce154cb489e..15e2f57381f9c8 100644 --- a/src/main/protobuf/BUILD +++ b/src/main/protobuf/BUILD @@ -285,6 +285,7 @@ java_library_srcs( deps = [":remote_scrubbing_java_proto"], ) + proto_library( name = "bazel_output_service_proto", srcs = ["bazel_output_service.proto"], diff --git a/src/main/protobuf/remote_execution_log.proto b/src/main/protobuf/remote_execution_log.proto index b3f265aaca959d..7518e4dfd44586 100644 --- a/src/main/protobuf/remote_execution_log.proto +++ b/src/main/protobuf/remote_execution_log.proto @@ -112,6 +112,32 @@ message FindMissingBlobsDetails { build.bazel.remote.execution.v2.FindMissingBlobsResponse response = 2; } +// Details for a call to +// build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob. +message SplitBlobDetails { + // The build.bazel.remote.execution.v2.SplitBlobRequest request + // sent. + build.bazel.remote.execution.v2.SplitBlobRequest request = 1; + + // The build.bazel.remote.execution.v2.SplitBlobResponse + // received. + build.bazel.remote.execution.v2.SplitBlobResponse response = 2; +} + + +// Details for a call to +// build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob. +message SpliceBlobDetails { + // The build.bazel.remote.execution.v2.SpliceBlobRequest request + // sent. + build.bazel.remote.execution.v2.SpliceBlobRequest request = 1; + + // The build.bazel.remote.execution.v2.SpliceBlobResponse + // received. + build.bazel.remote.execution.v2.SpliceBlobResponse response = 2; +} + + // Details for a call to google.bytestream.Read. message ReadDetails { // The google.bytestream.ReadRequest sent. 
@@ -178,5 +204,7 @@ message RpcCallDetails { QueryWriteStatusDetails query_write_status = 14; GetCapabilitiesDetails get_capabilities = 12; UpdateActionResultDetails update_action_result = 13; + SplitBlobDetails split_blob = 15; + SpliceBlobDetails splice_blob = 16; } } diff --git a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java index 770df3ce036493..989fdee850a0d9 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java @@ -531,7 +531,8 @@ private static CombinedCache newCombinedCache( CallCredentialsProvider.NO_CREDENTIALS, remoteOptions, retrier, - DIGEST_UTIL)); + DIGEST_UTIL, + /* chunkingConfig= */ null)); doAnswer( invocationOnMock -> missingDigestsFinder.findMissingDigests( diff --git a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java index 9cd74f405652fb..8395e1e4f2dee7 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/GrpcCacheClientTest.java @@ -215,7 +215,8 @@ public int maxConcurrency() { }); channels.add(channel); return new GrpcCacheClient( - channel, callCredentialsProvider, remoteOptions, retrier, DIGEST_UTIL); + channel, callCredentialsProvider, remoteOptions, retrier, DIGEST_UTIL, + /* chunkingConfig= */ null); } private static byte[] downloadBlob( diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java index 0c6c0dd80e2a05..9e59d4ad62d0a8 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java @@ -333,7 +333,8 @@ public int maxConcurrency() { GoogleAuthUtils.newCallCredentialsProvider(null); GrpcCacheClient cacheProtocol = new GrpcCacheClient( - channel.retain(), callCredentialsProvider, remoteOptions, retrier, DIGEST_UTIL); + channel.retain(), callCredentialsProvider, remoteOptions, retrier, DIGEST_UTIL, + /* chunkingConfig= */ null); remoteCache = new RemoteExecutionCache( cacheProtocol, /* diskCacheClient= */ null, /* symlinkTemplate= */ null, DIGEST_UTIL); diff --git a/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto b/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto index 31e20dcf4eae18..8224f1a1e4b137 100644 --- a/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto +++ b/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto @@ -175,7 +175,7 @@ service ActionCache { // // In order to allow the server to perform access control based on the type of // action, and to assist with client debugging, the client MUST first upload - // the [Action][build.bazel.remote.execution.v2.Execution] that produced the + // the [Action][build.bazel.remote.execution.v2.Action] that produced the // result, along with its // [Command][build.bazel.remote.execution.v2.Command], into the // `ContentAddressableStorage`. 
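The upload side of this change hinges on computing a digest per chunk without copying the underlying bytes, which is what the new DigestUtil#compute(byte[], int, int) overload is for. The sketch below illustrates that usage under the assumption that chunk boundaries arrive as (offset, length) pairs from some content-defined chunker; ChunkBoundary and ChunkDigestExample are illustrative names and are not part of this patch.

import build.bazel.remote.execution.v2.Digest;
import com.google.devtools.build.lib.remote.util.DigestUtil;
import java.util.ArrayList;
import java.util.List;

final class ChunkDigestExample {
  /** One content-defined chunk, described by its position in the original blob. */
  record ChunkBoundary(int offset, int length) {}

  /**
   * Returns the digests of the chunks in blob order. These are the digests a client would
   * pass to FindMissingBlobs and later to SpliceBlob.
   */
  static List<Digest> chunkDigests(
      DigestUtil digestUtil, byte[] blob, List<ChunkBoundary> boundaries) {
    List<Digest> digests = new ArrayList<>(boundaries.size());
    for (ChunkBoundary b : boundaries) {
      // The new DigestUtil#compute(byte[], int, int) overload hashes a slice of the
      // array in place, so no per-chunk copy of the blob is needed.
      digests.add(digestUtil.compute(blob, b.offset(), b.length()));
    }
    return digests;
  }
}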
@@ -325,6 +325,15 @@ service ActionCache { // each of the compression formats that the server supports, as well as in // uncompressed form. // +// Additionally, ByteStream requests MAY come with an additional plain text header +// that indicates the `resource_name` of the blob being sent. The header, if +// present, MUST follow the following convention: +// * name: `build.bazel.remote.execution.v2.resource-name`. +// * contents: the plain text resource_name of the request message. +// If set, the contents of the header MUST match the `resource_name` of the request +// message. Servers MAY use this header to assist in routing requests to the +// appropriate backend. +// // The lifetime of entries in the CAS is implementation specific, but it SHOULD // be long enough to allow for newly-added and recently looked-up entries to be // used in subsequent calls (e.g. to @@ -430,6 +439,110 @@ service ContentAddressableStorage { rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) { option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" }; } + + // Split a blob into chunks. + // + // This call splits a blob into chunks, stores the chunks in the CAS, and + // returns a list of the chunk digests. Using this list, a client can check + // which chunks are locally available and just fetch the missing ones. The + // desired blob can be assembled by concatenating the fetched chunks in the + // order of the digests in the list. + // + // This rpc can be used to reduce the required data to download a large blob + // from CAS if chunks from earlier downloads of a different version of this + // blob are locally available. For this procedure to work properly, blobs + // SHOULD be split in a content-defined way, rather than with fixed-sized + // chunking. + // + // If a split request is answered successfully, a client can expect the + // following guarantees from the server: + // 1. The blob chunks are stored in CAS. + // 2. Concatenating the blob chunks in the order of the digest list returned + // by the server results in the original blob. + // + // Servers which implement this functionality MUST declare that they support + // it by setting the + // [CacheCapabilities.split_blob_support][build.bazel.remote.execution.v2.CacheCapabilities.split_blob_support] + // field accordingly. + // + // Clients MUST check that the server supports this capability, before using + // it. + // + // Clients SHOULD verify that the digest of the blob assembled by the fetched + // chunks is equal to the requested blob digest. + // + // The lifetimes of the generated chunk blobs MAY be independent of the + // lifetime of the original blob. In particular: + // * A blob and any chunk derived from it MAY be evicted from the CAS at + // different times. + // * A call to [SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob] + // extends the lifetime of the original blob, and sets the lifetimes of + // the resulting chunks (or extends the lifetimes of already-existing + // chunks). + // * Touching a chunk extends its lifetime, but the server MAY choose not + // to extend the lifetime of the original blob. + // * Touching the original blob extends its lifetime, but the server MAY + // choose not to extend the lifetimes of chunks derived from it. 
+ // + // When blob splitting and splicing is used at the same time, the clients and + // the server SHOULD agree out-of-band upon a chunking algorithm used by both + // parties to benefit from each other's chunk data and avoid unnecessary data + // duplication. + // + // Errors: + // + // * `NOT_FOUND`: The requested blob is not present in the CAS. + // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the blob + // chunks. + rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) { + option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" }; + } + + // Splice a blob from chunks. + // + // This is the complementary operation to the + // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob] + // function to handle the chunked upload of large blobs to save upload + // traffic. + // + // If a client needs to upload a large blob and is able to split a blob into + // chunks in such a way that reusable chunks are obtained, e.g., by means of + // content-defined chunking, it can first determine which parts of the blob + // are already available in the remote CAS and upload the missing chunks, and + // then use this API to instruct the server to splice the original blob from + // the remotely available blob chunks. + // + // Servers which implement this functionality MUST declare that they support + // it by setting the + // [CacheCapabilities.splice_blob_support][build.bazel.remote.execution.v2.CacheCapabilities.splice_blob_support] + // field accordingly. + // + // Clients MUST check that the server supports this capability, before using + // it. + // + // In order to ensure data consistency of the CAS, the server MUST only add + // blobs to the CAS after verifying their digests. In particular, servers MUST NOT + // trust digests provided by the client. The server MAY accept a request as a no-op + // if the client-specified blob is already in CAS; the lifetime of that blob SHOULD + // be extended as usual. If the client-specified blob is not already in the CAS, + // the server SHOULD verify that the digest of the newly created blob matches the + // digest specified by the client, and reject the request if they differ. + // + // When blob splitting and splicing is used at the same time, the clients and + // the server SHOULD agree out-of-band upon a chunking algorithm used by both + // parties to benefit from each other's chunk data and avoid unnecessary data + // duplication. + // + // Errors: + // + // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS. + // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the + // spliced blob. + // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from the + // provided expected digest. + rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) { + option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" }; + } } // The Capabilities service may be used by remote execution clients to query @@ -1534,7 +1647,7 @@ message ExecutionStage { // Metadata about an ongoing // [execution][build.bazel.remote.execution.v2.Execution.Execute], which // will be contained in the [metadata -// field][google.longrunning.Operation.response] of the +// field][google.longrunning.Operation.metadata] of the // [Operation][google.longrunning.Operation]. message ExecuteOperationMetadata { // The current stage of execution. 
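Taken together, the SplitBlob and SpliceBlob documentation above describes a three-step upload handshake: find the chunks the server is missing, upload only those, then splice. The sketch below shows that handshake directly against the generated blocking stubs; it is illustrative only. SpliceUploadExample and uploadChunk are hypothetical names, and the ByteStream transfer of chunk data, digest_function selection, retries, and error handling are all elided.

import build.bazel.remote.execution.v2.ContentAddressableStorageGrpc.ContentAddressableStorageBlockingStub;
import build.bazel.remote.execution.v2.Digest;
import build.bazel.remote.execution.v2.FindMissingBlobsRequest;
import build.bazel.remote.execution.v2.SpliceBlobRequest;
import build.bazel.remote.execution.v2.SpliceBlobResponse;
import java.util.List;

final class SpliceUploadExample {
  static void uploadChunked(
      ContentAddressableStorageBlockingStub cas,
      String instanceName,
      Digest blobDigest,
      List<Digest> chunkDigests) {
    // 1. Only chunks the server does not already have need to be transferred.
    List<Digest> missing =
        cas.findMissingBlobs(
                FindMissingBlobsRequest.newBuilder()
                    .setInstanceName(instanceName)
                    .addAllBlobDigests(chunkDigests)
                    .build())
            .getMissingBlobDigestsList();
    for (Digest chunk : missing) {
      uploadChunk(chunk); // placeholder for a ByteStream Write of the chunk's bytes
    }
    // 2. Register the blob-to-chunk mapping; the server verifies that the concatenated
    //    chunks hash to blobDigest before adding the spliced blob to the CAS.
    SpliceBlobResponse response =
        cas.spliceBlob(
            SpliceBlobRequest.newBuilder()
                .setInstanceName(instanceName)
                .setBlobDigest(blobDigest)
                .addAllChunkDigests(chunkDigests)
                .build());
    if (!response.getBlobDigest().equals(blobDigest)) {
      throw new IllegalStateException("server spliced a different blob than requested");
    }
  }

  private static void uploadChunk(Digest chunk) {
    throw new UnsupportedOperationException("illustrative placeholder");
  }
}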
@@ -1557,6 +1670,15 @@ message ExecuteOperationMetadata { // The client can read this field to view details about the ongoing // execution. ExecutedActionMetadata partial_execution_metadata = 5; + + // The digest function that was used to compute the action digest. + // + // If the digest function used is one of BLAKE3, MD5, MURMUR3, SHA1, + // SHA256, SHA256TREE, SHA384, SHA512, or VSO, the server MAY leave + // this field unset. In that case the client SHOULD infer the digest + // function using the length of the action digest hash and the digest + // functions announced in the server's capabilities. + DigestFunction.Value digest_function = 6; } // A request message for @@ -1828,6 +1950,89 @@ message GetTreeResponse { string next_page_token = 2; } +// A request message for +// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]. +message SplitBlobRequest { + // The instance of the execution system to operate against. A server may + // support multiple instances of the execution system (with their own workers, + // storage, caches, etc.). The server MAY require use of this field to select + // between them in an implementation-defined fashion, otherwise it can be + // omitted. + string instance_name = 1; + + // The digest of the blob to be split. + Digest blob_digest = 2; + + // The digest function of the blob to be split. + // + // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256, + // SHA384, SHA512, or VSO, the client MAY leave this field unset. In + // that case the server SHOULD infer the digest function using the + // length of the blob digest hashes and the digest functions announced + // in the server's capabilities. + DigestFunction.Value digest_function = 3; +} + +// A response message for +// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]. +message SplitBlobResponse { + // The ordered list of digests of the chunks into which the blob was split. + // The original blob is assembled by concatenating the chunk data according to + // the order of the digests given by this list. + // + // The server MUST use the same digest function as the one explicitly or + // implicitly (through hash length) specified in the split request. + repeated Digest chunk_digests = 1; +} + +// A request message for +// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]. +message SpliceBlobRequest { + // The instance of the execution system to operate against. A server may + // support multiple instances of the execution system (with their own workers, + // storage, caches, etc.). The server MAY require use of this field to select + // between them in an implementation-defined fashion, otherwise it can be + // omitted. + string instance_name = 1; + + // Expected digest of the spliced blob. The client MUST set this field due + // to the following reasons: + // 1. It allows the server to perform an early existence check of the blob + // or existing chunks that assemble the blob before spending the splicing + // effort, as described in the [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob] + // documentation. + // 2. It allows servers with different storage backends to dispatch the + // request to the correct storage backend based on the size and/or the + // hash of the blob. + // 3. 
If chunking information already exists for the blob, it allows + // the server to keep the existing chunking information or replace it with + // new chunking information. + Digest blob_digest = 2; + + // The ordered list of digests of the chunks which need to be concatenated to + // assemble the original blob. + repeated Digest chunk_digests = 3; + + // The digest function of all chunks to be concatenated and of the blob to be + // spliced. The server MUST use the same digest function for both cases. + // + // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256, SHA384, + // SHA512, or VSO, the client MAY leave this field unset. In that case the + // server SHOULD infer the digest function using the length of the blob digest + // hashes and the digest functions announced in the server's capabilities. + DigestFunction.Value digest_function = 4; +} + +// A response message for +// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]. +message SpliceBlobResponse { + // Computed digest of the spliced blob. + // + // The server MUST use the same digest function as the one explicitly or + // implicitly (through hash length) specified in the splice request. + Digest blob_digest = 1; +} + // A request message for // [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities]. message GetCapabilitiesRequest { @@ -1958,7 +2163,7 @@ message ActionCacheUpdateCapabilities { // Allowed values for priority in // [ResultsCachePolicy][build.bazel.remoteexecution.v2.ResultsCachePolicy] and -// [ExecutionPolicy][build.bazel.remoteexecution.v2.ResultsCachePolicy] +// [ExecutionPolicy][build.bazel.remoteexecution.v2.ExecutionPolicy] // Used for querying both cache and execution valid priority ranges. message PriorityCapabilities { // Supported range of priorities, including boundaries. @@ -2047,6 +2252,31 @@ message CacheCapabilities { // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs] // requests. repeated Compressor.Value supported_batch_update_compressors = 7; + + // The maximum blob size that the server will accept for CAS blob uploads. + // - If it is 0, it means there is no limit set. A client may assume + // arbitrarily large blobs may be uploaded to and downloaded from the cache. + // - If it is larger than 0, implementations SHOULD NOT attempt to upload + // blobs with size larger than the limit. Servers SHOULD reject blob + // uploads over the `max_cas_blob_size_bytes` limit with response code + // `INVALID_ARGUMENT` + // - If the cache implementation returns a given limit, it MAY still serve + // blobs larger than this limit. + int64 max_cas_blob_size_bytes = 8; + + // Whether blob splitting is supported for the particular server/instance. If + // yes, the server/instance implements the specified behavior for blob + // splitting and a meaningful result can be expected from the + // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob] + // operation. + bool split_blob_support = 9; + + // Whether blob splicing is supported for the particular server/instance. If + // yes, the server/instance implements the specified behavior for blob + // splicing and a meaningful result can be expected from the + // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob] + // operation. + bool splice_blob_support = 10; } // Capabilities of the remote execution system.
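Because both RPCs are optional server features, a client has to consult CacheCapabilities before switching to chunked transfers. Below is a minimal sketch of that gate, assuming a ready gRPC Channel and using only the generated Capabilities stub; ChunkingCapabilityCheck is an illustrative name, and Bazel itself resolves these fields through RemoteServerCapabilities rather than a raw stub.

import build.bazel.remote.execution.v2.CacheCapabilities;
import build.bazel.remote.execution.v2.CapabilitiesGrpc;
import build.bazel.remote.execution.v2.CapabilitiesGrpc.CapabilitiesBlockingStub;
import build.bazel.remote.execution.v2.GetCapabilitiesRequest;
import io.grpc.Channel;

final class ChunkingCapabilityCheck {
  /** Returns true only if the server advertises both SplitBlob and SpliceBlob support. */
  static boolean chunkingSupported(Channel channel, String instanceName) {
    CapabilitiesBlockingStub stub = CapabilitiesGrpc.newBlockingStub(channel);
    CacheCapabilities cache =
        stub.getCapabilities(
                GetCapabilitiesRequest.newBuilder().setInstanceName(instanceName).build())
            .getCacheCapabilities();
    // Both RPCs are required to round-trip chunked blobs: SpliceBlob for uploads,
    // SplitBlob for downloads.
    return cache.getSplitBlobSupport() && cache.getSpliceBlobSupport();
  }
}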