Skip to content

Commit 3920a9b

Browse files
iancha1992fmeum
andauthored
[9.1.0] Reduce Merkle tree footprint by referencing FileArtifactValue (http… (bazelbuild#28918)
…s://github.com/bazelbuild/pull/28739) For regular files, use `FileArtifactValue`s instead of `Digest`s as the keys in the Merkle tree's blob map. The former are retained anyway, the latter are unique per tree. Also describe the remaining footprint of `MerkleTree` and ideas for future improvements. Work towards bazelbuild#20478 Work towards bazelbuild#28734 No - [ ] I have added tests for the new use cases (if any). - [ ] I have updated the documentation (if applicable). RELNOTES: None Closes bazelbuild#28739. PiperOrigin-RevId: 879654674 Change-Id: I87fc74392481a7409f3775c27d90eab8b5eb1b4b <!-- Thank you for contributing to Bazel! Please read the contribution guidelines: https://bazel.build/contribute.html --> ### Description <!-- Please provide a brief summary of the changes in this PR. --> ### Motivation <!-- Why is this change important? Does it fix a specific bug or add a new feature? If this PR fixes an existing issue, please link it here (e.g. "Fixes bazelbuild#1234"). --> ### Build API Changes <!-- Does this PR affect the Build API? (e.g. Starlark API, providers, command-line flags, native rules) If yes, please answer the following: 1. Has this been discussed in a design doc or issue? (Please link it) 2. Is the change backward compatible? 3. If it's a breaking change, what is the migration plan? --> No ### Checklist - [ ] I have added tests for the new use cases (if any). - [ ] I have updated the documentation (if applicable). ### Release Notes <!-- If this is a new feature, please add 'RELNOTES[NEW]: <description>' here. If this is a breaking change, please add 'RELNOTES[INC]: <reason>' here. If this change should be mentioned in release notes, please add 'RELNOTES: <reason>' here. --> RELNOTES: None Commit bazelbuild@bbdc0c3 Co-authored-by: Fabian Meumertzheim <fabian@meumertzhe.im>
1 parent adec20c commit 3920a9b

File tree

3 files changed

+99
-12
lines changed

3 files changed

+99
-12
lines changed

src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTree.java

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,24 @@
1313
// limitations under the License.
1414
package com.google.devtools.build.lib.remote.merkletree;
1515

16+
import static com.google.common.collect.ImmutableMap.toImmutableMap;
17+
import static com.google.devtools.build.lib.remote.util.DigestUtil.DIGEST_COMPARATOR;
18+
import static java.util.Comparator.comparing;
19+
1620
import build.bazel.remote.execution.v2.Digest;
1721
import com.google.common.annotations.VisibleForTesting;
22+
import com.google.common.collect.Collections2;
1823
import com.google.common.collect.ImmutableSortedMap;
24+
import com.google.common.primitives.UnsignedBytes;
1925
import com.google.common.util.concurrent.ListenableFuture;
2026
import com.google.devtools.build.lib.actions.ActionInput;
2127
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
28+
import com.google.devtools.build.lib.actions.FileArtifactValue;
2229
import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext;
2330
import com.google.devtools.build.lib.remote.common.RemotePathResolver;
31+
import com.google.devtools.build.lib.remote.util.DigestUtil;
2432
import java.util.Collection;
33+
import java.util.Comparator;
2534
import java.util.Map;
2635
import java.util.Optional;
2736
import java.util.SortedMap;
@@ -77,12 +86,47 @@ record BlobsDiscarded(Digest digest, long inputFiles, long inputBytes) implement
7786
* A {@link MerkleTree} that retains all blobs that still need to be uploaded.
7887
*
7988
* <p>The empty blob doesn't have to be uploaded and is thus never included in the blobs map.
89+
*
90+
* <p>See {@link
91+
* com.google.devtools.build.lib.remote.RemoteExecutionServiceTest#buildRemoteAction_goldenTest}
92+
* for a test that verifies the memory footprint of this class. Since there can be thousands of
93+
* inflight remote executions that may have to retain their blobs until all inputs have been
94+
* uploaded, it's crucial to keep the memory footprint of this class as low as possible.
8095
*/
8196
final class Uploadable implements MerkleTree {
97+
private static final Comparator<FileArtifactValue> FILE_ARTIFACT_VALUE_COMPARATOR =
98+
comparing(FileArtifactValue::getDigest, UnsignedBytes.lexicographicalComparator())
99+
.thenComparing(FileArtifactValue::getSize);
100+
static final Comparator<Object> DIGEST_AND_METADATA_COMPARATOR =
101+
(o1, o2) ->
102+
switch (o1) {
103+
case Digest digest1 ->
104+
DIGEST_COMPARATOR.compare(
105+
digest1,
106+
switch (o2) {
107+
case Digest digest2 -> digest2;
108+
case FileArtifactValue metadata2 -> adaptToDigest(metadata2);
109+
default -> throw new IllegalStateException("Unexpected blob type: " + o2);
110+
});
111+
case FileArtifactValue metadata1 ->
112+
switch (o2) {
113+
case FileArtifactValue metadata2 ->
114+
FILE_ARTIFACT_VALUE_COMPARATOR.compare(metadata1, metadata2);
115+
case Digest digest2 ->
116+
DIGEST_COMPARATOR.compare(adaptToDigest(metadata1), digest2);
117+
default -> throw new IllegalStateException("Unexpected blob type: " + o2);
118+
};
119+
default -> throw new IllegalStateException("Unexpected blob type: " + o1);
120+
};
82121
private final RootOnly.BlobsUploaded root;
83-
private final ImmutableSortedMap<Digest, /* byte[] | ActionInput */ Object> blobs;
84-
85-
Uploadable(RootOnly.BlobsUploaded root, SortedMap<Digest, Object> blobs) {
122+
private final ImmutableSortedMap<
123+
/* Digest | FileArtifactValue */ Object, /* byte[] | ActionInput */ Object>
124+
blobs;
125+
126+
Uploadable(
127+
RootOnly.BlobsUploaded root,
128+
SortedMap</* Digest | FileArtifactValue */ Object, /* byte[] | ActionInput */ Object>
129+
blobs) {
86130
this.root = root;
87131
// A sorted map requires less memory than a regular hash map as it only stores two flat sorted
88132
// arrays. Access performance is not critical since it's only used to find missing blobs,
@@ -106,12 +150,13 @@ public long inputBytes() {
106150
}
107151

108152
public Collection<Digest> allDigests() {
109-
return blobs.keySet();
153+
return Collections2.transform(blobs.keySet(), MerkleTree.Uploadable::adaptToDigest);
110154
}
111155

112156
@VisibleForTesting
113157
public Map<Digest, Object> blobs() {
114-
return blobs;
158+
return blobs.entrySet().stream()
159+
.collect(toImmutableMap(e -> adaptToDigest(e.getKey()), Map.Entry::getValue));
115160
}
116161

117162
@Override
@@ -150,5 +195,14 @@ public Optional<ListenableFuture<Void>> upload(
150195
default -> throw new IllegalStateException("Unexpected blob type: " + blobs.get(digest));
151196
};
152197
}
198+
199+
private static Digest adaptToDigest(Object key) {
200+
return switch (key) {
201+
case Digest digest -> digest;
202+
case FileArtifactValue metadata ->
203+
DigestUtil.buildDigest(metadata.getDigest(), metadata.getSize());
204+
default -> throw new IllegalStateException("Unexpected blob type: " + key);
205+
};
206+
}
153207
}
154208
}

src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTreeComputer.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import static com.google.common.util.concurrent.Futures.immediateFuture;
2424
import static com.google.common.util.concurrent.Futures.transform;
2525
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
26-
import static com.google.devtools.build.lib.remote.util.DigestUtil.DIGEST_COMPARATOR;
2726
import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode;
2827
import static com.google.devtools.build.lib.vfs.PathFragment.HIERARCHICAL_COMPARATOR;
2928
import static java.util.Comparator.comparing;
@@ -508,7 +507,10 @@ private MerkleTree buildWithPrecomputedSubTrees(
508507
long inputFiles = 0;
509508
long inputBytes = 0;
510509
var blobs =
511-
new TreeMap<Digest, /* byte[] | Path | VirtualActionInput */ Object>(DIGEST_COMPARATOR);
510+
new TreeMap<
511+
/* Digest | FileArtifactValue */ Object,
512+
/* byte[] | Path | VirtualActionInput */ Object>(
513+
MerkleTree.Uploadable.DIGEST_AND_METADATA_COMPARATOR);
512514
Deque<Directory.Builder> directoryStack = new ArrayDeque<>();
513515
directoryStack.push(Directory.newBuilder());
514516

@@ -566,7 +568,7 @@ private MerkleTree buildWithPrecomputedSubTrees(
566568
byte[] directoryBlob = directoryStack.pop().build().toByteArray();
567569
Digest directoryBlobDigest = digestUtil.compute(directoryBlob);
568570
if (blobPolicy != BlobPolicy.DISCARD && directoryBlobDigest.getSizeBytes() != 0) {
569-
blobs.put(directoryBlobDigest, directoryBlob);
571+
blobs.putIfAbsent(directoryBlobDigest, directoryBlob);
570572
}
571573
inputBytes += directoryBlobDigest.getSizeBytes();
572574
var topDirectory = directoryStack.peek();
@@ -659,7 +661,9 @@ private MerkleTree buildWithPrecomputedSubTrees(
659661
var digest = DigestUtil.buildDigest(metadata.getDigest(), metadata.getSize());
660662
addFile(currentDirectory, name, digest, nodeProperties);
661663
if (blobPolicy != BlobPolicy.DISCARD && digest.getSizeBytes() != 0) {
662-
blobs.put(digest, fileOrSourceDirectory);
664+
// If there is both a Digest and a FileArtifactValue key for the same content, prefer
665+
// the FileArtifactValue as it is retained anyway.
666+
blobs.put(metadata, fileOrSourceDirectory);
663667
}
664668
inputFiles++;
665669
inputBytes += digest.getSizeBytes();
@@ -669,7 +673,7 @@ private MerkleTree buildWithPrecomputedSubTrees(
669673
var digest = digestUtil.compute(virtualActionInput);
670674
addFile(currentDirectory, name, digest, nodeProperties);
671675
if (blobPolicy != BlobPolicy.DISCARD && digest.getSizeBytes() != 0) {
672-
blobs.put(digest, virtualActionInput);
676+
blobs.putIfAbsent(digest, virtualActionInput);
673677
}
674678
inputFiles++;
675679
inputBytes += digest.getSizeBytes();
@@ -688,7 +692,7 @@ private MerkleTree buildWithPrecomputedSubTrees(
688692
var digest = digestUtil.compute(artifactPathResolver.toPath(input));
689693
addFile(currentDirectory, name, digest, nodeProperties);
690694
if (blobPolicy != BlobPolicy.DISCARD && digest.getSizeBytes() != 0) {
691-
blobs.put(digest, input);
695+
blobs.putIfAbsent(digest, input);
692696
}
693697
inputFiles++;
694698
inputBytes += digest.getSizeBytes();

src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -839,8 +839,37 @@ public void buildRemoteAction_goldenTest(@TestParameter({"1", "2", "3"}) int see
839839
var footprintOut =
840840
Paths.get(System.getenv("TEST_UNDECLARED_OUTPUTS_DIR"), "merkle_tree_footprint.txt");
841841
Files.writeString(footprintOut, merkleTreeUniqueRetention.toFootprint());
842+
// Detailed footprint:
843+
// COUNT AVG SUM DESCRIPTION
844+
// 18 181 3264 [B
845+
// 9 32 288 b.b.r.e.v2.Digest
846+
// 2 112 224 [Ljava.lang.Object;
847+
// 9 16 144 c.g.p.ByteString$LiteralByteString
848+
// 1 40 40 c.g.c.c.ImmutableSortedMap
849+
// 2 16 32 c.g.c.c.RegularImmutableList
850+
// 1 32 32 c.g.d.b.l.r.m.MerkleTree$RootOnly$BlobsUploaded
851+
// 1 24 24 c.g.c.c.RegularImmutableSortedSet
852+
// 1 16 16 c.g.d.b.l.r.m.MerkleTree$Uploadable
853+
// 44 4064 (total)
854+
//
855+
// Ignoring objects with constant count, the footprint is made up of:
856+
// * the two Object arrays backing the ImmutableSortedMap that tracks a map from digest-like
857+
// object to their backing blob. Assuming that most of these objects are naturally retained
858+
// elsehwere, as is the case for regular files (which are represented as their
859+
// FileArtifactValue mapping to their Artifact), this representation is already optimal at
860+
// 8 bytes per blob.
861+
// * the Digest objects for non-regular file blobs, in particular Directory protos. These
862+
// could be represented more efficiently by storing their raw hash bytes and the size in a
863+
// a flat byte array, but savings aren't expected to be significant.
864+
// * most importantly, the serialized Directory protos, which contain inlined Digest protos
865+
// as well as filenames for all files. This is where the largest gains can be made by
866+
// introducing a custom representation that is serialized on demand when actually uploading
867+
// to the remote. Such a representation could consist of a flat Object array containing
868+
// FileArtifactValues (to replace Digest protos), Artifacts (to retrieve the
869+
// basename for file nodes), and Integers (referencing intermediate segments of Artifact
870+
// exec paths for most directory nodes).
842871
// TODO: Get this number down.
843-
assertThat(stableRetainedSize).isEqualTo(6112);
872+
assertThat(stableRetainedSize).isEqualTo(4064);
844873
}
845874
}
846875

0 commit comments

Comments
 (0)