Commits (38)
9870591
Limit size of shardDeleteResults
joshua-adams-1 Aug 22, 2025
97e9969
[CI] Auto commit changes from spotless
Aug 26, 2025
24b7a62
Minor tweaks
joshua-adams-1 Aug 26, 2025
d888113
Merge branch 'limit-shard-blobs-to-delete' of github.com:joshua-adams…
joshua-adams-1 Aug 26, 2025
ee89eb2
Ran ./gradlew spotlessApply precommit
joshua-adams-1 Aug 26, 2025
92991b9
TBR - Add TODO
joshua-adams-1 Aug 27, 2025
3190772
Uses a setting to control the max `shardDeleteResults` size
joshua-adams-1 Sep 1, 2025
a16856c
Remove TODOs
joshua-adams-1 Sep 1, 2025
381d294
Fix failing unit tests
joshua-adams-1 Sep 1, 2025
203d513
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Sep 1, 2025
daf09b6
Moved the limit logic out of the streams submodule and into
joshua-adams-1 Sep 3, 2025
dc70d5b
Merge branch 'main' of github.com:elastic/elasticsearch into limit-sh…
joshua-adams-1 Sep 3, 2025
0355c2a
Add tests
joshua-adams-1 Sep 4, 2025
f072128
Run ./gradlew spotlessApply precommit
joshua-adams-1 Sep 4, 2025
abb2d4c
Merge branch 'limit-shard-blobs-to-delete' of github.com:joshua-adams…
joshua-adams-1 Sep 4, 2025
a0d728f
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Sep 4, 2025
654ebf2
Creates BoundedOutputStream
joshua-adams-1 Sep 11, 2025
5ef0111
Merge branch 'limit-shard-blobs-to-delete' of github.com:joshua-adams…
joshua-adams-1 Sep 11, 2025
bd9217b
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Sep 11, 2025
ba81bcf
[CI] Auto commit changes from spotless
Sep 11, 2025
acd2182
Revert StreamOutput and delete tests
joshua-adams-1 Sep 11, 2025
be05a1f
Adds BoundedOutputStreamTests
joshua-adams-1 Sep 11, 2025
8d66c1e
Creates TruncatedOutputStream
joshua-adams-1 Sep 12, 2025
ce64bf5
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Sep 12, 2025
0fa5099
Spotless commit
joshua-adams-1 Sep 12, 2025
3575240
Merge branch 'limit-shard-blobs-to-delete' of github.com:joshua-adams…
joshua-adams-1 Sep 12, 2025
d55893d
Add skippedResultsCount
joshua-adams-1 Sep 12, 2025
3725a3c
Rewrite the unit tests
joshua-adams-1 Sep 18, 2025
ed00f1a
Spotless apply
joshua-adams-1 Sep 18, 2025
ce6195d
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Sep 18, 2025
37404a5
[CI] Update transport version definitions
Oct 2, 2025
fc41d60
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Oct 2, 2025
d1e81f7
David comments
joshua-adams-1 Oct 7, 2025
2f0ea30
Fix test
joshua-adams-1 Oct 7, 2025
6babba9
Merge branch 'limit-shard-blobs-to-delete' of https://github.com/josh…
joshua-adams-1 Oct 7, 2025
1ff464d
Merge branch 'main' into limit-shard-blobs-to-delete
joshua-adams-1 Oct 7, 2025
0d01264
spotless
joshua-adams-1 Oct 7, 2025
d73ffef
Modify comment
joshua-adams-1 Oct 7, 2025
BytesStreamOutput.java

@@ -33,6 +33,7 @@ public class BytesStreamOutput extends BytesStream {
@Nullable
protected ByteArray bytes;
protected int count;
protected int maximumSize;

/**
* Create a non recycling {@link BytesStreamOutput} with an initial capacity of 0.
@@ -54,10 +55,15 @@ public BytesStreamOutput(int expectedSize) {
}

protected BytesStreamOutput(int expectedSize, BigArrays bigArrays) {
this(expectedSize, bigArrays, Integer.MAX_VALUE);
}

protected BytesStreamOutput(int expectedSize, BigArrays bigArrays, int maximumSize) {
this.bigArrays = bigArrays;
if (expectedSize != 0) {
this.bytes = bigArrays.newByteArray(expectedSize, false);
}
this.maximumSize = maximumSize;
}

@Override
@@ -171,8 +177,8 @@ private static void copyToArray(BytesReference bytesReference, byte[] arr) {
}

protected void ensureCapacity(long offset) {
if (offset > Integer.MAX_VALUE) {
throw new IllegalArgumentException(getClass().getSimpleName() + " cannot hold more than 2GB of data");
if (offset > this.maximumSize) {
throw new IllegalArgumentException(getClass().getSimpleName() + " has exceeded its maximum size of " + this.maximumSize);
}
if (bytes == null) {
this.bytes = bigArrays.newByteArray(BigArrays.overSize(offset, PageCacheRecycler.PAGE_SIZE_IN_BYTES, 1), false);
@@ -181,4 +187,10 @@ protected void ensureCapacity(long offset) {
}
}

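/**
 * Returns whether {@code length} additional bytes can be written without exceeding {@code maximumSize}.
 */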
public boolean hasCapacity(int length) {
if (length < 0) {
throw new IllegalArgumentException("Negative length");
}
return count + length <= maximumSize;
}
}
ReleasableBytesStreamOutput.java

@@ -30,6 +30,10 @@ public ReleasableBytesStreamOutput(BigArrays bigarrays) {
this(PageCacheRecycler.PAGE_SIZE_IN_BYTES, bigarrays);
}

public ReleasableBytesStreamOutput(BigArrays bigArrays, int maximumSize) {
super(PageCacheRecycler.PAGE_SIZE_IN_BYTES, bigArrays, maximumSize);
}

public ReleasableBytesStreamOutput(int expectedSize, BigArrays bigArrays) {
super(expectedSize, bigArrays);
}
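To make the new bound concrete, here is a minimal usage sketch (not part of this PR; the method name is invented and a BigArrays instance is assumed to be available):

    static void writeWithinBound(BigArrays bigArrays) throws IOException {
        // cap the stream at 1 KiB rather than the previous implicit 2GB limit
        try (ReleasableBytesStreamOutput out = new ReleasableBytesStreamOutput(bigArrays, 1024)) {
            final byte[] payload = new byte[600];
            if (out.hasCapacity(payload.length)) {
                out.writeBytes(payload); // 600 <= 1024: the write is allowed
            }
            if (out.hasCapacity(payload.length) == false) {
                // 600 + 600 > 1024: skip the second write; attempting it anyway
                // would make ensureCapacity throw an IllegalArgumentException
            }
        }
    }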
StreamOutput.java

@@ -220,6 +220,26 @@ public void writeIntLE(int i) throws IOException {
writeBytes(buffer, 0, 4);
}

/**
* Returns the number of bytes needed to encode the given int as a VInt.
*/
public static int bytesInVInt(int i) {
// Single byte shortcut: fits in 7 bits (i.e., values 0..127)
if (Integer.numberOfLeadingZeros(i) >= 25) {
return 1;
}
int byteCount = 1;
i >>>= 7;
while ((i & ~0x7F) != 0) {
byteCount++;
i >>>= 7;
}
if (i != 0) {
byteCount++;
}
return byteCount;
}

/**
* Writes an int in a variable-length format. Writes between one and
* five bytes. Smaller values take fewer bytes. Negative numbers
@@ -235,7 +255,7 @@ public void writeVInt(int i) throws IOException {
* In that case benchmarks of the method itself are faster but
* benchmarks of methods that use this method are slower.
* This is philosophically in line with vint in general - it biases
* twoards being simple and fast for smaller numbers.
* towards being simple and fast for smaller numbers.
*/
if (Integer.numberOfLeadingZeros(i) >= 25) {
writeByte((byte) i);
@@ -430,6 +450,27 @@ public void writeString(String str) throws IOException {
writeString(str, scratch.get(), 0);
}

/**
* Returns the number of bytes needed to encode the given string in UTF-8.
*/
public static int bytesInString(String str) {
int byteCount = 0;
final int charCount = str.length();
for (int i = 0; i < charCount; i++) {
final int c = str.charAt(i);
if (c <= 0x007F) {
byteCount += 1;
} else if (c > 0x07FF) {
byteCount += 3;
} else {
byteCount += 2;
}
}
// Add bytes for the length prefix (VInt)
byteCount += bytesInVInt(charCount);
return byteCount;
}

/**
* Write string as well as possibly the beginning of the given {@code buffer}. The given {@code buffer} will also be used when encoding
* the given string.
@@ -1137,6 +1178,17 @@ public <T> void writeCollection(final Collection<T> collection, final Writer<T>
}
}

/**
* Returns the number of bytes needed to encode the given collection of strings.
*/
public static int bytesInStringCollection(Collection<String> collection) {
int byteCount = bytesInVInt(collection.size());
for (String item : collection) {
byteCount += bytesInString(item);
}
return byteCount;
}

/**
* Writes a collection of strings which can then be read using {@link StreamInput#readStringCollectionAsList} or another {@code
* readStringCollectionAs*} method. Make sure to read the collection back into the same type as was originally written.
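Since these helpers must agree byte-for-byte with writeVInt and writeString, a quick illustrative check (not from the PR) could look like this; note that negative ints always occupy five bytes, so bytesInVInt(-1) == 5:

    static void verifySizeHelpers() throws IOException {
        // bytesInVInt(300) == 2 (300 needs two 7-bit groups); bytesInString("snapshot-é") == 12
        // (a 1-byte VInt for the 10-char length, 9 ASCII bytes, and 2 bytes for 'é', U+00E9 <= 0x7FF)
        final int expected = StreamOutput.bytesInVInt(300) + StreamOutput.bytesInString("snapshot-é");
        try (BytesStreamOutput out = new BytesStreamOutput()) {
            out.writeVInt(300);
            out.writeString("snapshot-é");
            assert out.size() == expected : out.size();
        }
    }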
BlobStoreRepository.java

@@ -76,6 +76,7 @@
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.RelativeByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
@@ -1006,7 +1007,8 @@ private void createSnapshotsDeletion(
SnapshotsServiceUtils.minCompatibleVersion(minimumNodeVersion, originalRepositoryData, snapshotIds),
originalRootBlobs,
blobStore().blobContainer(indicesPath()).children(OperationPurpose.SNAPSHOT_DATA),
originalRepositoryData
originalRepositoryData,
metadata.settings()
);
}));
}
@@ -1075,6 +1077,7 @@ class SnapshotsDeletion {
* {@link RepositoryData} blob newer than the one identified by {@link #originalRepositoryDataGeneration}.
*/
private final RepositoryData originalRepositoryData;
private final Settings settings;

/**
* Executor to use for all repository interactions.
@@ -1096,15 +1099,16 @@ class SnapshotsDeletion {
/**
* Tracks the shard-level blobs which can be deleted once all the metadata updates have completed.
*/
private final ShardBlobsToDelete shardBlobsToDelete = new ShardBlobsToDelete();
private final ShardBlobsToDelete shardBlobsToDelete;

SnapshotsDeletion(
Collection<SnapshotId> snapshotIds,
long originalRepositoryDataGeneration,
IndexVersion repositoryFormatIndexVersion,
Map<String, BlobMetadata> originalRootBlobs,
Map<String, BlobContainer> originalIndexContainers,
RepositoryData originalRepositoryData
RepositoryData originalRepositoryData,
Settings settings
) {
this.snapshotIds = snapshotIds;
this.originalRepositoryDataGeneration = originalRepositoryDataGeneration;
@@ -1113,6 +1117,9 @@ class SnapshotsDeletion {
this.originalRootBlobs = originalRootBlobs;
this.originalIndexContainers = originalIndexContainers;
this.originalRepositoryData = originalRepositoryData;
this.settings = settings;

shardBlobsToDelete = new ShardBlobsToDelete(this.settings);
}

// ---------------------------------------------------------------------------------------------------------------------------------
@@ -1477,6 +1484,7 @@ private void cleanupUnlinkedShardLevelBlobs(ActionListener<Void> listener) {
listener.onResponse(null);
return;
}

snapshotExecutor.execute(ActionRunnable.wrap(listener, l -> {
try {
deleteFromContainer(OperationPurpose.SNAPSHOT_DATA, blobContainer(), filesToDelete);
@@ -1678,26 +1686,63 @@ void writeTo(StreamOutput out) throws IOException {
* need no further synchronization.
* </p>
*/
// If the size of this continues to be a problem even after compression, consider either a hard limit on its size (preferring leaked
// blobs over an OOME on the master) or else offloading it to disk or to the repository itself.
private final BytesStreamOutput shardDeleteResults = new ReleasableBytesStreamOutput(bigArrays);
private final BytesStreamOutput shardDeleteResults;

private int resultCount = 0;

private final StreamOutput compressed = new OutputStreamStreamOutput(
new BufferedOutputStream(
new DeflaterOutputStream(Streams.flushOnCloseStream(shardDeleteResults)),
DeflateCompressor.BUFFER_SIZE
)
);
private final StreamOutput compressed;

private final ArrayList<Closeable> resources = new ArrayList<>();

private final ShardGenerations.Builder shardGenerationsBuilder = ShardGenerations.builder();

ShardBlobsToDelete() {
resources.add(compressed);
resources.add(LeakTracker.wrap((Releasable) shardDeleteResults));
private final String SHARD_DELETE_RESULTS_MAX_SIZE_SETTING_NAME = "repositories.blobstore.max_shard_delete_results_size";
private final Setting<RelativeByteSizeValue> MAX_SHARD_DELETE_RESULTS_SIZE_SETTING = new Setting<>(
SHARD_DELETE_RESULTS_MAX_SIZE_SETTING_NAME,
"25%",
s -> RelativeByteSizeValue.parseRelativeByteSizeValue(s, SHARD_DELETE_RESULTS_MAX_SIZE_SETTING_NAME),
Setting.Property.NodeScope
);

Reviewer comment: Could we make this a dynamic setting too?

ShardBlobsToDelete(Settings settings) {
int maxSizeOfShardDeleteResults = calculateMaximumShardDeleteResultsSize(settings);
if (maxSizeOfShardDeleteResults > 0) {
this.shardDeleteResults = new ReleasableBytesStreamOutput(bigArrays, maxSizeOfShardDeleteResults);
this.compressed = new OutputStreamStreamOutput(
new BufferedOutputStream(
new DeflaterOutputStream(Streams.flushOnCloseStream(shardDeleteResults)),
DeflateCompressor.BUFFER_SIZE
)
);
resources.add(compressed);
resources.add(LeakTracker.wrap((Releasable) shardDeleteResults));
} else {
this.shardDeleteResults = null;
this.compressed = null;
}

}

/**
* Calculates the maximum size of the shardDeleteResults BytesStreamOutput.
* The limit is taken from the {@code repositories.blobstore.max_shard_delete_results_size} setting
* (default 25% of the maximum heap) and is capped at 2GB.
* @return The maximum number of bytes the shardDeleteResults BytesStreamOutput can consume on the heap
*/
int calculateMaximumShardDeleteResultsSize(Settings settings) {
RelativeByteSizeValue configuredValue = MAX_SHARD_DELETE_RESULTS_SIZE_SETTING.get(settings);

long maxAllowedSizeInBytes;
if (configuredValue.isAbsolute()) {
maxAllowedSizeInBytes = configuredValue.getAbsolute().getBytes();
} else {
maxAllowedSizeInBytes = configuredValue.calculateValue(ByteSizeValue.ofBytes(Runtime.getRuntime().maxMemory()), null)
.getBytes();
}

if (maxAllowedSizeInBytes > Integer.MAX_VALUE) {
return Integer.MAX_VALUE;
}
return (int) maxAllowedSizeInBytes;
}
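
// Illustrative arithmetic for the setting above (not part of the PR):
//   "25%" (the default) on a 32GB heap -> 8GB, clamped to Integer.MAX_VALUE (~2GB)
//   "25%" (the default) on a 4GB heap  -> 1GB, used as-is
//   "512mb"                            -> absolute value, used as-is
//   "0b"                               -> a zero limit: no stream is created and all
//                                        shard delete results are skipped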

synchronized void addShardDeleteResult(
Expand All @@ -1706,10 +1751,33 @@ synchronized void addShardDeleteResult(
ShardGeneration newGeneration,
Collection<String> blobsToDelete
) {
if (compressed == null) {
// No output stream: skip writing, but still update generations
shardGenerationsBuilder.put(indexId, shardId, newGeneration);
return;
}
try {
shardGenerationsBuilder.put(indexId, shardId, newGeneration);
new ShardSnapshotMetaDeleteResult(Objects.requireNonNull(indexId.getId()), shardId, blobsToDelete).writeTo(compressed);
resultCount += 1;

// Calculate how much space we'd need to write
int bytesToWriteIndexId = StreamOutput.bytesInString(indexId.getId());
int bytesToWriteShardId = StreamOutput.bytesInVInt(shardId);
int bytesToWriteBlobsToDelete = StreamOutput.bytesInStringCollection(blobsToDelete);
int totalBytesRequired = bytesToWriteIndexId + bytesToWriteShardId + bytesToWriteBlobsToDelete;

// Only perform the write if there is capacity left
if (shardDeleteResults.hasCapacity(totalBytesRequired)) {
new ShardSnapshotMetaDeleteResult(Objects.requireNonNull(indexId.getId()), shardId, blobsToDelete).writeTo(compressed);
resultCount += 1;
} else {
logger.warn(
"Failure to clean up the following dangling blobs, {}, for index {} and shard {}",
blobsToDelete,
indexId,
shardId
);
Reviewer comment: We can't reasonably log every skipped blob at WARN like this - we've already captured several (compressed) GiB of blob names before getting to this point, so it wouldn't be surprising if there were several GiB more. We wouldn't expect users to go through these logs and delete the blobs manually - indeed we would strongly discourage that kind of behaviour.

Instead, let's log this at DEBUG and keep a count of the number of blobs we skipped. Then at the end we can log at WARN how many blobs we've leaked.

Also, a nit: it's not really a "failure" - we're deliberately skipping this work because of resource constraints. We should mention in the user-facing WARN message that these dangling blobs will be cleaned up by subsequent deletes, and perhaps suggest that the master node needs a larger heap to perform such large snapshot deletes in future.

compressed.close();
}
} catch (IOException e) {
assert false : e; // no IO actually happens here
throw new UncheckedIOException(e);
Expand All @@ -1721,6 +1789,10 @@ public ShardGenerations getUpdatedShardGenerations() {
}

public Iterator<String> getBlobPaths() {
if (compressed == null || shardDeleteResults == null) {
// No output stream: nothing to return
Reviewer comment: Likewise here, I don't think we should change anything with respect to these values being null.

return Collections.emptyIterator();
}
final StreamInput input;
try {
compressed.close();
Expand Down Expand Up @@ -1750,6 +1822,9 @@ public Iterator<String> getBlobPaths() {

@Override
public void close() {
if (resources.isEmpty()) {
return;
}
Reviewer comment: Likewise here, let's always track these resources even if the limit is zero.

try {
IOUtils.close(resources);
} catch (IOException e) {
Expand All @@ -1760,7 +1835,7 @@ public void close() {

// exposed for tests
int sizeInBytes() {
return shardDeleteResults.size();
return shardDeleteResults == null ? 0 : shardDeleteResults.size();
}
}
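
The later commit d55893d ("Add skippedResultsCount") appears to move in the direction the reviewer suggests; a rough sketch of that shape (field placement and message wording invented, not taken from the PR):

    private int skippedResultsCount; // guarded by 'this', like resultCount

    // in addShardDeleteResult, instead of a per-shard WARN:
    logger.debug("size limit reached; skipping delete results for [{}][{}]", indexId, shardId);
    skippedResultsCount += blobsToDelete.size();

    // once the deletion completes (e.g. in close()):
    if (skippedResultsCount > 0) {
        logger.warn(
            "skipped recording [{}] dangling blobs because [{}] was exceeded; they will be cleaned up by "
                + "subsequent snapshot deletes, but consider a larger master-node heap for deletes of this size",
            skippedResultsCount,
            SHARD_DELETE_RESULTS_MAX_SIZE_SETTING_NAME
        );
    }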
