Skip to content

Commit 5a9d396

Browse files
authored
Account for reserved disk size (#103903)
1 parent 1637391 commit 5a9d396

File tree

7 files changed

+99
-12
lines changed

7 files changed

+99
-12
lines changed

docs/changelog/103903.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 103903
2+
summary: Account for reserved disk size
3+
area: Allocation
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/ClusterInfo.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,10 +376,6 @@ public void writeTo(StreamOutput out) throws IOException {
376376
out.writeCollection(shardIds);
377377
}
378378

379-
public long getTotal() {
380-
return total;
381-
}
382-
383379
public boolean containsShardId(ShardId shardId) {
384380
return shardIds.contains(shardId);
385381
}

server/src/main/java/org/elasticsearch/cluster/ClusterInfoSimulator.java

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,48 @@ public class ClusterInfoSimulator {
3535

3636
public ClusterInfoSimulator(RoutingAllocation allocation) {
3737
this.allocation = allocation;
38-
this.leastAvailableSpaceUsage = new HashMap<>(allocation.clusterInfo().getNodeLeastAvailableDiskUsages());
39-
this.mostAvailableSpaceUsage = new HashMap<>(allocation.clusterInfo().getNodeMostAvailableDiskUsages());
38+
this.leastAvailableSpaceUsage = getAdjustedDiskSpace(allocation, allocation.clusterInfo().getNodeLeastAvailableDiskUsages());
39+
this.mostAvailableSpaceUsage = getAdjustedDiskSpace(allocation, allocation.clusterInfo().getNodeMostAvailableDiskUsages());
4040
this.shardSizes = new CopyOnFirstWriteMap<>(allocation.clusterInfo().shardSizes);
4141
this.shardDataSetSizes = Map.copyOf(allocation.clusterInfo().shardDataSetSizes);
4242
this.dataPath = Map.copyOf(allocation.clusterInfo().dataPath);
4343
}
4444

45+
/**
46+
* Cluster info contains a reserved space that is necessary to finish initializing shards (that are currently in progress).
47+
* for all initializing shards sum(expected size) = reserved space + already used space
48+
* This deducts already used space from disk usage as when shard start is simulated it is going to add entire expected shard size.
49+
*/
50+
private static Map<String, DiskUsage> getAdjustedDiskSpace(RoutingAllocation allocation, Map<String, DiskUsage> diskUsage) {
51+
var diskUsageCopy = new HashMap<>(diskUsage);
52+
for (var entry : diskUsageCopy.entrySet()) {
53+
var nodeId = entry.getKey();
54+
var usage = entry.getValue();
55+
56+
var reserved = allocation.clusterInfo().getReservedSpace(nodeId, usage.path());
57+
if (reserved.total() == 0) {
58+
continue;
59+
}
60+
var node = allocation.routingNodes().node(nodeId);
61+
if (node == null) {
62+
continue;
63+
}
64+
65+
long adjustment = 0;
66+
for (ShardId shardId : reserved.shardIds()) {
67+
var shard = node.getByShardId(shardId);
68+
if (shard != null) {
69+
var expectedSize = getExpectedShardSize(shard, 0, allocation);
70+
adjustment += expectedSize;
71+
}
72+
}
73+
adjustment -= reserved.total();
74+
75+
entry.setValue(updateWithFreeBytes(usage, adjustment));
76+
}
77+
return diskUsageCopy;
78+
}
79+
4580
/**
4681
* This method updates disk usage to reflect shard relocations and new replica initialization.
4782
* In case of a single data path both mostAvailableSpaceUsage and leastAvailableSpaceUsage are update to reflect the change.

server/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdMonitor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ public void onNewInfo(ClusterInfo info) {
232232
}
233233
}
234234

235-
final long reservedSpace = info.getReservedSpace(usage.getNodeId(), usage.getPath()).getTotal();
235+
final long reservedSpace = info.getReservedSpace(usage.getNodeId(), usage.getPath()).total();
236236
final DiskUsage usageWithReservedSpace = new DiskUsage(
237237
usage.getNodeId(),
238238
usage.getNodeName(),

server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ public static long sizeOfUnaccountedShards(
125125
) {
126126
// Account for reserved space wherever it is available
127127
final ClusterInfo.ReservedSpace reservedSpace = clusterInfo.getReservedSpace(node.nodeId(), dataPath);
128-
long totalSize = reservedSpace.getTotal();
128+
long totalSize = reservedSpace.total();
129129
// NB this counts all shards on the node when the ClusterInfoService retrieved the node stats, which may include shards that are
130130
// no longer initializing because their recovery failed or was cancelled.
131131

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/ClusterInfoSimulatorTests.java

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import java.util.HashMap;
4343
import java.util.List;
4444
import java.util.Map;
45+
import java.util.Set;
4546

4647
import static org.elasticsearch.cluster.ClusterInfo.shardIdentifierFromRouting;
4748
import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_RESIZE_SOURCE_NAME_KEY;
@@ -158,6 +159,53 @@ public void testInitializeNewReplica() {
158159
);
159160
}
160161

162+
public void testInitializeNewReplicaWithReservedSpace() {
163+
164+
var recoveredSize = 70;
165+
var remainingSize = 30;
166+
var totalShardSize = recoveredSize + remainingSize;
167+
168+
var indexMetadata = IndexMetadata.builder("my-index").settings(indexSettings(IndexVersion.current(), 1, 1)).build();
169+
var existingPrimary = newShardRouting(new ShardId(indexMetadata.getIndex(), 0), "node-0", true, STARTED);
170+
var newReplica = newShardRouting(
171+
new ShardId(indexMetadata.getIndex(), 0),
172+
"node-1",
173+
false,
174+
INITIALIZING,
175+
RecoverySource.PeerRecoverySource.INSTANCE
176+
);
177+
178+
var initialClusterInfo = new ClusterInfoTestBuilder() //
179+
.withNode("node-0", new DiskUsageBuilder("/data", 1000, 1000 - totalShardSize))
180+
.withNode("node-1", new DiskUsageBuilder("/data", 1000, 1000 - recoveredSize))
181+
.withShard(existingPrimary, totalShardSize)
182+
.withReservedSpace("node-1", "/data", remainingSize, newReplica.shardId())
183+
.build();
184+
185+
var state = ClusterState.builder(ClusterName.DEFAULT)
186+
.metadata(Metadata.builder().put(indexMetadata, false))
187+
.routingTable(
188+
RoutingTable.builder()
189+
.add(IndexRoutingTable.builder(indexMetadata.getIndex()).addShard(existingPrimary).addShard(newReplica))
190+
)
191+
.build();
192+
var allocation = createRoutingAllocation(state, initialClusterInfo, SnapshotShardSizeInfo.EMPTY);
193+
var simulator = new ClusterInfoSimulator(allocation);
194+
simulator.simulateShardStarted(newReplica);
195+
196+
assertThat(
197+
simulator.getClusterInfo(),
198+
equalTo(
199+
new ClusterInfoTestBuilder() //
200+
.withNode("node-0", new DiskUsageBuilder("/data", 1000, 1000 - totalShardSize))
201+
.withNode("node-1", new DiskUsageBuilder("/data", 1000, 1000 - totalShardSize))
202+
.withShard(existingPrimary, totalShardSize)
203+
.withShard(newReplica, totalShardSize)
204+
.build()
205+
)
206+
);
207+
}
208+
161209
public void testRelocateShard() {
162210

163211
var fromNodeId = "node-0";
@@ -631,6 +679,11 @@ public ClusterInfoTestBuilder withShard(ShardRouting shard, long size) {
631679
return this;
632680
}
633681

682+
public ClusterInfoTestBuilder withReservedSpace(String nodeId, String path, long size, ShardId... shardIds) {
683+
reservedSpace.put(new NodeAndPath(nodeId, nodeId + path), new ReservedSpace(size, Set.of(shardIds)));
684+
return this;
685+
}
686+
634687
public ClusterInfo build() {
635688
return new ClusterInfo(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, Map.of(), Map.of(), reservedSpace);
636689
}

server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -905,8 +905,7 @@ public void testAccountForSizeOfMisplacedShardsDuringNewComputation() {
905905
IndexRoutingTable.builder(indexMetadata2.getIndex())
906906
.addShard(newShardRouting(index2ShardId, "node-1", true, INITIALIZING, index2SnapshotRecoverySource))
907907
);
908-
// TODO enable in https://github.com/elastic/elasticsearch/pull/103903
909-
if (false && randomBoolean()) {
908+
if (randomBoolean()) {
910909
// Shard is 75% downloaded
911910
clusterInfoBuilder //
912911
.withNodeUsedSpace("node-1", ByteSizeValue.ofMb(768).getBytes())
@@ -920,8 +919,7 @@ public void testAccountForSizeOfMisplacedShardsDuringNewComputation() {
920919
IndexRoutingTable.builder(indexMetadata2.getIndex())
921920
.addShard(newShardRouting(index2ShardId, "node-2", true, INITIALIZING, index2SnapshotRecoverySource))
922921
);
923-
// TODO enable in https://github.com/elastic/elasticsearch/pull/103903
924-
if (false && randomBoolean()) {
922+
if (randomBoolean()) {
925923
// Shard is 75% downloaded
926924
clusterInfoBuilder //
927925
.withNodeUsedSpace("node-2", ByteSizeValue.ofMb(768).getBytes())

0 commit comments

Comments
 (0)