Skip to content

Commit acece61

Browse files
authored
Fall back to the actual shard size when the forecast is not available (#93461)
1 parent 1fb3a1b commit acece61

File tree

4 files changed

+177
-80
lines changed

4 files changed

+177
-80
lines changed

docs/changelog/93461.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 93461
2+
summary: Fall back to the actual shard size when forecast is not available
3+
area: Allocation
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
import java.util.Iterator;
5454
import java.util.List;
5555
import java.util.Map;
56-
import java.util.OptionalLong;
5756
import java.util.Set;
5857
import java.util.function.BiFunction;
5958
import java.util.stream.StreamSupport;
@@ -321,26 +320,26 @@ float minWeightDelta(Balancer balancer, String index) {
321320
* A {@link Balancer}
322321
*/
323322
public static class Balancer {
324-
private final Map<String, ModelNode> nodes;
325323
private final WriteLoadForecaster writeLoadForecaster;
326324
private final RoutingAllocation allocation;
327325
private final RoutingNodes routingNodes;
326+
private final Metadata metadata;
328327
private final WeightFunction weight;
329328

330329
private final float threshold;
331-
private final Metadata metadata;
332330
private final float avgShardsPerNode;
333331
private final double avgWriteLoadPerNode;
334332
private final double avgDiskUsageInBytesPerNode;
333+
private final Map<String, ModelNode> nodes;
335334
private final NodeSorter sorter;
336335

337336
public Balancer(WriteLoadForecaster writeLoadForecaster, RoutingAllocation allocation, WeightFunction weight, float threshold) {
338337
this.writeLoadForecaster = writeLoadForecaster;
339338
this.allocation = allocation;
340-
this.weight = weight;
341-
this.threshold = threshold;
342339
this.routingNodes = allocation.routingNodes();
343340
this.metadata = allocation.metadata();
341+
this.weight = weight;
342+
this.threshold = threshold;
344343
avgShardsPerNode = ((float) metadata.getTotalNumberOfShards()) / routingNodes.size();
345344
avgWriteLoadPerNode = getTotalWriteLoad(writeLoadForecaster, metadata) / routingNodes.size();
346345
avgDiskUsageInBytesPerNode = ((double) getTotalDiskUsageInBytes(allocation.clusterInfo(), metadata) / routingNodes.size());
@@ -371,15 +370,10 @@ private static long getTotalDiskUsageInBytes(ClusterInfo clusterInfo, Metadata m
371370

372371
// Visible for testing
373372
static long getIndexDiskUsageInBytes(ClusterInfo clusterInfo, IndexMetadata indexMetadata) {
374-
OptionalLong forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
375-
final long indexDiskUsageInBytes;
376-
if (forecastedShardSizeInBytes.isPresent()) {
377-
int i = numberOfCopies(indexMetadata);
378-
indexDiskUsageInBytes = forecastedShardSizeInBytes.getAsLong() * i;
379-
} else {
380-
indexDiskUsageInBytes = getIndexDiskUsageInBytesFromClusterInfo(clusterInfo, indexMetadata);
381-
}
382-
return indexDiskUsageInBytes;
373+
var forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
374+
return forecastedShardSizeInBytes.isPresent()
375+
? forecastedShardSizeInBytes.getAsLong() * numberOfCopies(indexMetadata)
376+
: getIndexDiskUsageInBytesFromClusterInfo(clusterInfo, indexMetadata);
383377
}
384378

385379
private static long getIndexDiskUsageInBytesFromClusterInfo(ClusterInfo clusterInfo, IndexMetadata indexMetadata) {
@@ -408,6 +402,10 @@ private static long getIndexDiskUsageInBytesFromClusterInfo(ClusterInfo clusterI
408402
return shardCount == 0 ? 0 : (totalSizeInBytes / shardCount) * numberOfCopies(indexMetadata);
409403
}
410404

405+
private static long getShardDiskUsageInBytes(ShardRouting shardRouting, IndexMetadata indexMetadata, ClusterInfo clusterInfo) {
406+
return indexMetadata.getForecastedShardSizeInBytes().orElseGet(() -> clusterInfo.getShardSize(shardRouting, 0L));
407+
}
408+
411409
private static int numberOfCopies(IndexMetadata indexMetadata) {
412410
return indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas());
413411
}
@@ -416,6 +414,14 @@ private double getShardWriteLoad(String index) {
416414
return writeLoadForecaster.getForecastedWriteLoad(metadata.index(index)).orElse(0.0);
417415
}
418416

417+
private double diskUsageInBytesPerShard(String index) {
418+
var indexMetadata = metadata.index(index);
419+
var forecastedShardSizeInBytes = indexMetadata.getForecastedShardSizeInBytes();
420+
return forecastedShardSizeInBytes.isPresent()
421+
? forecastedShardSizeInBytes.getAsLong()
422+
: (double) getIndexDiskUsageInBytesFromClusterInfo(allocation.clusterInfo(), indexMetadata) / numberOfCopies(indexMetadata);
423+
}
424+
419425
/**
420426
* Returns an array view on the nodes in the balancer. Nodes should not be removed from this list.
421427
*/
@@ -445,10 +451,6 @@ public double avgDiskUsageInBytesPerNode() {
445451
return avgDiskUsageInBytesPerNode;
446452
}
447453

448-
public double diskUsageInBytesPerShard(String index) {
449-
return metadata.index(index).getForecastedShardSizeInBytes().orElse(0);
450-
}
451-
452454
/**
453455
* Returns a new {@link NodeSorter} that sorts the nodes based on their
454456
* current weight with respect to the index passed to the sorter. The
@@ -962,7 +964,7 @@ private Decision decideCanForceAllocateForVacate(ShardRouting shardRouting, Rout
962964
private Map<String, ModelNode> buildModelFromAssigned() {
963965
Map<String, ModelNode> nodes = Maps.newMapWithExpectedSize(routingNodes.size());
964966
for (RoutingNode rn : routingNodes) {
965-
ModelNode node = new ModelNode(writeLoadForecaster, metadata, rn);
967+
ModelNode node = new ModelNode(writeLoadForecaster, metadata, allocation.clusterInfo(), rn);
966968
nodes.put(rn.nodeId(), node);
967969
for (ShardRouting shard : rn) {
968970
assert rn.nodeId().equals(shard.currentNodeId());
@@ -1254,12 +1256,14 @@ static class ModelNode implements Iterable<ModelIndex> {
12541256
private double diskUsageInBytes = 0.0;
12551257
private final WriteLoadForecaster writeLoadForecaster;
12561258
private final Metadata metadata;
1259+
private final ClusterInfo clusterInfo;
12571260
private final RoutingNode routingNode;
12581261
private final Map<String, ModelIndex> indices;
12591262

1260-
ModelNode(WriteLoadForecaster writeLoadForecaster, Metadata metadata, RoutingNode routingNode) {
1263+
ModelNode(WriteLoadForecaster writeLoadForecaster, Metadata metadata, ClusterInfo clusterInfo, RoutingNode routingNode) {
12611264
this.writeLoadForecaster = writeLoadForecaster;
12621265
this.metadata = metadata;
1266+
this.clusterInfo = clusterInfo;
12631267
this.routingNode = routingNode;
12641268
this.indices = Maps.newMapWithExpectedSize(routingNode.size() + 10);// some extra to account for shard movements
12651269
}
@@ -1305,7 +1309,7 @@ public void addShard(ShardRouting shard) {
13051309
indices.computeIfAbsent(shard.getIndexName(), t -> new ModelIndex()).addShard(shard);
13061310
IndexMetadata indexMetadata = metadata.index(shard.index());
13071311
writeLoad += writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0);
1308-
diskUsageInBytes += indexMetadata.getForecastedShardSizeInBytes().orElse(0);
1312+
diskUsageInBytes += Balancer.getShardDiskUsageInBytes(shard, indexMetadata, clusterInfo);
13091313
numShards++;
13101314
}
13111315

@@ -1319,7 +1323,7 @@ public void removeShard(ShardRouting shard) {
13191323
}
13201324
IndexMetadata indexMetadata = metadata.index(shard.index());
13211325
writeLoad -= writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0);
1322-
diskUsageInBytes -= indexMetadata.getForecastedShardSizeInBytes().orElse(0);
1326+
diskUsageInBytes -= Balancer.getShardDiskUsageInBytes(shard, indexMetadata, clusterInfo);
13231327
numShards--;
13241328
}
13251329

server/src/test/java/org/elasticsearch/cluster/routing/allocation/ExpectedShardSizeAllocationTests.java

Lines changed: 34 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,21 @@
1313
import org.elasticsearch.Version;
1414
import org.elasticsearch.action.ActionListener;
1515
import org.elasticsearch.cluster.ClusterInfo;
16+
import org.elasticsearch.cluster.ClusterName;
1617
import org.elasticsearch.cluster.ClusterState;
1718
import org.elasticsearch.cluster.ESAllocationTestCase;
1819
import org.elasticsearch.cluster.TestShardRoutingRoleStrategies;
1920
import org.elasticsearch.cluster.metadata.IndexMetadata;
2021
import org.elasticsearch.cluster.metadata.Metadata;
2122
import org.elasticsearch.cluster.node.DiscoveryNodes;
2223
import org.elasticsearch.cluster.routing.RoutingTable;
23-
import org.elasticsearch.cluster.routing.ShardRouting;
2424
import org.elasticsearch.cluster.routing.ShardRoutingState;
2525
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
2626
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
2727
import org.elasticsearch.common.settings.Settings;
28+
import org.elasticsearch.index.shard.ShardId;
29+
30+
import java.util.Map;
2831

2932
import static org.elasticsearch.cluster.routing.RoutingNodesHelper.shardsWithState;
3033
import static org.hamcrest.Matchers.equalTo;
@@ -35,37 +38,18 @@ public class ExpectedShardSizeAllocationTests extends ESAllocationTestCase {
3538

3639
public void testInitializingHasExpectedSize() {
3740
final long byteSize = randomIntBetween(0, Integer.MAX_VALUE);
38-
AllocationService strategy = createAllocationService(Settings.EMPTY, () -> new ClusterInfo() {
39-
@Override
40-
public Long getShardSize(ShardRouting shardRouting) {
41-
if (shardRouting.getIndexName().equals("test") && shardRouting.shardId().getId() == 0) {
42-
return byteSize;
43-
}
44-
return null;
45-
}
46-
});
41+
final ClusterInfo clusterInfo = createClusterInfoWith(new ShardId("test", "_na_", 0), byteSize);
42+
AllocationService strategy = createAllocationService(Settings.EMPTY, () -> clusterInfo);
4743

4844
logger.info("Building initial routing table");
45+
var indexMetadata = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1).build();
4946

50-
Metadata metadata = Metadata.builder()
51-
.put(
52-
IndexMetadata.builder("test")
53-
.settings(
54-
settings(Version.CURRENT).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
55-
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
56-
)
57-
)
58-
.build();
59-
60-
RoutingTable routingTable = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY)
61-
.addAsNew(metadata.index("test"))
47+
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
48+
.metadata(Metadata.builder().put(indexMetadata, false))
49+
.routingTable(RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY).addAsNew(indexMetadata))
50+
.nodes(DiscoveryNodes.builder().add(newNode("node1")))
6251
.build();
63-
64-
ClusterState clusterState = ClusterState.builder(
65-
org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)
66-
).metadata(metadata).routingTable(routingTable).build();
6752
logger.info("Adding one node and performing rerouting");
68-
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1"))).build();
6953
clusterState = strategy.reroute(clusterState, "reroute", ActionListener.noop());
7054

7155
assertEquals(1, clusterState.getRoutingNodes().node("node1").numberOfShardsWithState(ShardRoutingState.INITIALIZING));
@@ -92,43 +76,26 @@ public Long getShardSize(ShardRouting shardRouting) {
9276

9377
public void testExpectedSizeOnMove() {
9478
final long byteSize = randomIntBetween(0, Integer.MAX_VALUE);
95-
final AllocationService allocation = createAllocationService(Settings.EMPTY, () -> new ClusterInfo() {
96-
@Override
97-
public Long getShardSize(ShardRouting shardRouting) {
98-
if (shardRouting.getIndexName().equals("test") && shardRouting.shardId().getId() == 0) {
99-
return byteSize;
100-
}
101-
return null;
102-
}
103-
});
79+
final ClusterInfo clusterInfo = createClusterInfoWith(new ShardId("test", "_na_", 0), byteSize);
80+
final AllocationService allocation = createAllocationService(Settings.EMPTY, () -> clusterInfo);
10481
logger.info("creating an index with 1 shard, no replica");
105-
Metadata metadata = Metadata.builder()
106-
.put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0))
107-
.build();
108-
RoutingTable routingTable = RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY)
109-
.addAsNew(metadata.index("test"))
82+
var indexMetadata = IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0).build();
83+
ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
84+
.metadata(Metadata.builder().put(indexMetadata, false))
85+
.routingTable(RoutingTable.builder(TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY).addAsNew(indexMetadata))
86+
.nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
11087
.build();
111-
ClusterState clusterState = ClusterState.builder(
112-
org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)
113-
).metadata(metadata).routingTable(routingTable).build();
11488

11589
logger.info("adding two nodes and performing rerouting");
116-
clusterState = ClusterState.builder(clusterState)
117-
.nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2")))
118-
.build();
11990
clusterState = allocation.reroute(clusterState, "reroute", ActionListener.noop());
12091

12192
logger.info("start primary shard");
12293
clusterState = startInitializingShardsAndReroute(allocation, clusterState);
12394

12495
logger.info("move the shard");
12596
String existingNodeId = clusterState.routingTable().index("test").shard(0).primaryShard().currentNodeId();
126-
String toNodeId;
127-
if ("node1".equals(existingNodeId)) {
128-
toNodeId = "node2";
129-
} else {
130-
toNodeId = "node1";
131-
}
97+
String toNodeId = "node1".equals(existingNodeId) ? "node2" : "node1";
98+
13299
AllocationService.CommandsResult commandsResult = allocation.reroute(
133100
clusterState,
134101
new AllocationCommands(new MoveAllocationCommand("test", 0, existingNodeId, toNodeId)),
@@ -152,4 +119,18 @@ public Long getShardSize(ShardRouting shardRouting) {
152119
assertThat(clusterState.getRoutingNodes().node(toNodeId).iterator().next().state(), equalTo(ShardRoutingState.STARTED));
153120
assertEquals(clusterState.getRoutingNodes().node(toNodeId).iterator().next().getExpectedShardSize(), -1);
154121
}
122+
123+
private static ClusterInfo createClusterInfoWith(ShardId shardId, long size) {
124+
return new ClusterInfo(
125+
Map.of(),
126+
Map.of(),
127+
Map.ofEntries(
128+
Map.entry(ClusterInfo.shardIdentifierFromRouting(shardId, true), size),
129+
Map.entry(ClusterInfo.shardIdentifierFromRouting(shardId, false), size)
130+
),
131+
Map.of(),
132+
Map.of(),
133+
Map.of()
134+
);
135+
}
155136
}

0 commit comments

Comments
 (0)