Skip to content

Commit cb530cf

Browse files
authored
Make Shard Limit Validator Tier specific (#19532)
Signed-off-by: Gagan Singh Saini <[email protected]>
1 parent 4ab0ae9 commit cb530cf

File tree

16 files changed

+741
-152
lines changed

16 files changed

+741
-152
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55

66
## [Unreleased 3.x]
77
### Added
8+
- Add seperate shard limit validation for local and remote indices ([#19532](https://github.com/opensearch-project/OpenSearch/pull/19532))
89
- Use Lucene `pack` method for `half_float` and `usigned_long` when using `ApproximatePointRangeQuery`.
910
- Add a mapper for context aware segments grouping criteria ([#19233](https://github.com/opensearch-project/OpenSearch/pull/19233))
1011
- Return full error for GRPC error response ([#19568](https://github.com/opensearch-project/OpenSearch/pull/19568))

server/src/internalClusterTest/java/org/opensearch/cluster/routing/allocation/decider/ShardsLimitAllocationDeciderIT.java

Lines changed: 166 additions & 26 deletions
Large diffs are not rendered by default.

server/src/internalClusterTest/java/org/opensearch/cluster/shards/ClusterShardLimitIT.java

Lines changed: 166 additions & 43 deletions
Large diffs are not rendered by default.

server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,8 @@ public Iterator<Setting<?>> settings() {
10461046

10471047
private final int indexTotalShardsPerNodeLimit;
10481048
private final int indexTotalPrimaryShardsPerNodeLimit;
1049+
private final int indexTotalRemoteCapableShardsPerNodeLimit;
1050+
private final int indexTotalRemoteCapablePrimaryShardsPerNodeLimit;
10491051
private final boolean isAppendOnlyIndex;
10501052

10511053
private final Context context;
@@ -1080,6 +1082,8 @@ private IndexMetadata(
10801082
final boolean isSystem,
10811083
final int indexTotalShardsPerNodeLimit,
10821084
final int indexTotalPrimaryShardsPerNodeLimit,
1085+
final int indexTotalRemoteCapableShardsPerNodeLimit,
1086+
final int indexTotalRemoteCapablePrimaryShardsPerNodeLimit,
10831087
boolean isAppendOnlyIndex,
10841088
final Context context,
10851089
final IngestionStatus ingestionStatus
@@ -1120,6 +1124,8 @@ private IndexMetadata(
11201124
this.isRemoteSnapshot = IndexModule.Type.REMOTE_SNAPSHOT.match(this.settings);
11211125
this.indexTotalShardsPerNodeLimit = indexTotalShardsPerNodeLimit;
11221126
this.indexTotalPrimaryShardsPerNodeLimit = indexTotalPrimaryShardsPerNodeLimit;
1127+
this.indexTotalRemoteCapableShardsPerNodeLimit = indexTotalRemoteCapableShardsPerNodeLimit;
1128+
this.indexTotalRemoteCapablePrimaryShardsPerNodeLimit = indexTotalRemoteCapablePrimaryShardsPerNodeLimit;
11231129
this.isAppendOnlyIndex = isAppendOnlyIndex;
11241130
this.context = context;
11251131
this.ingestionStatus = ingestionStatus;
@@ -1334,10 +1340,18 @@ public int getIndexTotalShardsPerNodeLimit() {
13341340
return this.indexTotalShardsPerNodeLimit;
13351341
}
13361342

1343+
public int getIndexTotalRemoteCapableShardsPerNodeLimit() {
1344+
return this.indexTotalRemoteCapableShardsPerNodeLimit;
1345+
}
1346+
13371347
public int getIndexTotalPrimaryShardsPerNodeLimit() {
13381348
return this.indexTotalPrimaryShardsPerNodeLimit;
13391349
}
13401350

1351+
public int getIndexTotalRemoteCapablePrimaryShardsPerNodeLimit() {
1352+
return this.indexTotalRemoteCapablePrimaryShardsPerNodeLimit;
1353+
}
1354+
13411355
public boolean isAppendOnlyIndex() {
13421356
return this.isAppendOnlyIndex;
13431357
}
@@ -2175,6 +2189,10 @@ public IndexMetadata build() {
21752189
final int indexTotalPrimaryShardsPerNodeLimit = ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.get(
21762190
settings
21772191
);
2192+
final int indexTotalRemoteCapableShardsPerNodeLimit =
2193+
ShardsLimitAllocationDecider.INDEX_TOTAL_REMOTE_CAPABLE_SHARDS_PER_NODE_SETTING.get(settings);
2194+
final int indexTotalRemoteCapablePrimaryShardsPerNodeLimit =
2195+
ShardsLimitAllocationDecider.INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING.get(settings);
21782196
final boolean isAppendOnlyIndex = INDEX_APPEND_ONLY_ENABLED_SETTING.get(settings);
21792197

21802198
final String uuid = settings.get(SETTING_INDEX_UUID, INDEX_UUID_NA_VALUE);
@@ -2212,6 +2230,8 @@ public IndexMetadata build() {
22122230
isSystem,
22132231
indexTotalShardsPerNodeLimit,
22142232
indexTotalPrimaryShardsPerNodeLimit,
2233+
indexTotalRemoteCapableShardsPerNodeLimit,
2234+
indexTotalRemoteCapablePrimaryShardsPerNodeLimit,
22152235
isAppendOnlyIndex,
22162236
context,
22172237
ingestionStatus

server/src/main/java/org/opensearch/cluster/metadata/Metadata.java

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.opensearch.cluster.block.ClusterBlockLevel;
4949
import org.opensearch.cluster.coordination.CoordinationMetadata;
5050
import org.opensearch.cluster.decommission.DecommissionAttributeMetadata;
51+
import org.opensearch.cluster.routing.RoutingPool;
5152
import org.opensearch.common.Nullable;
5253
import org.opensearch.common.UUIDs;
5354
import org.opensearch.common.annotation.PublicApi;
@@ -270,7 +271,8 @@ static Custom fromXContent(XContentParser parser, String name) throws IOExceptio
270271
private final Map<String, Custom> customs;
271272

272273
private final transient int totalNumberOfShards; // Transient ? not serializable anyway?
273-
private final int totalOpenIndexShards;
274+
private final int totalOpenLocalOnlyIndexShards;
275+
private final int totalOpenRemoteCapableIndexShards;
274276

275277
private final String[] allIndices;
276278
private final String[] visibleIndices;
@@ -315,15 +317,21 @@ static Custom fromXContent(XContentParser parser, String name) throws IOExceptio
315317
this.customs = Collections.unmodifiableMap(customs);
316318
this.templates = new TemplatesMetadata(templates);
317319
int totalNumberOfShards = 0;
318-
int totalOpenIndexShards = 0;
320+
int totalOpenLocalOnlyIndexShards = 0;
321+
int totalOpenRemoteCapableIndexShards = 0;
319322
for (IndexMetadata cursor : indices.values()) {
320323
totalNumberOfShards += cursor.getTotalNumberOfShards();
321324
if (IndexMetadata.State.OPEN.equals(cursor.getState())) {
322-
totalOpenIndexShards += cursor.getTotalNumberOfShards();
325+
if (RoutingPool.getIndexPool(cursor) == RoutingPool.REMOTE_CAPABLE) {
326+
totalOpenRemoteCapableIndexShards += cursor.getTotalNumberOfShards();
327+
} else {
328+
totalOpenLocalOnlyIndexShards += cursor.getTotalNumberOfShards();
329+
}
323330
}
324331
}
325332
this.totalNumberOfShards = totalNumberOfShards;
326-
this.totalOpenIndexShards = totalOpenIndexShards;
333+
this.totalOpenLocalOnlyIndexShards = totalOpenLocalOnlyIndexShards;
334+
this.totalOpenRemoteCapableIndexShards = totalOpenRemoteCapableIndexShards;
327335

328336
this.allIndices = allIndices;
329337
this.visibleIndices = visibleIndices;
@@ -905,7 +913,16 @@ public int getTotalNumberOfShards() {
905913
* @return The total number of open shards from all indices.
906914
*/
907915
public int getTotalOpenIndexShards() {
908-
return this.totalOpenIndexShards;
916+
return this.totalOpenLocalOnlyIndexShards;
917+
}
918+
919+
/**
920+
* Gets the total number of open remote capable shards from all indices. Includes
921+
* replicas, but does not include shards that are part of closed indices.
922+
* @return The total number of open shards from all indices.
923+
*/
924+
public int getTotalOpenRemoteCapableIndexShards() {
925+
return this.totalOpenRemoteCapableIndexShards;
909926
}
910927

911928
/**

server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@
155155
import static org.opensearch.cluster.metadata.Metadata.DEFAULT_REPLICA_COUNT_SETTING;
156156
import static org.opensearch.cluster.metadata.MetadataIndexTemplateService.findContextTemplateName;
157157
import static org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING;
158+
import static org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING;
158159
import static org.opensearch.cluster.service.ClusterManagerTask.CREATE_INDEX;
159160
import static org.opensearch.index.IndexModule.INDEX_STORE_TYPE_SETTING;
160161
import static org.opensearch.index.IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING;
@@ -1867,17 +1868,22 @@ public static void validateRefreshIntervalSettings(Settings requestSettings, Clu
18671868
public static void validateIndexTotalPrimaryShardsPerNodeSetting(Settings indexSettings) {
18681869
// Get the setting value
18691870
int indexPrimaryShardsPerNode = INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.get(indexSettings);
1871+
int indexRemoteCapablePrimaryShardsPerNode = INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING.get(indexSettings);
18701872

18711873
// If default value (-1), no validation needed
1872-
if (indexPrimaryShardsPerNode == -1) {
1874+
if (indexPrimaryShardsPerNode == -1 && indexRemoteCapablePrimaryShardsPerNode == -1) {
18731875
return;
18741876
}
18751877

18761878
// Check if remote store is enabled
18771879
boolean isRemoteStoreEnabled = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexSettings);
18781880
if (!isRemoteStoreEnabled) {
18791881
throw new IllegalArgumentException(
1880-
"Setting [" + INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey() + "] can only be used with remote store enabled clusters"
1882+
"Setting ["
1883+
+ INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey()
1884+
+ "] or ["
1885+
+ INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING.getKey()
1886+
+ "] can only be used with remote store enabled clusters"
18811887
);
18821888
}
18831889
}

server/src/main/java/org/opensearch/cluster/metadata/MetadataUpdateSettingsService.java

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import static org.opensearch.cluster.metadata.MetadataCreateIndexService.validateTranslogFlushIntervalSettingsForCompositeIndex;
8484
import static org.opensearch.cluster.metadata.MetadataIndexTemplateService.findComponentTemplate;
8585
import static org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING;
86+
import static org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider.INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING;
8687
import static org.opensearch.cluster.service.ClusterManagerTask.UPDATE_SETTINGS;
8788
import static org.opensearch.common.settings.AbstractScopedSettings.ARCHIVED_SETTINGS_PREFIX;
8889
import static org.opensearch.index.IndexSettings.same;
@@ -272,15 +273,11 @@ public ClusterState execute(ClusterState currentState) {
272273
}
273274

274275
// Verify that this won't take us over the cluster shard limit.
275-
int totalNewShards = Arrays.stream(request.indices())
276-
.mapToInt(i -> getTotalNewShards(i, currentState, updatedNumberOfReplicas))
277-
.sum();
278-
Optional<String> error = shardLimitValidator.checkShardLimit(totalNewShards, currentState);
279-
if (error.isPresent()) {
280-
ValidationException ex = new ValidationException();
281-
ex.addValidationError(error.get());
282-
throw ex;
283-
}
276+
shardLimitValidator.validateShardLimitForIndices(
277+
request.indices(),
278+
currentState,
279+
index -> getTotalNewShards(index, currentState, updatedNumberOfReplicas)
280+
);
284281

285282
/*
286283
* We do not update the in-sync allocation IDs as they will be removed upon the first index operation which makes
@@ -315,15 +312,12 @@ public ClusterState execute(ClusterState currentState) {
315312
}
316313

317314
// Verify that this won't take us over the cluster shard limit.
318-
int totalNewShards = Arrays.stream(request.indices())
319-
.mapToInt(i -> getTotalNewShards(i, currentState, updatedNumberOfSearchReplicas))
320-
.sum();
321-
Optional<String> error = shardLimitValidator.checkShardLimit(totalNewShards, currentState);
322-
if (error.isPresent()) {
323-
ValidationException ex = new ValidationException();
324-
ex.addValidationError(error.get());
325-
throw ex;
326-
}
315+
shardLimitValidator.validateShardLimitForIndices(
316+
request.indices(),
317+
currentState,
318+
index -> getTotalNewShards(index, currentState, updatedNumberOfSearchReplicas)
319+
);
320+
327321
routingTableBuilder.updateNumberOfSearchReplicas(updatedNumberOfSearchReplicas, actualIndices);
328322
metadataBuilder.updateNumberOfSearchReplicas(updatedNumberOfSearchReplicas, actualIndices);
329323
logger.info(
@@ -571,9 +565,10 @@ private void validateSearchReplicaCountSettings(Settings requestSettings, Index[
571565
public static void validateIndexTotalPrimaryShardsPerNodeSetting(Settings indexSettings, ClusterService clusterService) {
572566
// Get the setting value
573567
int indexPrimaryShardsPerNode = INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.get(indexSettings);
568+
int indexRemoteCapablePrimaryShardsPerNode = INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING.get(indexSettings);
574569

575570
// If default value (-1), no validation needed
576-
if (indexPrimaryShardsPerNode == -1) {
571+
if (indexPrimaryShardsPerNode == -1 && indexRemoteCapablePrimaryShardsPerNode == -1) {
577572
return;
578573
}
579574

@@ -586,7 +581,11 @@ public static void validateIndexTotalPrimaryShardsPerNodeSetting(Settings indexS
586581
.allMatch(DiscoveryNode::isRemoteStoreNode);
587582
if (!isRemoteStoreEnabled) {
588583
throw new IllegalArgumentException(
589-
"Setting [" + INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey() + "] can only be used with remote store enabled clusters"
584+
"Setting ["
585+
+ INDEX_TOTAL_PRIMARY_SHARDS_PER_NODE_SETTING.getKey()
586+
+ "] or ["
587+
+ INDEX_TOTAL_REMOTE_CAPABLE_PRIMARY_SHARDS_PER_NODE_SETTING.getKey()
588+
+ "] can only be used with remote store enabled clusters"
590589
);
591590
}
592591
}

server/src/main/java/org/opensearch/cluster/node/DiscoveryNodes.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ public class DiscoveryNodes extends AbstractDiffable<DiscoveryNodes> implements
7474

7575
private final Map<String, DiscoveryNode> nodes;
7676
private final Map<String, DiscoveryNode> dataNodes;
77+
private final Map<String, DiscoveryNode> warmNodes;
7778
private final Map<String, DiscoveryNode> clusterManagerNodes;
7879
private final Map<String, DiscoveryNode> ingestNodes;
7980

@@ -87,6 +88,7 @@ public class DiscoveryNodes extends AbstractDiffable<DiscoveryNodes> implements
8788
private DiscoveryNodes(
8889
final Map<String, DiscoveryNode> nodes,
8990
final Map<String, DiscoveryNode> dataNodes,
91+
final Map<String, DiscoveryNode> warmNodes,
9092
final Map<String, DiscoveryNode> clusterManagerNodes,
9193
final Map<String, DiscoveryNode> ingestNodes,
9294
String clusterManagerNodeId,
@@ -98,6 +100,7 @@ private DiscoveryNodes(
98100
) {
99101
this.nodes = Collections.unmodifiableMap(nodes);
100102
this.dataNodes = Collections.unmodifiableMap(dataNodes);
103+
this.warmNodes = Collections.unmodifiableMap(warmNodes);
101104
this.clusterManagerNodes = Collections.unmodifiableMap(clusterManagerNodes);
102105
this.ingestNodes = Collections.unmodifiableMap(ingestNodes);
103106
this.clusterManagerNodeId = clusterManagerNodeId;
@@ -151,6 +154,15 @@ public Map<String, DiscoveryNode> getDataNodes() {
151154
return this.dataNodes;
152155
}
153156

157+
/**
158+
* Get a {@link Map} of the discovered warm nodes arranged by their ids
159+
*
160+
* @return {@link Map} of the discovered warm nodes arranged by their ids
161+
*/
162+
public Map<String, DiscoveryNode> getWarmNodes() {
163+
return this.warmNodes;
164+
}
165+
154166
/**
155167
* Get a {@link Map} of the discovered cluster-manager nodes arranged by their ids
156168
*
@@ -802,6 +814,7 @@ private String validateAdd(DiscoveryNode node) {
802814

803815
public DiscoveryNodes build() {
804816
final Map<String, DiscoveryNode> dataNodesBuilder = new HashMap<>();
817+
final Map<String, DiscoveryNode> warmNodesBuilder = new HashMap<>();
805818
final Map<String, DiscoveryNode> clusterManagerNodesBuilder = new HashMap<>();
806819
final Map<String, DiscoveryNode> ingestNodesBuilder = new HashMap<>();
807820
Version minNodeVersion = null;
@@ -812,6 +825,9 @@ public DiscoveryNodes build() {
812825
if (nodeEntry.getValue().isDataNode()) {
813826
dataNodesBuilder.put(nodeEntry.getKey(), nodeEntry.getValue());
814827
}
828+
if (nodeEntry.getValue().isWarmNode()) {
829+
warmNodesBuilder.put(nodeEntry.getKey(), nodeEntry.getValue());
830+
}
815831
if (nodeEntry.getValue().isClusterManagerNode()) {
816832
clusterManagerNodesBuilder.put(nodeEntry.getKey(), nodeEntry.getValue());
817833
}
@@ -835,6 +851,7 @@ public DiscoveryNodes build() {
835851
return new DiscoveryNodes(
836852
nodes,
837853
dataNodesBuilder,
854+
warmNodesBuilder,
838855
clusterManagerNodesBuilder,
839856
ingestNodesBuilder,
840857
clusterManagerNodeId,

0 commit comments

Comments
 (0)