Skip to content

Commit f5712e4

Browse files
authored
Infrastructure for assuming cluster features in the next major version (#118143)
Allow features to be marked as 'assumed', allowing them to be removed in the next major version.
1 parent 6516a53 commit f5712e4

File tree

14 files changed

+464
-42
lines changed

14 files changed

+464
-42
lines changed

build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,8 @@ org.elasticsearch.cluster.ClusterState#compatibilityVersions()
155155

156156
@defaultMessage ClusterFeatures#nodeFeatures is for internal use only. Use FeatureService#clusterHasFeature to determine if a feature is present on the cluster.
157157
org.elasticsearch.cluster.ClusterFeatures#nodeFeatures()
158-
@defaultMessage ClusterFeatures#allNodeFeatures is for internal use only. Use FeatureService#clusterHasFeature to determine if a feature is present on the cluster.
159-
org.elasticsearch.cluster.ClusterFeatures#allNodeFeatures()
160158
@defaultMessage ClusterFeatures#clusterHasFeature is for internal use only. Use FeatureService#clusterHasFeature to determine if a feature is present on the cluster.
161-
org.elasticsearch.cluster.ClusterFeatures#clusterHasFeature(org.elasticsearch.features.NodeFeature)
159+
org.elasticsearch.cluster.ClusterFeatures#clusterHasFeature(org.elasticsearch.cluster.node.DiscoveryNodes, org.elasticsearch.features.NodeFeature)
162160

163161
@defaultMessage Do not construct this records outside the source files they are declared in
164162
org.elasticsearch.cluster.SnapshotsInProgress$ShardSnapshotStatus#<init>(java.lang.String, org.elasticsearch.cluster.SnapshotsInProgress$ShardState, org.elasticsearch.repositories.ShardGeneration, java.lang.String, org.elasticsearch.repositories.ShardSnapshotResult)

docs/changelog/118143.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 118143
2+
summary: Infrastructure for assuming cluster features in the next major version
3+
area: "Infra/Core"
4+
type: feature
5+
issues: []

server/src/main/java/org/elasticsearch/cluster/ClusterFeatures.java

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99

1010
package org.elasticsearch.cluster;
1111

12+
import org.elasticsearch.cluster.node.DiscoveryNode;
13+
import org.elasticsearch.cluster.node.DiscoveryNodes;
1214
import org.elasticsearch.common.io.stream.StreamInput;
1315
import org.elasticsearch.common.io.stream.StreamOutput;
1416
import org.elasticsearch.common.xcontent.ChunkedToXContent;
1517
import org.elasticsearch.common.xcontent.ChunkedToXContentObject;
16-
import org.elasticsearch.core.SuppressForbidden;
1718
import org.elasticsearch.features.NodeFeature;
1819
import org.elasticsearch.xcontent.ToXContent;
1920

@@ -79,28 +80,61 @@ public Map<String, Set<String>> nodeFeatures() {
7980
return nodeFeatures;
8081
}
8182

82-
/**
83-
* The features in all nodes in the cluster.
84-
* <p>
85-
* NOTE: This should not be used directly.
86-
* Please use {@link org.elasticsearch.features.FeatureService#clusterHasFeature} instead.
87-
*/
88-
public Set<String> allNodeFeatures() {
83+
private Set<String> allNodeFeatures() {
8984
if (allNodeFeatures == null) {
9085
allNodeFeatures = Set.copyOf(calculateAllNodeFeatures(nodeFeatures.values()));
9186
}
9287
return allNodeFeatures;
9388
}
9489

90+
/**
91+
* Returns {@code true} if {@code node} can have assumed features.
92+
* @see org.elasticsearch.env.BuildVersion#canRemoveAssumedFeatures
93+
*/
94+
public static boolean featuresCanBeAssumedForNode(DiscoveryNode node) {
95+
return node.getBuildVersion().canRemoveAssumedFeatures();
96+
}
97+
98+
/**
99+
* Returns {@code true} if one or more nodes in {@code nodes} can have assumed features.
100+
* @see org.elasticsearch.env.BuildVersion#canRemoveAssumedFeatures
101+
*/
102+
public static boolean featuresCanBeAssumedForNodes(DiscoveryNodes nodes) {
103+
return nodes.getAllNodes().stream().anyMatch(n -> n.getBuildVersion().canRemoveAssumedFeatures());
104+
}
105+
95106
/**
96107
* {@code true} if {@code feature} is present on all nodes in the cluster.
97108
* <p>
98109
* NOTE: This should not be used directly.
99110
* Please use {@link org.elasticsearch.features.FeatureService#clusterHasFeature} instead.
100111
*/
101-
@SuppressForbidden(reason = "directly reading cluster features")
102-
public boolean clusterHasFeature(NodeFeature feature) {
103-
return allNodeFeatures().contains(feature.id());
112+
public boolean clusterHasFeature(DiscoveryNodes nodes, NodeFeature feature) {
113+
assert nodes.getNodes().keySet().equals(nodeFeatures.keySet())
114+
: "Cluster features nodes " + nodeFeatures.keySet() + " is different to discovery nodes " + nodes.getNodes().keySet();
115+
116+
// basic case
117+
boolean allNodesHaveFeature = allNodeFeatures().contains(feature.id());
118+
if (allNodesHaveFeature) {
119+
return true;
120+
}
121+
122+
// if the feature is assumed, check the versions more closely
123+
// it's actually ok if the feature is assumed, and all nodes missing the feature can assume it
124+
// TODO: do we need some kind of transient cache of this calculation?
125+
if (feature.assumedAfterNextCompatibilityBoundary()) {
126+
for (var nf : nodeFeatures.entrySet()) {
127+
if (nf.getValue().contains(feature.id()) == false
128+
&& featuresCanBeAssumedForNode(nodes.getNodes().get(nf.getKey())) == false) {
129+
return false;
130+
}
131+
}
132+
133+
// all nodes missing the feature can assume it - so that's alright then
134+
return true;
135+
}
136+
137+
return false;
104138
}
105139

106140
/**

server/src/main/java/org/elasticsearch/cluster/coordination/NodeJoinExecutor.java

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.common.Priority;
3030
import org.elasticsearch.common.Strings;
3131
import org.elasticsearch.features.FeatureService;
32+
import org.elasticsearch.features.NodeFeature;
3233
import org.elasticsearch.index.IndexVersion;
3334
import org.elasticsearch.index.IndexVersions;
3435
import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
@@ -39,6 +40,7 @@
3940
import java.util.Comparator;
4041
import java.util.HashMap;
4142
import java.util.HashSet;
43+
import java.util.Iterator;
4244
import java.util.List;
4345
import java.util.Map;
4446
import java.util.Objects;
@@ -137,8 +139,8 @@ public ClusterState execute(BatchExecutionContext<JoinTask> batchExecutionContex
137139

138140
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(newState.nodes());
139141
Map<String, CompatibilityVersions> compatibilityVersionsMap = new HashMap<>(newState.compatibilityVersions());
140-
Map<String, Set<String>> nodeFeatures = new HashMap<>(newState.nodeFeatures());
141-
Set<String> allNodesFeatures = ClusterFeatures.calculateAllNodeFeatures(nodeFeatures.values());
142+
Map<String, Set<String>> nodeFeatures = new HashMap<>(newState.nodeFeatures()); // as present in cluster state
143+
Set<String> effectiveClusterFeatures = calculateEffectiveClusterFeatures(newState.nodes(), nodeFeatures);
142144

143145
assert nodesBuilder.isLocalNodeElectedMaster();
144146

@@ -174,14 +176,17 @@ public ClusterState execute(BatchExecutionContext<JoinTask> batchExecutionContex
174176
}
175177
blockForbiddenVersions(compatibilityVersions.transportVersion());
176178
ensureNodesCompatibility(node.getVersion(), minClusterNodeVersion, maxClusterNodeVersion);
177-
enforceNodeFeatureBarrier(node.getId(), allNodesFeatures, features);
179+
Set<String> newNodeEffectiveFeatures = enforceNodeFeatureBarrier(node, effectiveClusterFeatures, features);
178180
// we do this validation quite late to prevent race conditions between nodes joining and importing dangling indices
179181
// we have to reject nodes that don't support all indices we have in this cluster
180182
ensureIndexCompatibility(node.getMinIndexVersion(), node.getMaxIndexVersion(), initialState.getMetadata());
183+
181184
nodesBuilder.add(node);
182185
compatibilityVersionsMap.put(node.getId(), compatibilityVersions);
186+
// store the actual node features here, not including assumed features, as this is persisted in cluster state
183187
nodeFeatures.put(node.getId(), features);
184-
allNodesFeatures.retainAll(features);
188+
effectiveClusterFeatures.retainAll(newNodeEffectiveFeatures);
189+
185190
nodesChanged = true;
186191
minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion());
187192
maxClusterNodeVersion = Version.max(maxClusterNodeVersion, node.getVersion());
@@ -355,6 +360,35 @@ private static void blockForbiddenVersions(TransportVersion joiningTransportVers
355360
}
356361
}
357362

363+
/**
364+
* Calculate the cluster's effective features. This includes all features that are assumed on any nodes in the cluster,
365+
* that are also present across the whole cluster as a result.
366+
*/
367+
private Set<String> calculateEffectiveClusterFeatures(DiscoveryNodes nodes, Map<String, Set<String>> nodeFeatures) {
368+
if (featureService.featuresCanBeAssumedForNodes(nodes)) {
369+
Set<String> assumedFeatures = featureService.getNodeFeatures()
370+
.values()
371+
.stream()
372+
.filter(NodeFeature::assumedAfterNextCompatibilityBoundary)
373+
.map(NodeFeature::id)
374+
.collect(Collectors.toSet());
375+
376+
// add all assumed features to the featureset of all nodes of the next major version
377+
nodeFeatures = new HashMap<>(nodeFeatures);
378+
for (var node : nodes.getNodes().entrySet()) {
379+
if (featureService.featuresCanBeAssumedForNode(node.getValue())) {
380+
assert nodeFeatures.containsKey(node.getKey()) : "Node " + node.getKey() + " does not have any features";
381+
nodeFeatures.computeIfPresent(node.getKey(), (k, v) -> {
382+
var newFeatures = new HashSet<>(v);
383+
return newFeatures.addAll(assumedFeatures) ? newFeatures : v;
384+
});
385+
}
386+
}
387+
}
388+
389+
return ClusterFeatures.calculateAllNodeFeatures(nodeFeatures.values());
390+
}
391+
358392
/**
359393
* Ensures that all indices are compatible with the given index version. This will ensure that all indices in the given metadata
360394
* will not be created with a newer version of elasticsearch as well as that all indices are newer or equal to the minimum index
@@ -461,13 +495,44 @@ public static void ensureVersionBarrier(Version joiningNodeVersion, Version minC
461495
}
462496
}
463497

464-
private void enforceNodeFeatureBarrier(String nodeId, Set<String> existingNodesFeatures, Set<String> newNodeFeatures) {
498+
/**
499+
* Enforces the feature join barrier - a joining node should have all features already present in all existing nodes in the cluster
500+
*
501+
* @return The set of features that this node has (including assumed features)
502+
*/
503+
private Set<String> enforceNodeFeatureBarrier(DiscoveryNode node, Set<String> effectiveClusterFeatures, Set<String> newNodeFeatures) {
465504
// prevent join if it does not have one or more features that all other nodes have
466-
Set<String> missingFeatures = new HashSet<>(existingNodesFeatures);
505+
Set<String> missingFeatures = new HashSet<>(effectiveClusterFeatures);
467506
missingFeatures.removeAll(newNodeFeatures);
468507

469-
if (missingFeatures.isEmpty() == false) {
470-
throw new IllegalStateException("Node " + nodeId + " is missing required features " + missingFeatures);
508+
if (missingFeatures.isEmpty()) {
509+
// nothing missing - all ok
510+
return newNodeFeatures;
511+
}
512+
513+
if (featureService.featuresCanBeAssumedForNode(node)) {
514+
// it might still be ok for this node to join if this node can have assumed features,
515+
// and all the missing features are assumed
516+
// we can get the NodeFeature object direct from this node's registered features
517+
// as all existing nodes in the cluster have the features present in existingNodesFeatures, including this one
518+
newNodeFeatures = new HashSet<>(newNodeFeatures);
519+
for (Iterator<String> it = missingFeatures.iterator(); it.hasNext();) {
520+
String feature = it.next();
521+
NodeFeature nf = featureService.getNodeFeatures().get(feature);
522+
if (nf.assumedAfterNextCompatibilityBoundary()) {
523+
// its ok for this feature to be missing from this node
524+
it.remove();
525+
// and it should be assumed to still be in the cluster
526+
newNodeFeatures.add(feature);
527+
}
528+
// even if we don't remove it, still continue, so the exception message below is accurate
529+
}
530+
}
531+
532+
if (missingFeatures.isEmpty()) {
533+
return newNodeFeatures;
534+
} else {
535+
throw new IllegalStateException("Node " + node.getId() + " is missing required features " + missingFeatures);
471536
}
472537
}
473538

server/src/main/java/org/elasticsearch/env/BuildVersion.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@
3737
*/
3838
public abstract class BuildVersion implements ToXContentFragment, Writeable {
3939

40+
/**
41+
* Checks if this version can operate properly in a cluster without features
42+
* that are assumed in the currently running Elasticsearch.
43+
*/
44+
public abstract boolean canRemoveAssumedFeatures();
45+
4046
/**
4147
* Check whether this version is on or after a minimum threshold.
4248
*

server/src/main/java/org/elasticsearch/env/DefaultBuildVersion.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ final class DefaultBuildVersion extends BuildVersion {
4747
this(in.readVInt());
4848
}
4949

50+
@Override
51+
public boolean canRemoveAssumedFeatures() {
52+
/*
53+
* We can remove assumed features if the node version is the next major version.
54+
* This is because the next major version can only form a cluster with the
55+
* latest minor version of the previous major, so any features introduced before that point
56+
* (that are marked as assumed in the running code version) are automatically met by that version.
57+
*/
58+
return version.major == Version.CURRENT.major + 1;
59+
}
60+
5061
@Override
5162
public boolean onOrAfterMinimumCompatible() {
5263
return Version.CURRENT.minimumCompatibilityVersion().onOrBefore(version);

server/src/main/java/org/elasticsearch/features/FeatureService.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99

1010
package org.elasticsearch.features;
1111

12+
import org.elasticsearch.cluster.ClusterFeatures;
1213
import org.elasticsearch.cluster.ClusterState;
14+
import org.elasticsearch.cluster.node.DiscoveryNode;
15+
import org.elasticsearch.cluster.node.DiscoveryNodes;
1316
import org.elasticsearch.core.SuppressForbidden;
1417
import org.elasticsearch.logging.LogManager;
1518
import org.elasticsearch.logging.Logger;
@@ -38,9 +41,7 @@ public class FeatureService {
3841
* as the local node's supported feature set
3942
*/
4043
public FeatureService(List<? extends FeatureSpecification> specs) {
41-
42-
var featureData = FeatureData.createFromSpecifications(specs);
43-
nodeFeatures = featureData.getNodeFeatures();
44+
this.nodeFeatures = FeatureData.createFromSpecifications(specs).getNodeFeatures();
4445

4546
logger.info("Registered local node features {}", nodeFeatures.keySet().stream().sorted().toList());
4647
}
@@ -53,11 +54,25 @@ public Map<String, NodeFeature> getNodeFeatures() {
5354
return nodeFeatures;
5455
}
5556

57+
/**
58+
* Returns {@code true} if {@code node} can have assumed features.
59+
*/
60+
public boolean featuresCanBeAssumedForNode(DiscoveryNode node) {
61+
return ClusterFeatures.featuresCanBeAssumedForNode(node);
62+
}
63+
64+
/**
65+
* Returns {@code true} if one or more nodes in {@code nodes} can have assumed features.
66+
*/
67+
public boolean featuresCanBeAssumedForNodes(DiscoveryNodes nodes) {
68+
return ClusterFeatures.featuresCanBeAssumedForNodes(nodes);
69+
}
70+
5671
/**
5772
* Returns {@code true} if all nodes in {@code state} support feature {@code feature}.
5873
*/
5974
@SuppressForbidden(reason = "We need basic feature information from cluster state")
6075
public boolean clusterHasFeature(ClusterState state, NodeFeature feature) {
61-
return state.clusterFeatures().clusterHasFeature(feature);
76+
return state.clusterFeatures().clusterHasFeature(state.nodes(), feature);
6277
}
6378
}

server/src/main/java/org/elasticsearch/features/NodeFeature.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,17 @@
1515
* A feature published by a node.
1616
*
1717
* @param id The feature id. Must be unique in the node.
18+
* @param assumedAfterNextCompatibilityBoundary
19+
* {@code true} if this feature is removed at the next compatibility boundary (ie next major version),
20+
* and so should be assumed to be true for all nodes after that boundary.
1821
*/
19-
public record NodeFeature(String id) {
22+
public record NodeFeature(String id, boolean assumedAfterNextCompatibilityBoundary) {
2023

2124
public NodeFeature {
2225
Objects.requireNonNull(id);
2326
}
27+
28+
public NodeFeature(String id) {
29+
this(id, false);
30+
}
2431
}

server/src/main/java/org/elasticsearch/readiness/ReadinessService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ protected boolean areFileSettingsApplied(ClusterState clusterState) {
294294
}
295295

296296
@SuppressForbidden(reason = "need to check file settings support on exact cluster state")
297-
private static boolean supportsFileSettings(ClusterState clusterState) {
298-
return clusterState.clusterFeatures().clusterHasFeature(FileSettingsFeatures.FILE_SETTINGS_SUPPORTED);
297+
private boolean supportsFileSettings(ClusterState clusterState) {
298+
return clusterState.clusterFeatures().clusterHasFeature(clusterState.nodes(), FileSettingsFeatures.FILE_SETTINGS_SUPPORTED);
299299
}
300300

301301
private void setReady(boolean ready) {

0 commit comments

Comments
 (0)