-
Notifications
You must be signed in to change notification settings - Fork 25.5k
Make the health node available even if all nodes are marked for shutdown (#92193) #112825
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,7 @@ | |
import java.util.concurrent.atomic.AtomicReference; | ||
|
||
import static org.elasticsearch.health.node.selection.HealthNode.TASK_NAME; | ||
import static org.elasticsearch.persistent.PersistentTasksClusterService.CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING; | ||
|
||
/** | ||
* Persistent task executor that is managing the {@link HealthNode}. | ||
|
@@ -64,6 +65,7 @@ public final class HealthNodeTaskExecutor extends PersistentTasksExecutor<Health | |
private final AtomicReference<HealthNode> currentTask = new AtomicReference<>(); | ||
private final ClusterStateListener taskStarter; | ||
private final ClusterStateListener shutdownListener; | ||
private Boolean allowLightweightAssignmentsToNodesShuttingDown; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why a |
||
private volatile boolean enabled; | ||
|
||
private HealthNodeTaskExecutor( | ||
|
@@ -79,6 +81,18 @@ private HealthNodeTaskExecutor( | |
this.taskStarter = this::startTask; | ||
this.shutdownListener = this::shuttingDown; | ||
this.enabled = ENABLED_SETTING.get(settings); | ||
this.allowLightweightAssignmentsToNodesShuttingDown = | ||
CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING.get(settings); | ||
clusterService.getClusterSettings() | ||
.addSettingsUpdateConsumer( | ||
CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING, | ||
this::setAllowLightweightAssignmentsToNodesShuttingDownFlag | ||
); | ||
} | ||
|
||
// visible for testing only | ||
public void setAllowLightweightAssignmentsToNodesShuttingDownFlag(Boolean allowLightweightAssignmentsToNodesShuttingDown) { | ||
this.allowLightweightAssignmentsToNodesShuttingDown = allowLightweightAssignmentsToNodesShuttingDown; | ||
} | ||
|
||
public static HealthNodeTaskExecutor create( | ||
|
@@ -186,9 +200,15 @@ void startTask(ClusterChangedEvent event) { | |
|
||
// visible for testing | ||
void shuttingDown(ClusterChangedEvent event) { | ||
DiscoveryNode node = clusterService.localNode(); | ||
if (isNodeShuttingDown(event, node.getId())) { | ||
abortTaskIfApplicable("node [{" + node.getName() + "}{" + node.getId() + "}] shutting down"); | ||
DiscoveryNode localNode = clusterService.localNode(); | ||
|
||
if (isNodeShuttingDown(event, localNode.getId())) { | ||
// only abort task if not every node is shutting down to avoid constant rescheduling on a cluster that is shutting down. | ||
// only check this condition if lightweight tasks are allowed to be scheduled to nodes shutting down in the first place. | ||
if (allowLightweightAssignmentsToNodesShuttingDown == false | ||
|| isEveryNodeShuttingDown(clusterService.state(), event) == false) { | ||
abortTaskIfApplicable("node [{" + localNode.getName() + "}{" + localNode.getId() + "}] shutting down"); | ||
} | ||
} | ||
} | ||
|
||
|
@@ -207,6 +227,10 @@ private static boolean isNodeShuttingDown(ClusterChangedEvent event, String node | |
&& event.state().metadata().nodeShutdowns().contains(nodeId); | ||
} | ||
|
||
private static boolean isEveryNodeShuttingDown(ClusterState clusterState, ClusterChangedEvent event) { | ||
return clusterState.nodes().getAllNodes().stream().allMatch(dn -> event.state().metadata().nodeShutdowns().contains(dn.getId())); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why using clusterState from clusterService.state() when you already have event.state()? Am I missing something here? |
||
} | ||
|
||
public static List<NamedXContentRegistry.Entry> getNamedXContentParsers() { | ||
return List.of( | ||
new NamedXContentRegistry.Entry(PersistentTaskParams.class, new ParseField(TASK_NAME), HealthNodeTaskParams::fromXContent) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,5 +16,13 @@ | |
* Parameters used to start persistent task | ||
*/ | ||
public interface PersistentTaskParams extends VersionedNamedWriteable, ToXContentObject { | ||
|
||
/** | ||
* Lightweight tasks are allowed to be scheduled to nodes shutting down iff all nodes are shutting down. | ||
* The setting | ||
* {@link PersistentTasksClusterService#CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING} | ||
* needs to be set for this behavior to apply. | ||
*/ | ||
default boolean isLightweight() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am guessing this term was inspired by the original issue. However, this term is not established and it was more used to describe the health node task. Since we do not have any other usage for this method, I suggest we give a more specific name, something along the lines of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ++ for |
||
return false; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,14 @@ public final class PersistentTasksClusterService implements ClusterStateListener | |
Setting.Property.NodeScope | ||
); | ||
|
||
public static final Setting<Boolean> CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING = Setting | ||
.boolSetting( | ||
"cluster.persistent_tasks.allocation.allow_lightweight_assignments_to_nodes_shutting_down", | ||
false, | ||
Setting.Property.Dynamic, | ||
Setting.Property.NodeScope | ||
); | ||
|
||
Comment on lines
+60
to
+67
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this change is safe enough to not add a switch. Each task that defines itself as lightweight should ensure that it is safe to run always like that or that it needs the flag. What do you think? |
||
private static final Logger logger = LogManager.getLogger(PersistentTasksClusterService.class); | ||
|
||
private final ClusterService clusterService; | ||
|
@@ -65,6 +73,7 @@ public final class PersistentTasksClusterService implements ClusterStateListener | |
private final ThreadPool threadPool; | ||
private final PeriodicRechecker periodicRechecker; | ||
private final AtomicBoolean reassigningTasks = new AtomicBoolean(false); | ||
private Boolean allowLightweightAssignmentsToNodesShuttingDown; | ||
|
||
public PersistentTasksClusterService( | ||
Settings settings, | ||
|
@@ -77,18 +86,30 @@ public PersistentTasksClusterService( | |
this.enableDecider = new EnableAssignmentDecider(settings, clusterService.getClusterSettings()); | ||
this.threadPool = threadPool; | ||
this.periodicRechecker = new PeriodicRechecker(CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING.get(settings)); | ||
this.allowLightweightAssignmentsToNodesShuttingDown = | ||
CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING.get(settings); | ||
if (DiscoveryNode.isMasterNode(settings)) { | ||
clusterService.addListener(this); | ||
} | ||
clusterService.getClusterSettings() | ||
.addSettingsUpdateConsumer(CLUSTER_TASKS_ALLOCATION_RECHECK_INTERVAL_SETTING, this::setRecheckInterval); | ||
clusterService.getClusterSettings() | ||
.addSettingsUpdateConsumer( | ||
CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING, | ||
this::setAllowLightweightAssignmentsToNodesShuttingDownFlag | ||
); | ||
} | ||
|
||
// visible for testing only | ||
public void setRecheckInterval(TimeValue recheckInterval) { | ||
periodicRechecker.setInterval(recheckInterval); | ||
} | ||
|
||
// visible for testing only | ||
public void setAllowLightweightAssignmentsToNodesShuttingDownFlag(Boolean allowLightweightAssignmentsToNodesShuttingDown) { | ||
this.allowLightweightAssignmentsToNodesShuttingDown = allowLightweightAssignmentsToNodesShuttingDown; | ||
} | ||
|
||
// visible for testing only | ||
PeriodicRechecker getPeriodicRechecker() { | ||
return periodicRechecker; | ||
|
@@ -340,19 +361,30 @@ private <Params extends PersistentTaskParams> Assignment createAssignment( | |
return unassignedAssignment("persistent task [" + taskName + "] cannot be assigned [" + decision.getReason() + "]"); | ||
} | ||
|
||
// Filter all nodes that are marked as shutting down, because we do not | ||
// Filter out all nodes that are marked as shutting down, because we do not | ||
// want to assign a persistent task to a node that will shortly be | ||
// leaving the cluster | ||
final List<DiscoveryNode> candidateNodes = currentState.nodes() | ||
.stream() | ||
// leaving the cluster. The exception to this are lightweight tasks if no other | ||
// nodes are available. An example of a lightweight task is the HealthNodeTask. | ||
// This behavior needs to be enabled with the CLUSTER_TASKS_ALLOCATION_ALLOW_LIGHTWEIGHT_ASSIGNMENTS_TO_NODES_SHUTTING_DOWN_SETTING | ||
// setting. | ||
Comment on lines
+367
to
+369
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the comment is clear enough without the example. Someone can always check which tasks implement the |
||
final List<DiscoveryNode> allNodes = currentState.nodes().stream().toList(); | ||
final List<DiscoveryNode> candidateNodes = allNodes.stream() | ||
.filter(dn -> currentState.metadata().nodeShutdowns().contains(dn.getId()) == false) | ||
.collect(Collectors.toCollection(ArrayList::new)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could use |
||
|
||
if (candidateNodes.isEmpty() && allNodes.isEmpty() == false) { // all nodes are shutting down | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think creating a new collection |
||
if (this.allowLightweightAssignmentsToNodesShuttingDown && taskParams.isLightweight()) { | ||
candidateNodes.addAll(allNodes); | ||
} | ||
} | ||
|
||
// Task assignment should not rely on node order | ||
Randomness.shuffle(candidateNodes); | ||
|
||
final Assignment assignment = persistentTasksExecutor.getAssignment(taskParams, candidateNodes, currentState); | ||
assert assignment != null : "getAssignment() should always return an Assignment object, containing a node or a reason why not"; | ||
assert (assignment.getExecutorNode() == null | ||
assert ((this.allowLightweightAssignmentsToNodesShuttingDown && taskParams.isLightweight()) | ||
|| assignment.getExecutorNode() == null | ||
|| currentState.metadata().nodeShutdowns().contains(assignment.getExecutorNode()) == false) | ||
: "expected task [" | ||
+ taskName | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think this note is needed here; better add this information to the changelog.