@@ -184,11 +184,14 @@ void onMaster(ClusterState clusterState) {
184184 maybeScheduleJob ();
185185
186186 for (var projectId : clusterState .metadata ().projects ().keySet ()) {
187- onMaster (clusterState .projectState (projectId ));
187+ maybeRunAsyncActions (clusterState .projectState (projectId ));
188188 }
189189 }
190190
191- void onMaster (ProjectState state ) {
191+ /**
192+ * Kicks off any async actions that may not have been run due to either master failover or ILM being manually stopped.
193+ */
194+ private void maybeRunAsyncActions (ProjectState state ) {
192195 final ProjectMetadata projectMetadata = state .metadata ();
193196 final IndexLifecycleMetadata currentMetadata = projectMetadata .custom (IndexLifecycleMetadata .TYPE );
194197 if (currentMetadata != null ) {
@@ -198,67 +201,51 @@ void onMaster(ProjectState state) {
198201 }
199202
200203 boolean safeToStop = true ; // true until proven false by a run policy
201-
202- // If we just became master, we need to kick off any async actions that
203- // may have not been run due to master rollover
204204 for (IndexMetadata idxMeta : projectMetadata .indices ().values ()) {
205- if (projectMetadata .isIndexManagedByILM (idxMeta )) {
206- String policyName = idxMeta .getLifecyclePolicyName ();
207- final LifecycleExecutionState lifecycleState = idxMeta .getLifecycleExecutionState ();
208- StepKey stepKey = Step .getCurrentStepKey (lifecycleState );
209-
210- try {
211- if (OperationMode .STOPPING == currentMode ) {
212- if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED .contains (stepKey .name ())) {
213- logger .info (
214- "waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]" ,
215- idxMeta .getIndex ().getName (),
216- policyName ,
217- stepKey .name ()
218- );
219- lifecycleRunner .maybeRunAsyncAction (state , idxMeta , policyName , stepKey );
220- // ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
221- safeToStop = false ;
222- } else {
223- logger .info (
224- "skipping policy execution of step [{}] for index [{}] with policy [{}]" + " because ILM is stopping" ,
225- stepKey == null ? "n/a" : stepKey .name (),
226- idxMeta .getIndex ().getName (),
227- policyName
228- );
229- }
230- } else {
231- lifecycleRunner .maybeRunAsyncAction (state , idxMeta , policyName , stepKey );
232- }
233- } catch (Exception e ) {
234- if (logger .isTraceEnabled ()) {
235- logger .warn (
236- () -> format (
237- "async action execution failed during master election trigger"
238- + " for index [%s] with policy [%s] in step [%s], lifecycle state: [%s]" ,
239- idxMeta .getIndex ().getName (),
240- policyName ,
241- stepKey ,
242- lifecycleState .asMap ()
243- ),
244- e
245- );
246- } else {
247- logger .warn (
248- () -> format (
249- "async action execution failed during master election trigger"
250- + " for index [%s] with policy [%s] in step [%s]" ,
251- idxMeta .getIndex ().getName (),
252- policyName ,
253- stepKey
254- ),
255- e
256- );
205+ if (projectMetadata .isIndexManagedByILM (idxMeta ) == false ) {
206+ continue ;
207+ }
208+ String policyName = idxMeta .getLifecyclePolicyName ();
209+ final LifecycleExecutionState lifecycleState = idxMeta .getLifecycleExecutionState ();
210+ StepKey stepKey = Step .getCurrentStepKey (lifecycleState );
257211
258- }
259- // Don't rethrow the exception, we don't want a failure for one index to be
260- // called to cause actions not to be triggered for further indices
212+ try {
213+ if (currentMode == OperationMode .RUNNING ) {
214+ lifecycleRunner .maybeRunAsyncAction (state , idxMeta , policyName , stepKey );
215+ continue ;
216+ }
217+ if (stepKey != null && IGNORE_STEPS_MAINTENANCE_REQUESTED .contains (stepKey .name ())) {
218+ logger .info (
219+ "waiting to stop ILM because index [{}] with policy [{}] is currently in step [{}]" ,
220+ idxMeta .getIndex ().getName (),
221+ policyName ,
222+ stepKey .name ()
223+ );
224+ lifecycleRunner .maybeRunAsyncAction (state , idxMeta , policyName , stepKey );
225+ // ILM is trying to stop, but this index is in a Shrink step (or other dangerous step) so we can't stop
226+ safeToStop = false ;
227+ } else {
228+ logger .info (
229+ "skipping policy execution of step [{}] for index [{}] with policy [{}]" + " because ILM is stopping" ,
230+ stepKey == null ? "n/a" : stepKey .name (),
231+ idxMeta .getIndex ().getName (),
232+ policyName
233+ );
234+ }
235+ } catch (Exception e ) {
236+ String format = format (
237+ "async action execution failed during master election trigger for index [%s] with policy [%s] in step [%s]" ,
238+ idxMeta .getIndex ().getName (),
239+ policyName ,
240+ stepKey
241+ );
242+ if (logger .isTraceEnabled ()) {
243+ format += format (", lifecycle state: [%s]" , lifecycleState .asMap ());
261244 }
245+ logger .warn (format , e );
246+
247+ // Don't rethrow the exception, we don't want a failure for one index to be
248+ // called to cause actions not to be triggered for further indices
262249 }
263250 }
264251
@@ -333,6 +320,20 @@ public void clusterChanged(ClusterChangedEvent event) {
333320 cancelJob ();
334321 policyRegistry .clear ();
335322 }
323+ } else if (this .isMaster ) {
324+ // If we are the master and we were before, check if any projects changed their ILM mode from non-RUNNING to RUNNING.
325+ // If so, kick off any async actions that may not have run while not in RUNNING mode.
326+ for (ProjectMetadata project : event .state ().metadata ().projects ().values ()) {
327+ final var previousProject = event .previousState ().metadata ().projects ().get (project .id ());
328+ if (previousProject == null || project == previousProject ) {
329+ continue ;
330+ }
331+ final OperationMode currentMode = currentILMMode (project );
332+ final OperationMode previousMode = currentILMMode (previousProject );
333+ if (currentMode == OperationMode .RUNNING && previousMode != OperationMode .RUNNING ) {
334+ maybeRunAsyncActions (event .state ().projectState (project .id ()));
335+ }
336+ }
336337 }
337338
338339 // if we're the master, then process deleted indices and trigger policies
0 commit comments