@@ -266,7 +266,9 @@ protected void doStart(ClusterState clusterState) {
266266 } else {
267267 if (nodes .getClusterManagerNode () == null ) {
268268 logger .debug ("no known cluster-manager node, scheduling a retry" );
269- retryOnMasterChange (clusterState , null );
269+ // No cluster-manager node is known here, so pass only the state version (node IDs are null) rather than the full ClusterState
270+ final long stateVersion = clusterState .version ();
271+ retryOnMasterChange (stateVersion , null , null , null );
270272 } else {
271273 DiscoveryNode clusterManagerNode = nodes .getClusterManagerNode ();
272274 if (clusterManagerNode .getVersion ().onOrAfter (V_2_13_0 ) && localExecuteSupportedByAction ()) {
@@ -285,11 +287,23 @@ protected void doStart(ClusterState clusterState) {
285287 }
286288 }
287289
288- private void retryOnMasterChange (ClusterState state , Throwable failure ) {
289- retry (state , failure , ClusterManagerNodeChangePredicate .build (state ));
290+ private void retryOnMasterChange (long stateVersion , String ephemeralNodeId , String persistentNodeId , Throwable failure ) {
291+ retry (
292+ stateVersion ,
293+ ephemeralNodeId ,
294+ persistentNodeId ,
295+ failure ,
296+ ClusterManagerNodeChangePredicate .build (stateVersion , ephemeralNodeId )
297+ );
290298 }
291299
292- private void retry (ClusterState state , final Throwable failure , final Predicate <ClusterState > statePredicate ) {
300+ private void retry (
301+ final long stateVersion ,
302+ final String ephemeralNodeId ,
303+ final String persistentNodeId ,
304+ final Throwable failure ,
305+ final Predicate <ClusterState > statePredicate
306+ ) {
293307 if (observer == null ) {
294308 final long remainingTimeoutMS = request .clusterManagerNodeTimeout ().millis () - (threadPool .relativeTimeInMillis ()
295309 - startTime );
@@ -299,7 +313,8 @@ private void retry(ClusterState state, final Throwable failure, final Predicate<
299313 return ;
300314 }
301315 this .observer = new ClusterStateObserver (
302- state ,
316+ persistentNodeId ,
317+ stateVersion ,
303318 clusterService ,
304319 TimeValue .timeValueMillis (remainingTimeoutMS ),
305320 logger ,
@@ -329,6 +344,12 @@ public void onTimeout(TimeValue timeout) {
329344 }
330345
331346 private ActionListener <Response > getDelegateForLocalExecute (ClusterState clusterState ) {
347+ // Extract version and cluster manager node IDs before creating closure to avoid retaining full ClusterState
348+ final long stateVersion = clusterState .version ();
349+ final DiscoveryNode clusterManagerNode = clusterState .nodes ().getClusterManagerNode ();
350+ final String ephemeralNodeId = clusterManagerNode != null ? clusterManagerNode .getEphemeralId () : null ;
351+ final String persistentNodeId = clusterManagerNode != null ? clusterManagerNode .getId () : null ;
352+
332353 return ActionListener .delegateResponse (listener , (delegatedListener , t ) -> {
333354 if (t instanceof FailedToCommitClusterStateException || t instanceof NotClusterManagerException ) {
334355 logger .debug (
@@ -340,7 +361,7 @@ private ActionListener<Response> getDelegateForLocalExecute(ClusterState cluster
340361 t
341362 );
342363
343- retryOnMasterChange (clusterState , t );
364+ retryOnMasterChange (stateVersion , ephemeralNodeId , persistentNodeId , t );
344365 } else {
345366 delegatedListener .onFailure (t );
346367 }
@@ -352,6 +373,12 @@ protected BiConsumer<DiscoveryNode, ClusterState> clusterStateLatestChecker(
352373 BiConsumer <DiscoveryNode , ClusterState > onStaleLocalState
353374 ) {
354375 return (clusterManagerNode , clusterState ) -> {
376+ // Extract version and cluster manager node IDs up front so handleException below does not need the full ClusterState
377+ final long stateVersion = clusterState .version ();
378+ final DiscoveryNode currentClusterManagerNode = clusterState .nodes ().getClusterManagerNode ();
379+ final String currentEphemeralNodeId = currentClusterManagerNode != null ? currentClusterManagerNode .getEphemeralId () : null ;
380+ final String currentPersistentNodeId = currentClusterManagerNode != null ? currentClusterManagerNode .getId () : null ;
381+
355382 transportService .sendRequest (
356383 clusterManagerNode ,
357384 GetTermVersionAction .NAME ,
@@ -377,7 +404,13 @@ public void handleResponse(GetTermVersionResponse response) {
377404
378405 @ Override
379406 public void handleException (TransportException exp ) {
380- handleTransportException (clusterManagerNode , clusterState , exp );
407+ handleTransportException (
408+ clusterManagerNode ,
409+ stateVersion ,
410+ currentEphemeralNodeId ,
411+ currentPersistentNodeId ,
412+ exp
413+ );
381414 }
382415
383416 @ Override
@@ -455,7 +488,12 @@ private boolean checkForBlock(Request request, ClusterState localClusterState) {
455488 listener .onFailure (blockException );
456489 } else {
457490 logger .debug ("can't execute due to a cluster block, retrying" , blockException );
458- retry (localClusterState , blockException , newState -> {
491+ // Avoid capturing full ClusterState in the lambda
492+ final long blockStateVersion = localClusterState .version ();
493+ final DiscoveryNode blockClusterManagerNode = localClusterState .nodes ().getClusterManagerNode ();
494+ final String blockEphemeralNodeId = blockClusterManagerNode != null ? blockClusterManagerNode .getEphemeralId () : null ;
495+ final String blockPersistentNodeId = blockClusterManagerNode != null ? blockClusterManagerNode .getId () : null ;
496+ retry (blockStateVersion , blockEphemeralNodeId , blockPersistentNodeId , blockException , newState -> {
459497 try {
460498 ClusterBlockException newException = checkBlock (request , newState );
461499 return (newException == null || !newException .retryable ());
@@ -490,20 +528,32 @@ private void executeOnLocalNode(ClusterState localClusterState) {
490528 private void executeOnClusterManager (DiscoveryNode clusterManagerNode , ClusterState clusterState ) {
491529 final String actionName = getClusterManagerActionName (clusterManagerNode );
492530
531+ // Extract version and cluster manager node IDs before creating closure to avoid retaining full ClusterState
532+ final long stateVersion = clusterState .version ();
533+ final DiscoveryNode currentClusterManagerNode = clusterState .nodes ().getClusterManagerNode ();
534+ final String currentEphemeralNodeId = currentClusterManagerNode != null ? currentClusterManagerNode .getEphemeralId () : null ;
535+ final String currentPersistentNodeId = currentClusterManagerNode != null ? currentClusterManagerNode .getId () : null ;
536+
493537 transportService .sendRequest (
494538 clusterManagerNode ,
495539 actionName ,
496540 request ,
497541 new ActionListenerResponseHandler <Response >(listener , TransportClusterManagerNodeAction .this ::read ) {
498542 @ Override
499543 public void handleException (final TransportException exp ) {
500- handleTransportException (clusterManagerNode , clusterState , exp );
544+ handleTransportException (clusterManagerNode , stateVersion , currentEphemeralNodeId , currentPersistentNodeId , exp );
501545 }
502546 }
503547 );
504548 }
505549
506- private void handleTransportException (DiscoveryNode clusterManagerNode , ClusterState clusterState , final TransportException exp ) {
550+ private void handleTransportException (
551+ DiscoveryNode clusterManagerNode ,
552+ long stateVersion ,
553+ String ephemeralNodeId ,
554+ String persistentNodeId ,
555+ final TransportException exp
556+ ) {
507557 Throwable cause = exp .unwrapCause ();
508558 if (cause instanceof ConnectTransportException
509559 || (exp instanceof RemoteTransportException && cause instanceof NodeClosedException )) {
@@ -517,7 +567,7 @@ private void handleTransportException(DiscoveryNode clusterManagerNode, ClusterS
517567 exp .getDetailedMessage ()
518568 );
519569
520- retryOnMasterChange (clusterState , cause );
570+ retryOnMasterChange (stateVersion , ephemeralNodeId , persistentNodeId , cause );
521571 } else {
522572 listener .onFailure (exp );
523573 }
0 commit comments