2525import org .elasticsearch .cluster .service .ClusterApplierRecordingService .Recorder ;
2626import org .elasticsearch .common .Priority ;
2727import org .elasticsearch .common .component .AbstractLifecycleComponent ;
28+ import org .elasticsearch .common .network .ThreadWatchdog ;
2829import org .elasticsearch .common .settings .ClusterSettings ;
2930import org .elasticsearch .common .settings .Setting ;
3031import org .elasticsearch .common .settings .Settings ;
@@ -75,13 +76,27 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements
7576 Setting .Property .NodeScope
7677 );
7778
79+ public static final Setting <TimeValue > CLUSTER_APPLIER_THREAD_WATCHDOG_INTERVAL = Setting .positiveTimeSetting (
80+ "cluster.service.applier.thread.watchdog.interval" ,
81+ TimeValue .timeValueMinutes (5 ),
82+ Setting .Property .NodeScope
83+ );
84+
85+ public static final Setting <TimeValue > CLUSTER_APPLIER_THREAD_WATCHDOG_QUIET_TIME = Setting .positiveTimeSetting (
86+ "cluster.service.applier.thread.watchdog.quiet_time" ,
87+ TimeValue .timeValueHours (1 ),
88+ Setting .Property .NodeScope
89+ );
90+
7891 public static final String CLUSTER_UPDATE_THREAD_NAME = "clusterApplierService#updateTask" ;
7992
8093 private final ClusterSettings clusterSettings ;
8194 private final ThreadPool threadPool ;
8295
8396 private volatile TimeValue slowTaskLoggingThreshold ;
8497 private volatile TimeValue slowTaskThreadDumpTimeout ;
98+ private final TimeValue watchdogInterval ;
99+ private final TimeValue watchdogQuietTime ;
85100
86101 private volatile PrioritizedEsThreadPoolExecutor threadPoolExecutor ;
87102
@@ -103,6 +118,8 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements
103118
104119 private NodeConnectionsService nodeConnectionsService ;
105120
121+ private final ThreadWatchdog threadWatchdog = new ThreadWatchdog ();
122+
106123 public ClusterApplierService (String nodeName , Settings settings , ClusterSettings clusterSettings , ThreadPool threadPool ) {
107124 this .clusterSettings = clusterSettings ;
108125 this .threadPool = threadPool ;
@@ -112,6 +129,9 @@ public ClusterApplierService(String nodeName, Settings settings, ClusterSettings
112129
113130 clusterSettings .initializeAndWatch (CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING , t -> slowTaskLoggingThreshold = t );
114131 clusterSettings .initializeAndWatch (CLUSTER_SERVICE_SLOW_TASK_THREAD_DUMP_TIMEOUT_SETTING , t -> slowTaskThreadDumpTimeout = t );
132+
133+ this .watchdogInterval = clusterSettings .get (CLUSTER_APPLIER_THREAD_WATCHDOG_INTERVAL );
134+ this .watchdogQuietTime = clusterSettings .get (CLUSTER_APPLIER_THREAD_WATCHDOG_QUIET_TIME );
115135 }
116136
117137 public synchronized void setNodeConnectionsService (NodeConnectionsService nodeConnectionsService ) {
@@ -133,6 +153,7 @@ protected synchronized void doStart() {
133153 Objects .requireNonNull (nodeConnectionsService , "please set the node connection service before starting" );
134154 Objects .requireNonNull (state .get (), "please set initial state before starting" );
135155 threadPoolExecutor = createThreadPoolExecutor ();
156+ threadWatchdog .run (watchdogInterval , watchdogQuietTime , threadPool , lifecycle , logger );
136157 }
137158
138159 protected PrioritizedEsThreadPoolExecutor createThreadPoolExecutor () {
@@ -156,7 +177,13 @@ class UpdateTask extends SourcePrioritizedRunnable {
156177
157178 @ Override
158179 public void run () {
159- runTask (source (), updateFunction , listener );
180+ final var activityTracker = threadWatchdog .getActivityTrackerForCurrentThread ();
181+ try {
182+ activityTracker .startActivity ();
183+ runTask (source (), updateFunction , listener );
184+ } finally {
185+ activityTracker .stopActivity ();
186+ }
160187 }
161188 }
162189
@@ -289,17 +316,23 @@ public void addTimeoutListener(@Nullable final TimeValue timeout, final TimeoutC
289316 threadPoolExecutor .execute (new SourcePrioritizedRunnable (Priority .HIGH , "_add_listener_" ) {
290317 @ Override
291318 public void run () {
292- final NotifyTimeout notifyTimeout = new NotifyTimeout (listener , timeout );
293- final NotifyTimeout previous = timeoutClusterStateListeners .put (listener , notifyTimeout );
294- assert previous == null : "Added same listener [" + listener + "]" ;
295- if (lifecycle .stoppedOrClosed ()) {
296- listener .onClose ();
297- return ;
298- }
299- if (timeout != null ) {
300- notifyTimeout .cancellable = threadPool .schedule (notifyTimeout , timeout , threadPool .generic ());
319+ final var activityTracker = threadWatchdog .getActivityTrackerForCurrentThread ();
320+ try {
321+ activityTracker .startActivity ();
322+ final NotifyTimeout notifyTimeout = new NotifyTimeout (listener , timeout );
323+ final NotifyTimeout previous = timeoutClusterStateListeners .put (listener , notifyTimeout );
324+ assert previous == null : "Added same listener [" + listener + "]" ;
325+ if (lifecycle .stoppedOrClosed ()) {
326+ listener .onClose ();
327+ return ;
328+ }
329+ if (timeout != null ) {
330+ notifyTimeout .cancellable = threadPool .schedule (notifyTimeout , timeout , threadPool .generic ());
331+ }
332+ listener .postAdded ();
333+ } finally {
334+ activityTracker .stopActivity ();
301335 }
302- listener .postAdded ();
303336 }
304337 });
305338 } catch (EsRejectedExecutionException e ) {
0 commit comments