Skip to content

Commit 97a6dc8

Browse files
Send max of two types of max queue latency to ClusterInfo (elastic#132675)
The TransportNodeUsageStatsForThreadPoolsAction now takes the max latency of any task currently queued in the write thread pool queue AND the previously collected max queue latency of any task dequeued since the last call. This covers the possibility that queue times can rise greatly before being reflected in execution: imagine all the write threads are stalled or have long running tasks. This action feeds a max queue latency stat to the ClusterInfo. Follow up from ES-12233. Adds additional IT testing to exercise both forms of queue latency, a followup for ES-12316. ------------- Completing the follow up testing [Henning requested previously](https://github.com/elastic/elasticsearch/pull/131480/files#r2243610807).
1 parent 8c01b67 commit 97a6dc8

File tree

7 files changed

+264
-21
lines changed

7 files changed

+264
-21
lines changed

docs/changelog/132675.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 132675
2+
summary: Add second max queue latency stat to `ClusterInfo`
3+
area: Allocation
4+
type: enhancement
5+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java

Lines changed: 178 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.elasticsearch.cluster.service.ClusterService;
2828
import org.elasticsearch.common.Strings;
2929
import org.elasticsearch.common.settings.Settings;
30+
import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
3031
import org.elasticsearch.core.TimeValue;
3132
import org.elasticsearch.index.IndexService;
3233
import org.elasticsearch.index.shard.IndexShard;
@@ -59,6 +60,8 @@
5960
import static java.util.Collections.emptySet;
6061
import static java.util.Collections.singletonList;
6162
import static java.util.Collections.unmodifiableSet;
63+
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
64+
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
6265
import static org.elasticsearch.common.util.set.Sets.newHashSet;
6366
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
6467
import static org.hamcrest.CoreMatchers.equalTo;
@@ -346,6 +349,8 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() {
346349
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
347350
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
348351
)
352+
// Manually control cluster info refreshes
353+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m")
349354
.build();
350355
var masterName = internalCluster().startMasterOnlyNode(settings);
351356
var dataNodeName = internalCluster().startDataOnlyNode(settings);
@@ -369,11 +374,8 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() {
369374
}
370375
);
371376

372-
// Do some writes to create some write thread pool activity.
373-
final String indexName = randomIdentifier();
374-
for (int i = 0; i < randomIntBetween(1, 1000); i++) {
375-
index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar"));
376-
}
377+
// Generate some writes to get some non-zero write thread pool stats.
378+
doALotOfDataNodeWrites();
377379

378380
// Force a refresh of the ClusterInfo state to collect fresh info from the data nodes.
379381
final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
@@ -387,7 +389,7 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() {
387389

388390
final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
389391
logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
390-
assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collectedg
392+
assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
391393
var dataNodeId = getNodeId(dataNodeName);
392394
var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
393395
assertNotNull(nodeUsageStatsForThreadPool);
@@ -400,4 +402,174 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() {
400402
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f));
401403
assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
402404
}
405+
406+
/**
407+
* The {@link TransportNodeUsageStatsForThreadPoolsAction} returns the max value of two kinds of queue latencies:
408+
* {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#getMaxQueueLatencyMillisSinceLastPollAndReset()} and
409+
* {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#peekMaxQueueLatencyInQueueMillis()}. The latter looks at currently queued tasks,
410+
* and the former tracks the queue latency of tasks when they are taken off of the queue to start execution.
411+
*/
412+
public void testMaxQueueLatenciesInClusterInfo() throws Exception {
413+
var settings = Settings.builder()
414+
.put(
415+
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
416+
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED
417+
)
418+
// Manually control cluster info refreshes
419+
.put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m")
420+
.build();
421+
var masterName = internalCluster().startMasterOnlyNode(settings);
422+
var dataNodeName = internalCluster().startDataOnlyNode(settings);
423+
ensureStableCluster(2);
424+
assertEquals(internalCluster().getMasterName(), masterName);
425+
assertNotEquals(internalCluster().getMasterName(), dataNodeName);
426+
logger.info("---> master node: " + masterName + ", data node: " + dataNodeName);
427+
428+
// Block indexing on the data node by submitting write thread pool tasks equal to the number of write threads.
429+
var barrier = blockDataNodeIndexing(dataNodeName);
430+
try {
431+
// Arbitrary number of tasks, which will queue because all the write threads are occupied already, greater than one: only
432+
// strictly need a single task to occupy the queue.
433+
int numberOfTasks = randomIntBetween(1, 5);
434+
Thread[] threadsToJoin = new Thread[numberOfTasks];
435+
String indexName = randomIdentifier();
436+
createIndex(
437+
indexName,
438+
// NB: Set 0 replicas so that there aren't any stray GlobalCheckpointSyncAction tasks on the write thread pool.
439+
Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).put(SETTING_NUMBER_OF_REPLICAS, 0).build()
440+
);
441+
for (int i = 0; i < numberOfTasks; ++i) {
442+
threadsToJoin[i] = startParallelSingleWrite(indexName);
443+
}
444+
445+
// Reach into the data node's write thread pool to check that tasks have reached the queue.
446+
var dataNodeThreadPool = internalCluster().getInstance(ThreadPool.class, dataNodeName);
447+
var writeExecutor = dataNodeThreadPool.executor(ThreadPool.Names.WRITE);
448+
assert writeExecutor instanceof TaskExecutionTimeTrackingEsThreadPoolExecutor;
449+
var trackingWriteExecutor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) writeExecutor;
450+
assertBusy(
451+
// Wait for the parallel threads' writes to get queued in the write thread pool.
452+
() -> assertThat(
453+
"Write thread pool dump: " + trackingWriteExecutor,
454+
trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(),
455+
greaterThan(0L)
456+
)
457+
);
458+
459+
// Force a refresh of the ClusterInfo state to collect fresh info from the data node.
460+
final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
461+
InternalClusterInfoService.class,
462+
internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class)
463+
);
464+
final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
465+
466+
// Since tasks are actively queued right now, #peekMaxQueueLatencyInQueue, which is called from the
467+
// TransportNodeUsageStatsForThreadPoolsAction that a ClusterInfoService refresh initiates, should return a max queue
468+
// latency > 0;
469+
{
470+
final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
471+
logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
472+
assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data node should be collected
473+
var dataNodeId = getNodeId(dataNodeName);
474+
var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
475+
assertNotNull(nodeUsageStatsForThreadPool);
476+
logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
477+
478+
assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
479+
var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
480+
assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
481+
assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
482+
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0f));
483+
assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThan(0L));
484+
}
485+
486+
// Now release the data node's indexing, and drain the queued tasks. Max queue latency of executed (not queued) tasks is reset
487+
// by each TransportNodeUsageStatsForThreadPoolsAction call (#getMaxQueueLatencyMillisSinceLastPollAndReset), so the new queue
488+
// latencies will be present in the next call. There will be nothing in the queue to peek at now, so the result of the max
489+
// queue latency result in TransportNodeUsageStatsForThreadPoolsAction will reflect
490+
// #getMaxQueueLatencyMillisSinceLastPollAndReset and not #peekMaxQueueLatencyInQueue.
491+
barrier.await();
492+
for (int i = 0; i < numberOfTasks; ++i) {
493+
threadsToJoin[i].join();
494+
}
495+
assertThat(
496+
"Unexpectedly found a task queued for the write thread pool. Write thread pool dump: " + trackingWriteExecutor,
497+
trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(),
498+
equalTo(0L)
499+
);
500+
501+
final ClusterInfo nextClusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
502+
{
503+
final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = nextClusterInfo
504+
.getNodeUsageStatsForThreadPools();
505+
logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
506+
assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
507+
var dataNodeId = getNodeId(dataNodeName);
508+
var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
509+
assertNotNull(nodeUsageStatsForThreadPool);
510+
logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
511+
512+
assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
513+
var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
514+
assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
515+
assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
516+
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f));
517+
assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L));
518+
}
519+
} finally {
520+
// Ensure that the write threads have been released by signalling an interrupt on any callers waiting on the barrier. If the
521+
// callers have already all been successfully released, then there will be nothing left to interrupt.
522+
logger.info("---> Ensuring release of the barrier on write thread pool tasks");
523+
barrier.reset();
524+
}
525+
526+
// Now that there's nothing in the queue, and no activity since the last ClusterInfo refresh, the max latency returned should be
527+
// zero. Verify this.
528+
final InternalClusterInfoService masterClusterInfoService = asInstanceOf(
529+
InternalClusterInfoService.class,
530+
internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class)
531+
);
532+
final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService);
533+
{
534+
final Map<String, NodeUsageStatsForThreadPools> usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools();
535+
logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools);
536+
assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected
537+
var dataNodeId = getNodeId(dataNodeName);
538+
var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId);
539+
assertNotNull(nodeUsageStatsForThreadPool);
540+
logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool);
541+
542+
assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId());
543+
var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE);
544+
assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats);
545+
assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0));
546+
assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), equalTo(0f));
547+
assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), equalTo(0L));
548+
}
549+
}
550+
551+
/**
552+
* Do some writes to create some write thread pool activity.
553+
*/
554+
private void doALotOfDataNodeWrites() {
555+
final String indexName = randomIdentifier();
556+
final int randomInt = randomIntBetween(500, 1000);
557+
for (int i = 0; i < randomInt; i++) {
558+
index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar"));
559+
}
560+
}
561+
562+
/**
563+
* Starts a single index request on a parallel thread and returns the reference so {@link Thread#join()} can be called eventually.
564+
*/
565+
private Thread startParallelSingleWrite(String indexName) {
566+
Thread running = new Thread(() -> doSingleWrite(indexName));
567+
running.start();
568+
return running;
569+
}
570+
571+
private void doSingleWrite(String indexName) {
572+
final int randomId = randomIntBetween(500, 1000);
573+
index(indexName, Integer.toString(randomId), Collections.singletonMap("foo", "bar"));
574+
}
403575
}

server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,10 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation(
104104
(float) trackingForWriteExecutor.pollUtilization(
105105
TaskExecutionTimeTrackingEsThreadPoolExecutor.UtilizationTrackingPurpose.ALLOCATION
106106
),
107-
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset()
107+
Math.max(
108+
trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(),
109+
trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis()
110+
)
108111
);
109112

110113
Map<String, ThreadPoolUsageStats> perThreadPool = new HashMap<>();

server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,8 @@ public int getCurrentQueueSize() {
151151
* Returns the max queue latency seen since the last time that this method was called. Every call will reset the max seen back to zero.
152152
* Latencies are only observed as tasks are taken off of the queue. This means that tasks in the queue will not contribute to the max
153153
* latency until they are unqueued and handed to a thread to execute. To see the latency of tasks still in the queue, use
154-
* {@link #peekMaxQueueLatencyInQueue}. If there have been no tasks in the queue since the last call, then zero latency is returned.
154+
* {@link #peekMaxQueueLatencyInQueueMillis}. If there have been no tasks in the queue since the last call, then zero latency is
155+
* returned.
155156
*/
156157
public long getMaxQueueLatencyMillisSinceLastPollAndReset() {
157158
if (trackMaxQueueLatency == false) {
@@ -164,23 +165,29 @@ public long getMaxQueueLatencyMillisSinceLastPollAndReset() {
164165
* Returns the queue latency of the next task to be executed that is still in the task queue. Essentially peeks at the front of the
165166
* queue and calculates how long it has been there. Returns zero if there is no queue.
166167
*/
167-
public long peekMaxQueueLatencyInQueue() {
168+
public long peekMaxQueueLatencyInQueueMillis() {
168169
if (trackMaxQueueLatency == false) {
169170
return 0;
170171
}
172+
171173
var queue = getQueue();
172-
if (queue.isEmpty()) {
174+
assert queue instanceof LinkedTransferQueue || queue instanceof SizeBlockingQueue
175+
: "Not the type of queue expected: " + queue.getClass();
176+
var linkedTransferOrSizeBlockingQueue = queue instanceof LinkedTransferQueue
177+
? (LinkedTransferQueue) queue
178+
: (SizeBlockingQueue) queue;
179+
180+
var task = linkedTransferOrSizeBlockingQueue.peek();
181+
if (task == null) {
182+
// There's nothing in the queue right now.
173183
return 0;
174184
}
175-
assert queue instanceof LinkedTransferQueue : "Not the type of queue expected: " + queue.getClass();
176-
var linkedTransferQueue = (LinkedTransferQueue) queue;
177185

178-
var task = linkedTransferQueue.peek();
179186
assert task instanceof WrappedRunnable : "Not the type of task expected: " + task.getClass();
180187
var wrappedTask = ((WrappedRunnable) task).unwrap();
181188
assert wrappedTask instanceof TimedRunnable : "Not the type of task expected: " + task.getClass();
182189
var timedTask = (TimedRunnable) wrappedTask;
183-
return timedTask.getTimeSinceCreationNanos();
190+
return TimeUnit.NANOSECONDS.toMillis(timedTask.getTimeSinceCreationNanos());
184191
}
185192

186193
/**

server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,23 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
10241024
return builder;
10251025
}
10261026

1027+
@Override
1028+
public String toString() {
1029+
return "Info[name="
1030+
+ name
1031+
+ ",type="
1032+
+ type
1033+
+ ",min="
1034+
+ min
1035+
+ ",max="
1036+
+ max
1037+
+ ",keepAlive="
1038+
+ keepAlive
1039+
+ ",queueSize="
1040+
+ queueSize
1041+
+ "]";
1042+
}
1043+
10271044
}
10281045

10291046
/**

0 commit comments

Comments
 (0)