Skip to content

Commit c05c61d

Browse files
authored
[Test] Use generic thread pool for GlobalCheckpointSyncAction (#134180)
Redirect the GlobalCheckpointSyncAction to the generic thread pool so that we have precise control over the write thread pool for load and latency assertions. Resolves: #134088
1 parent 58b3296 commit c05c61d

File tree

2 files changed

+23
-29
lines changed

2 files changed

+23
-29
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -507,9 +507,6 @@ tests:
507507
- class: org.elasticsearch.xpack.ml.integration.ClassificationIT
508508
method: testWithCustomFeatureProcessors
509509
issue: https://github.com/elastic/elasticsearch/issues/134001
510-
- class: org.elasticsearch.cluster.ClusterInfoServiceIT
511-
method: testMaxQueueLatenciesInClusterInfo
512-
issue: https://github.com/elastic/elasticsearch/issues/134088
513510
- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
514511
method: test {csv-spec:fork.ForkBeforeStats}
515512
issue: https://github.com/elastic/elasticsearch/issues/134100

server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,10 @@
2727
import org.elasticsearch.cluster.service.ClusterService;
2828
import org.elasticsearch.common.Strings;
2929
import org.elasticsearch.common.settings.Settings;
30-
import org.elasticsearch.common.util.concurrent.EsExecutors;
3130
import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor;
3231
import org.elasticsearch.core.TimeValue;
3332
import org.elasticsearch.index.IndexService;
34-
import org.elasticsearch.index.shard.GlobalCheckpointListeners;
33+
import org.elasticsearch.index.seqno.GlobalCheckpointSyncAction;
3534
import org.elasticsearch.index.shard.IndexShard;
3635
import org.elasticsearch.index.shard.ShardId;
3736
import org.elasticsearch.index.store.Store;
@@ -45,6 +44,7 @@
4544
import org.elasticsearch.test.InternalTestCluster;
4645
import org.elasticsearch.test.transport.MockTransportService;
4746
import org.elasticsearch.threadpool.ThreadPool;
47+
import org.elasticsearch.transport.RequestHandlerRegistry;
4848
import org.elasticsearch.transport.TransportService;
4949
import org.hamcrest.Matchers;
5050

@@ -57,7 +57,6 @@
5757
import java.util.Map;
5858
import java.util.Set;
5959
import java.util.concurrent.CountDownLatch;
60-
import java.util.concurrent.Executor;
6160
import java.util.concurrent.atomic.AtomicBoolean;
6261

6362
import static java.util.Collections.emptySet;
@@ -431,7 +430,27 @@ public void testMaxQueueLatenciesInClusterInfo() throws Exception {
431430
final int numShards = randomIntBetween(1, 5);
432431
createIndex(indexName, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, numShards).put(SETTING_NUMBER_OF_REPLICAS, 0).build());
433432
ensureGreen(indexName);
434-
final var indexService = internalCluster().getInstance(IndicesService.class, dataNodeName).iterator().next();
433+
434+
// Global checkpoint sync actions are asynchronous. We cannot really tell exactly when they are completely off the
435+
// thread pool. To avoid busy waiting, we redirect them to the generic thread pool so that we have precise control
436+
// over the write thread pool for assertions.
437+
final MockTransportService mockTransportService = MockTransportService.getInstance(dataNodeName);
438+
final var originalRegistry = mockTransportService.transport()
439+
.getRequestHandlers()
440+
.getHandler(GlobalCheckpointSyncAction.ACTION_NAME + "[p]");
441+
mockTransportService.transport()
442+
.getRequestHandlers()
443+
.forceRegister(
444+
new RequestHandlerRegistry<>(
445+
GlobalCheckpointSyncAction.ACTION_NAME + "[p]",
446+
in -> null, // no need to deserialize the request since it's local
447+
mockTransportService.getTaskManager(),
448+
originalRegistry.getHandler(),
449+
mockTransportService.getThreadPool().executor(ThreadPool.Names.GENERIC),
450+
true,
451+
true
452+
)
453+
);
435454

436455
// Block indexing on the data node by submitting write thread pool tasks equal to the number of write threads.
437456
var barrier = blockDataNodeIndexing(dataNodeName);
@@ -496,28 +515,6 @@ public void testMaxQueueLatenciesInClusterInfo() throws Exception {
496515
}
497516
Arrays.stream(threadsToJoin).forEach(thread -> assertFalse(thread.isAlive()));
498517

499-
// Wait for async post replication actions to complete
500-
final var checkpointsSyncLatch = new CountDownLatch(numShards);
501-
for (int i = 0; i < numShards; ++i) {
502-
final var indexShard = indexService.getShard(i);
503-
final long expectedGlobalCheckpoint = indexShard.seqNoStats().getGlobalCheckpoint();
504-
logger.info("--> shard [{}] waiting for global checkpoint {}", i, expectedGlobalCheckpoint);
505-
indexShard.addGlobalCheckpointListener(expectedGlobalCheckpoint, new GlobalCheckpointListeners.GlobalCheckpointListener() {
506-
@Override
507-
public Executor executor() {
508-
return EsExecutors.DIRECT_EXECUTOR_SERVICE;
509-
}
510-
511-
@Override
512-
public void accept(long globalCheckpoint, Exception e) {
513-
assertNull(e); // should have no error
514-
logger.info("--> shard [{}] global checkpoint updated to {}", indexShard.shardId().id(), globalCheckpoint);
515-
checkpointsSyncLatch.countDown();
516-
}
517-
}, TimeValue.THIRTY_SECONDS);
518-
}
519-
safeAwait(checkpointsSyncLatch);
520-
521518
assertThat(
522519
"Unexpectedly found a task queued for the write thread pool. Write thread pool dump: " + trackingWriteExecutor,
523520
trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(),

0 commit comments

Comments
 (0)