From 0bd3f7ad5c5adf04682d48fafb40864455589890 Mon Sep 17 00:00:00 2001 From: Joshua Adams Date: Thu, 25 Sep 2025 09:58:34 +0100 Subject: [PATCH] Fixes testSnapshotShutdownProgressTracker Reorders the test to avoid a race condition due to one snapshot thread becoming blocked Relates: 134620 --- .../snapshots/SnapshotShutdownIT.java | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java index 72317a7220ec9..21ab06585924c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java @@ -488,6 +488,7 @@ public void testSnapshotShutdownProgressTracker() throws Exception { final var otherNode = internalCluster().startDataOnlyNode(); final var otherIndex = randomIdentifier(); createIndexWithContent(otherIndex, indexSettings(numShards, 0).put(REQUIRE_NODE_NAME_SETTING, otherNode).build()); + indexAllShardsToAnEqualOrGreaterMinimumSize(otherIndex, ByteSizeValue.of(2, ByteSizeUnit.KB).getBytes()); blockDataNode(repoName, otherNode); final var nodeForRemoval = internalCluster().startDataOnlyNode( @@ -498,6 +499,7 @@ public void testSnapshotShutdownProgressTracker() throws Exception { final var indexName = randomIdentifier(); createIndexWithContent(indexName, indexSettings(numShards, 0).put(REQUIRE_NODE_NAME_SETTING, nodeForRemoval).build()); indexAllShardsToAnEqualOrGreaterMinimumSize(indexName, ByteSizeValue.of(2, ByteSizeUnit.KB).getBytes()); + logger.info("---> nodeForRemovalId: " + nodeForRemovalId + ", numShards: " + numShards); // Start the snapshot with blocking in place on the data node not to allow shard snapshots to finish yet. final var clusterService = internalCluster().getCurrentMasterNodeInstance(ClusterService.class); @@ -507,7 +509,21 @@ public void testSnapshotShutdownProgressTracker() throws Exception { waitForBlock(otherNode, repoName); - logger.info("---> nodeForRemovalId: " + nodeForRemovalId + ", numShards: " + numShards); + // Block on the master when a shard snapshot request comes in, until we can verify that the Tracker saw the outgoing request. + final CountDownLatch snapshotStatusUpdateLatch = new CountDownLatch(1); + final var masterTransportService = MockTransportService.getInstance(internalCluster().getMasterName()); + masterTransportService.addRequestHandlingBehavior( + SnapshotsService.UPDATE_SNAPSHOT_STATUS_ACTION_NAME, + (handler, request, channel, task) -> masterTransportService.getThreadPool().generic().execute(() -> { + safeAwait(snapshotStatusUpdateLatch); + try { + handler.messageReceived(request, channel, task); + } catch (Exception e) { + fail(e); + } + }) + ); + mockLog.addExpectation( new MockLog.SeenEventExpectation( "SnapshotShutdownProgressTracker start log message", @@ -553,21 +569,6 @@ public void testSnapshotShutdownProgressTracker() throws Exception { mockLog.awaitAllExpectationsMatched(); resetMockLog(); - // Block on the master when a shard snapshot request comes in, until we can verify that the Tracker saw the outgoing request. - final CountDownLatch snapshotStatusUpdateLatch = new CountDownLatch(1); - final var masterTransportService = MockTransportService.getInstance(internalCluster().getMasterName()); - masterTransportService.addRequestHandlingBehavior( - SnapshotsService.UPDATE_SNAPSHOT_STATUS_ACTION_NAME, - (handler, request, channel, task) -> masterTransportService.getThreadPool().generic().execute(() -> { - safeAwait(snapshotStatusUpdateLatch); - try { - handler.messageReceived(request, channel, task); - } catch (Exception e) { - fail(e); - } - }) - ); - mockLog.addExpectation( new MockLog.SeenEventExpectation( "SnapshotShutdownProgressTracker shard snapshot has paused log message",