From 583f73b965d6b4266d1a0bf8c70d8d95ca8a3718 Mon Sep 17 00:00:00 2001 From: Jeremy Dahlgren Date: Tue, 12 Aug 2025 15:08:12 -0400 Subject: [PATCH] Wait for snapshot finalization in testSnapshotAPMMetrics() Adds an assertBusy() to testSnapshotAPMMetrics() to ensure the snapshot finalization code has had a chance to run and update the snapshot completed metrics. The awaitNumberOfSnapshotsInProgress(0) call returns after it observes the expected SnapshotsInProgress cluster state update, but the snapshot finalization code may not have been executed yet. Resolves: #132731 --- muted-tests.yml | 3 --- .../repositories/SnapshotMetricsIT.java | 19 ++++++++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index d19ff661d1872..4549dbe2fa8a2 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -504,9 +504,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.RevertModelSnapshotIT method: testRevertModelSnapshot issue: https://github.com/elastic/elasticsearch/issues/132733 -- class: org.elasticsearch.repositories.SnapshotMetricsIT - method: testSnapshotAPMMetrics - issue: https://github.com/elastic/elasticsearch/issues/132731 # Examples: # diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 91cf76c6f07e4..08185f62d4596 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -133,8 +134,17 @@ public void testSnapshotAPMMetrics() throws Exception { // wait for 
snapshot to finish to test the other metrics awaitNumberOfSnapshotsInProgress(0); - final TimeValue snapshotElapsedTime = TimeValue.timeValueNanos(System.nanoTime() - beforeCreateSnapshotNanos); - collectMetrics(); + final AtomicReference<TimeValue> elapsedTimeValueRef = new AtomicReference<>(); + // Sanity check snapshot completion metric observations recorded in snapshot finalization. + // Use assertBusy() so the finalization code has time to run after the SnapshotsInProgress cluster state update has completed. + assertBusy(() -> { + collectMetrics(); + elapsedTimeValueRef.set(TimeValue.timeValueNanos(System.nanoTime() - beforeCreateSnapshotNanos)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L)); + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1)); + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, everyItem(lessThan(elapsedTimeValueRef.get().secondsFrac()))); + }); + final TimeValue snapshotElapsedTime = elapsedTimeValueRef.get(); // sanity check blobs, bytes and throttling metrics assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED), greaterThan(0L)); @@ -143,16 +153,11 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L)); - assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L)); // Sanity check shard duration observations assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, hasSize(numShards)); assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac()))); - // Sanity check snapshot observations - assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1)); - 
assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac()))); - // Work out the maximum amount of concurrency per node final ThreadPool tp = internalCluster().getDataNodeInstance(ThreadPool.class); final int snapshotThreadPoolSize = tp.info(ThreadPool.Names.SNAPSHOT).getMax();