Unmute SnapshotMetricsIT.testSnapshotAPMMetrics.
The snapshot completion metrics (SNAPSHOTS_COMPLETED, SNAPSHOT_DURATION) are now asserted
inside assertBusy(), giving snapshot finalization time to record them.

diff --git a/muted-tests.yml b/muted-tests.yml
index d19ff661d1872..4549dbe2fa8a2 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -504,9 +504,6 @@ tests:
 - class: org.elasticsearch.xpack.ml.integration.RevertModelSnapshotIT
   method: testRevertModelSnapshot
   issue: https://github.com/elastic/elasticsearch/issues/132733
-- class: org.elasticsearch.repositories.SnapshotMetricsIT
-  method: testSnapshotAPMMetrics
-  issue: https://github.com/elastic/elasticsearch/issues/132731
 
 # Examples:
 #
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java
index 91cf76c6f07e4..08185f62d4596 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java
@@ -41,6 +41,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
@@ -133,8 +134,17 @@ public void testSnapshotAPMMetrics() throws Exception {
         // wait for snapshot to finish to test the other metrics
         awaitNumberOfSnapshotsInProgress(0);
 
-        final TimeValue snapshotElapsedTime = TimeValue.timeValueNanos(System.nanoTime() - beforeCreateSnapshotNanos);
-        collectMetrics();
+        final AtomicReference<TimeValue> elapsedTimeValueRef = new AtomicReference<>();
+        // Sanity check snapshot completion metric observations recorded in snapshot finalization.
+        // Use assertBusy() so the finalization code has time to run after the SnapshotsInProgress cluster state update has completed.
+        assertBusy(() -> {
+            collectMetrics();
+            elapsedTimeValueRef.set(TimeValue.timeValueNanos(System.nanoTime() - beforeCreateSnapshotNanos));
+            assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L));
+            assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1));
+            assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, everyItem(lessThan(elapsedTimeValueRef.get().secondsFrac())));
+        });
+        final TimeValue snapshotElapsedTime = elapsedTimeValueRef.get();
 
         // sanity check blobs, bytes and throttling metrics
         assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED), greaterThan(0L));
@@ -143,16 +153,11 @@ public void testSnapshotAPMMetrics() throws Exception {
         assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L));
 
         assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L));
-        assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L));
 
         // Sanity check shard duration observations
         assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, hasSize(numShards));
         assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac())));
 
-        // Sanity check snapshot observations
-        assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1));
-        assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac())));
-
         // Work out the maximum amount of concurrency per node
         final ThreadPool tp = internalCluster().getDataNodeInstance(ThreadPool.class);
         final int snapshotThreadPoolSize = tp.info(ThreadPool.Names.SNAPSHOT).getMax();