diff --git a/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/RestApiMetricsCollectorTest.java b/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/RestApiMetricsCollectorTest.java index 9a5fd71898..63950bb54b 100644 --- a/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/RestApiMetricsCollectorTest.java +++ b/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/RestApiMetricsCollectorTest.java @@ -160,6 +160,8 @@ public void testJmMetricCollection() throws Exception { (c, e) -> new StandaloneClientHAServices( miniCluster.getRestAddress().get().toString())); + // wait a little to ensure data is ready + Thread.sleep(500); do { var collector = new RestApiMetricsCollector<>(); Map flinkMetricMetricMap = diff --git a/flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/metrics/KubernetesClientMetricsTest.java b/flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/metrics/KubernetesClientMetricsTest.java index 88b85d3dd3..8220b4b6d9 100644 --- a/flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/metrics/KubernetesClientMetricsTest.java +++ b/flink-kubernetes-operator/src/test/java/org/apache/flink/kubernetes/operator/metrics/KubernetesClientMetricsTest.java @@ -36,7 +36,9 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; +import java.util.NoSuchElementException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; @@ -125,7 +127,8 @@ public void testMetricsDisabled() { @Test @Order(2) - public void testMetricsEnabled() { + @Timeout(120) + public void testMetricsEnabled() throws Exception { var configuration = new Configuration(); var listener = new TestingMetricListener(configuration); var kubernetesClient = @@ -135,81 +138,134 @@ public void testMetricsEnabled() { mockServer.createClient().getConfiguration()); var deployment = TestUtils.buildApplicationCluster(); - assertEquals( - 0, listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)).get().getCount()); - assertEquals( - 0.0, listener.getMeter(listener.getMetricId(REQUEST_METER_ID)).get().getRate()); - assertEquals( - 0, - listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 0.0, - listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)).get().getRate()); - assertEquals( - 0, listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)).get().getCount()); - assertEquals( - 0.0, listener.getMeter(listener.getMetricId(RESPONSE_METER_ID)).get().getRate()); - assertEquals( - 0, - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) - .get() - .getStatistics() - .getMin()); - assertEquals( - 0, - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) - .get() - .getStatistics() - .getMax()); - - kubernetesClient.resource(deployment).createOrReplace(); - assertEquals( - 1, listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)).get().getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(REQUEST_POST_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)).get().getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_201_COUNTER_ID)) - .get() - .getCount()); - assertTrue( - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + do { + try { + assertEquals( + 0, + listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(REQUEST_METER_ID)).get().getRate()); + assertEquals( + 0, + listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)) + .get() + .getRate()); + assertEquals( + 0, + listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(RESPONSE_METER_ID)).get().getRate()); + assertEquals( + 0, + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) .get() .getStatistics() - .getMin() - > 0); - assertTrue( - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .getMin()); + assertEquals( + 0, + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) .get() .getStatistics() - .getMax() - > 0); + .getMax()); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); + kubernetesClient.resource(deployment).createOrReplace(); + do { + try { + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_POST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_201_COUNTER_ID)) + .get() + .getCount()); + assertTrue( + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .get() + .getStatistics() + .getMin() + > 0); + assertTrue( + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .get() + .getStatistics() + .getMax() + > 0); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); kubernetesClient.resource(deployment).delete(); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) - .get() - .getCount()); + do { + try { + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) + .get() + .getCount()); + + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); kubernetesClient.resource(deployment).delete(); - assertEquals( - 2, - listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_404_COUNTER_ID)) - .get() - .getCount()); + do { + try { + + assertEquals( + 2, + listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_404_COUNTER_ID)) + .get() + .getCount()); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); Awaitility.await() .atMost(1, TimeUnit.MINUTES) .until( @@ -304,7 +360,8 @@ public void onDelete( @Test @Order(3) - public void testMetricsHttpResponseCodeGroupsEnabled() { + @Timeout(120) + public void testMetricsHttpResponseCodeGroupsEnabled() throws Exception { var configuration = new Configuration(); configuration.set( KubernetesOperatorMetricOptions @@ -318,91 +375,145 @@ public void testMetricsHttpResponseCodeGroupsEnabled() { mockServer.createClient().getConfiguration()); var deployment = TestUtils.buildApplicationCluster(); - assertEquals( - 0, listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)).get().getCount()); - assertEquals( - 0.0, listener.getMeter(listener.getMetricId(REQUEST_METER_ID)).get().getRate()); - assertEquals( - 0, - listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 0.0, - listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)).get().getRate()); - assertEquals( - 0, listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)).get().getCount()); - assertEquals( - 0.0, listener.getMeter(listener.getMetricId(RESPONSE_METER_ID)).get().getRate()); - assertEquals( - 0, - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) - .get() - .getStatistics() - .getMin()); - assertEquals( - 0, - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) - .get() - .getStatistics() - .getMax()); + do { - kubernetesClient.resource(deployment).createOrReplace(); - assertEquals( - 1, listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)).get().getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(REQUEST_POST_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)).get().getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_201_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_2xx_COUNTER_ID)) - .get() - .getCount()); - assertTrue( - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + try { + assertEquals( + 0, + listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(REQUEST_METER_ID)).get().getRate()); + assertEquals( + 0, + listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)) + .get() + .getRate()); + assertEquals( + 0, + listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(RESPONSE_METER_ID)).get().getRate()); + assertEquals( + 0, + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) .get() .getStatistics() - .getMin() - > 0); - assertTrue( - listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .getMin()); + assertEquals( + 0, + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) .get() .getStatistics() - .getMax() - > 0); + .getMax()); + + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); + kubernetesClient.resource(deployment).createOrReplace(); + do { + try { + + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_POST_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_201_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_2xx_COUNTER_ID)) + .get() + .getCount()); + assertTrue( + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .get() + .getStatistics() + .getMin() + > 0); + assertTrue( + listener.getHistogram(listener.getMetricId(RESPONSE_LATENCY_ID)) + .get() + .getStatistics() + .getMax() + > 0); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); kubernetesClient.resource(deployment).delete(); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) - .get() - .getCount()); + do { + try { + assertEquals( + 1, + listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) + .get() + .getCount()); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); kubernetesClient.resource(deployment).delete(); - assertEquals( - 2, - listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_404_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 1, - listener.getCounter(listener.getMetricId(RESPONSE_4xx_COUNTER_ID)) - .get() - .getCount()); + do { + try { + assertEquals( + 2, + listener.getCounter(listener.getMetricId(REQUEST_DELETE_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_404_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 1, + listener.getCounter(listener.getMetricId(RESPONSE_4xx_COUNTER_ID)) + .get() + .getCount()); + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); Awaitility.await() .atMost(1, TimeUnit.MINUTES) .until( @@ -445,7 +556,8 @@ public void testMetricsHttpResponseCodeGroupsEnabled() { @Test @Order(3) - public void testAPIServerIsDown() { + @Timeout(120) + public void testAPIServerIsDown() throws Exception { var configuration = new Configuration(); var listener = new TestingMetricListener(configuration); var kubernetesClient = @@ -456,14 +568,26 @@ public void testAPIServerIsDown() { var deployment = TestUtils.buildApplicationCluster(); mockServer.shutdown(); - assertEquals( - 0, - listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) - .get() - .getCount()); - assertEquals( - 0.0, - listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)).get().getRate()); + do { + try { + assertEquals( + 0, + listener.getCounter(listener.getMetricId(REQUEST_FAILED_COUNTER_ID)) + .get() + .getCount()); + assertEquals( + 0.0, + listener.getMeter(listener.getMetricId(REQUEST_FAILED_METER_ID)) + .get() + .getRate()); + + break; + } catch (NoSuchElementException e) { + // Metrics might not be available yet (timeout above will eventually kill this + // test) + Thread.sleep(100); + } + } while (true); Awaitility.await() .atMost(1, TimeUnit.MINUTES) .until(