From e0715f9735b543901c0963d7239fd9ba8d9de8b7 Mon Sep 17 00:00:00 2001 From: Onur Kayabasi Date: Mon, 10 Nov 2025 07:01:16 +0100 Subject: [PATCH] Failsafe RetryPolicy instrumentation added --- .../failsafe/v3_0/FailsafeTelemetry.java | 47 ++++++++++ .../RetryPolicyEventListenerBuilders.java | 53 +++++++++++ .../failsafe/v3_0/FailsafeTelemetryTest.java | 91 +++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java index 94c05f1dba5f..fc488bbb06e0 100644 --- a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetry.java @@ -13,11 +13,16 @@ import dev.failsafe.CircuitBreaker; import dev.failsafe.CircuitBreakerConfig; +import dev.failsafe.RetryPolicy; +import dev.failsafe.RetryPolicyConfig; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributeKey; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongHistogram; import io.opentelemetry.api.metrics.Meter; +import java.util.stream.Collectors; +import java.util.stream.LongStream; /** Entrypoint for instrumenting Failsafe components. */ public final class FailsafeTelemetry { @@ -25,6 +30,8 @@ public final class FailsafeTelemetry { private static final AttributeKey CIRCUIT_BREAKER_NAME = AttributeKey.stringKey("failsafe.circuit_breaker.name"); + private static final AttributeKey RETRY_POLICY_NAME = + AttributeKey.stringKey("failsafe.retry_policy.name"); /** Returns a new {@link FailsafeTelemetry} configured with the given {@link OpenTelemetry}. */ public static FailsafeTelemetry create(OpenTelemetry openTelemetry) { @@ -70,4 +77,44 @@ public CircuitBreaker createCircuitBreaker( .onClose(buildInstrumentedCloseListener(userConfig, stateChangesCounter, attributes)) .build(); } + + /** + * Returns an instrumented {@link RetryPolicy} by given values. + * + * @param delegate user configured {@link RetryPolicy} to be instrumented + * @param retryPolicyName identifier of given {@link RetryPolicy} + * @param {@link RetryPolicy}'s result type + * @return instrumented {@link RetryPolicy} + */ + public RetryPolicy createRetryPolicy(RetryPolicy delegate, String retryPolicyName) { + RetryPolicyConfig userConfig = delegate.getConfig(); + Meter meter = openTelemetry.getMeter(INSTRUMENTATION_NAME); + LongCounter executionCounter = + meter + .counterBuilder("failsafe.retry_policy.execution.count") + .setDescription( + "Count of execution events processed by the retry policy. " + + "Each event represents one complete execution flow (initial attempt + any retries). " + + "This metric does not count individual retry attempts - it counts each time the policy is invoked.") + .build(); + LongHistogram attemptsHistogram = + meter + .histogramBuilder("failsafe.retry_policy.attempts") + .setDescription("Histogram of number of attempts for each execution.") + .ofLongs() + .setExplicitBucketBoundariesAdvice( + LongStream.range(1, userConfig.getMaxAttempts() + 1) + .boxed() + .collect(Collectors.toList())) + .build(); + Attributes attributes = Attributes.of(RETRY_POLICY_NAME, retryPolicyName); + return RetryPolicy.builder(userConfig) + .onFailure( + RetryPolicyEventListenerBuilders.buildInstrumentedFailureListener( + userConfig, executionCounter, attemptsHistogram, attributes)) + .onSuccess( + RetryPolicyEventListenerBuilders.buildInstrumentedSuccessListener( + userConfig, executionCounter, attemptsHistogram, attributes)) + .build(); + } } diff --git a/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java new file mode 100644 index 000000000000..dc7cfeb3e455 --- /dev/null +++ b/instrumentation/failsafe-3.0/library/src/main/java/io/opentelemetry/instrumentation/failsafe/v3_0/RetryPolicyEventListenerBuilders.java @@ -0,0 +1,53 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.instrumentation.failsafe.v3_0; + +import static io.opentelemetry.api.common.AttributeKey.stringKey; + +import dev.failsafe.RetryPolicyConfig; +import dev.failsafe.event.EventListener; +import dev.failsafe.event.ExecutionCompletedEvent; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongHistogram; + +final class RetryPolicyEventListenerBuilders { + private static final AttributeKey OUTCOME_KEY = + stringKey("failsafe.retry_policy.outcome"); + + private RetryPolicyEventListenerBuilders() {} + + static EventListener> buildInstrumentedFailureListener( + RetryPolicyConfig userConfig, + LongCounter executionCounter, + LongHistogram attemptsHistogram, + Attributes commonAttributes) { + Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "failure").build(); + return e -> { + executionCounter.add(1, attributes); + attemptsHistogram.record(e.getAttemptCount(), attributes); + if (userConfig.getFailureListener() != null) { + userConfig.getFailureListener().accept(e); + } + }; + } + + static EventListener> buildInstrumentedSuccessListener( + RetryPolicyConfig userConfig, + LongCounter executionCounter, + LongHistogram attemptsHistogram, + Attributes commonAttributes) { + Attributes attributes = commonAttributes.toBuilder().put(OUTCOME_KEY, "success").build(); + return e -> { + executionCounter.add(1, attributes); + attemptsHistogram.record(e.getAttemptCount(), attributes); + if (userConfig.getFailureListener() != null) { + userConfig.getFailureListener().accept(e); + } + }; + } +} diff --git a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java index 78038503e49d..5fe45a041cac 100644 --- a/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java +++ b/instrumentation/failsafe-3.0/library/src/test/java/io/opentelemetry/instrumentation/failsafe/v3_0/FailsafeTelemetryTest.java @@ -11,12 +11,20 @@ import dev.failsafe.CircuitBreaker; import dev.failsafe.CircuitBreakerOpenException; import dev.failsafe.Failsafe; +import dev.failsafe.RetryPolicy; import io.opentelemetry.api.common.Attributes; import io.opentelemetry.instrumentation.testing.junit.InstrumentationExtension; import io.opentelemetry.instrumentation.testing.junit.LibraryInstrumentationExtension; +import io.opentelemetry.sdk.metrics.data.HistogramData; +import io.opentelemetry.sdk.metrics.data.HistogramPointData; +import io.opentelemetry.sdk.metrics.data.LongPointData; +import io.opentelemetry.sdk.metrics.data.SumData; import io.opentelemetry.sdk.testing.assertj.LongPointAssert; import java.time.Duration; +import java.util.Arrays; +import java.util.Collection; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.RegisterExtension; @@ -80,6 +88,82 @@ void captureCircuitBreakerMetrics() { 1, "failsafe.circuit_breaker.state", "closed")))); } + @Test + void captureRetryPolicyMetrics() { + // given + RetryPolicy userRetryPolicy = + dev.failsafe.RetryPolicy.builder() + .handleResultIf(Objects::isNull) + .withMaxAttempts(3) + .build(); + FailsafeTelemetry failsafeTelemetry = FailsafeTelemetry.create(testing.getOpenTelemetry()); + RetryPolicy instrumentedRetryPolicy = + failsafeTelemetry.createRetryPolicy(userRetryPolicy, "testing"); + + // when + for (int i = 0; i <= 3; i++) { + int temp = i; + AtomicInteger retry = new AtomicInteger(0); + Failsafe.with(instrumentedRetryPolicy) + .get( + () -> { + if (retry.get() < temp) { + retry.incrementAndGet(); + return null; + } else { + return new Object(); + } + }); + } + + // then + testing.waitAndAssertMetrics("io.opentelemetry.failsafe-3.0"); + assertThat(testing.metrics().size()).isEqualTo(2); + + SumData executionCountMetric = + testing.metrics().stream() + .filter(m -> m.getName().equals("failsafe.retry_policy.execution.count")) + .findFirst() + .get() + .getLongSumData(); + assertThat(executionCountMetric.getPoints().size()).isEqualTo(2); + assertThat(executionCountMetric.getPoints()) + .anyMatch( + p -> + p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) + && p.getValue() == 1); + assertThat(executionCountMetric.getPoints()) + .anyMatch( + p -> + p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success")) + && p.getValue() == 3); + + HistogramData attemptsMetric = + testing.metrics().stream() + .filter(m -> m.getName().equals("failsafe.retry_policy.attempts")) + .findFirst() + .get() + .getHistogramData(); + Collection pointData = attemptsMetric.getPoints(); + assertThat(pointData).hasSize(2); + assertThat(pointData) + .anyMatch( + p -> + p.getCount() == 3 + && p.getMin() == 1 + && p.getMax() == 3 + && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("success")) + && Arrays.equals(p.getCounts().toArray(), new Long[] {1L, 1L, 1L, 0L})); + assertThat(pointData) + .anyMatch( + p -> + p.getCount() == 1 + && p.getMin() == 3 + && p.getMax() == 3 + && p.getAttributes().equals(buildExpectedRetryPolicyAttributes("failure")) + && Arrays.equals(p.getCounts().toArray(), new Long[] {0L, 0L, 1L, 0L})); + } + private static Consumer buildCircuitBreakerAssertion( long expectedValue, String expectedAttributeKey, String expectedAttributeValue) { return longSumAssert -> @@ -94,4 +178,11 @@ private static Consumer buildCircuitBreakerAssertion( .build(), attributes)); } + + private static Attributes buildExpectedRetryPolicyAttributes(String expectedOutcome) { + return Attributes.builder() + .put("failsafe.retry_policy.name", "testing") + .put("failsafe.retry_policy.outcome", expectedOutcome) + .build(); + } }