Skip to content

Commit b1aa31a

Browse files
Use LongUpDownCounter for Linked Project Error Metrics (elastic#139657)
Replaces LongCounter with LongUpDownCounter for the metric used for linked project connection errors. Because the metric may not be incremented for long periods of time, it was not appearing in the mappings of the current index of the APM data stream, which resulted in 'not found' errors when developing dashboards and alerts. The change required adding an initial zero value so the metric will be collected every polling cycle. Relates: ES-12696
1 parent c8c4b7e commit b1aa31a

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,13 @@ public boolean isRemoteClusterServerEnabled() {
102102
* the functionality to do it the right way is not yet ready -- replace this code when it's ready.
103103
*/
104104
this.crossProjectEnabled = settings.getAsBoolean("serverless.cross_project.enabled", false);
105+
// Since this counter may never be incremented we need to force an initial observed value of zero via add(0) so the metric will be
106+
// in the mappings of the indices of the APM data stream, otherwise we will encounter 'not found' errors in dashboards and alerts.
107+
// See observability-dev issue #3042.
105108
transportService.getTelemetryProvider()
106109
.getMeterRegistry()
107-
.registerLongCounter(CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME, "linked project connection attempt failure count", "count");
110+
.registerLongUpDownCounter(CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME, "linked project connection attempt failure count", "count")
111+
.add(0);
108112
}
109113

110114
public RemoteClusterCredentialsManager getRemoteClusterCredentialsManager() {

server/src/main/java/org/elasticsearch/transport/RemoteConnectionStrategy.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
2222
import org.elasticsearch.core.Nullable;
2323
import org.elasticsearch.telemetry.TelemetryProvider;
24-
import org.elasticsearch.telemetry.metric.LongCounter;
24+
import org.elasticsearch.telemetry.metric.LongUpDownCounter;
2525
import org.elasticsearch.threadpool.ThreadPool;
2626

2727
import java.io.Closeable;
@@ -78,10 +78,9 @@ enum ConnectionAttempt {
7878
reconnect
7979
}
8080

81-
private static final String metricLabelPrefix = "es_linked_project";
82-
static final String linkedProjectIdLabel = metricLabelPrefix + "_id";
83-
static final String linkedProjectAliasLabel = metricLabelPrefix + "_alias";
84-
static final String connectionAtemptLabel = metricLabelPrefix + "_attempt";
81+
static final String linkedProjectIdLabel = "es_linked_project_id";
82+
static final String linkedProjectAliasLabel = "es_linked_project_alias";
83+
static final String connectionAtemptLabel = "es_linked_project_attempt";
8584

8685
private final int maxPendingConnectionListeners;
8786

@@ -91,7 +90,7 @@ enum ConnectionAttempt {
9190
private final Object mutex = new Object();
9291
private List<ActionListener<Void>> listeners = new ArrayList<>();
9392
private final AtomicBoolean initialConnectionAttempted = new AtomicBoolean(false);
94-
private final LongCounter connectionAttemptFailures;
93+
private final LongUpDownCounter connectionAttemptFailures;
9594

9695
protected final TransportService transportService;
9796
protected final RemoteConnectionManager connectionManager;
@@ -110,9 +109,11 @@ enum ConnectionAttempt {
110109
connectionManager.addListener(this);
111110
}
112111

113-
private LongCounter lookupConnectionFailureMetric(TelemetryProvider telemetryProvider) {
112+
private LongUpDownCounter lookupConnectionFailureMetric(TelemetryProvider telemetryProvider) {
114113
final var meterRegistry = telemetryProvider == null ? null : telemetryProvider.getMeterRegistry();
115-
return meterRegistry == null ? null : meterRegistry.getLongCounter(RemoteClusterService.CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME);
114+
return meterRegistry == null
115+
? null
116+
: meterRegistry.getLongUpDownCounter(RemoteClusterService.CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME);
116117
}
117118

118119
static ConnectionProfile buildConnectionProfile(LinkedProjectConfig config, String transportProfile) {
@@ -242,7 +243,7 @@ private void connectionAttemptCompleted(@Nullable Exception e) {
242243
} else {
243244
logger.warn(msgSupplier, e);
244245
if (connectionAttemptFailures != null) {
245-
connectionAttemptFailures.incrementBy(
246+
connectionAttemptFailures.add(
246247
1,
247248
Map.of(
248249
linkedProjectIdLabel,

server/src/test/java/org/elasticsearch/transport/RemoteConnectionStrategyTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ public void testConnectionAttemptMetricsAndLogging() {
266266
if (shouldConnectFail) {
267267
metricRecorder.collect();
268268
final var counterName = RemoteClusterService.CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME;
269-
final var measurements = metricRecorder.getMeasurements(InstrumentType.LONG_COUNTER, counterName);
269+
final var measurements = metricRecorder.getMeasurements(InstrumentType.LONG_UP_DOWN_COUNTER, counterName);
270270
assertFalse(measurements.isEmpty());
271271
final var measurement = measurements.getLast();
272272
assertThat(measurement.getLong(), equalTo(1L));

0 commit comments

Comments
 (0)