Skip to content

Commit 574efab

Browse files
Add support for endpoint and server_name connection error labels (#138533)
Adds a protected RemoteConnectionStrategy method that lets each strategy add its own strategy-specific attributes to connection error metric records. This supports the monitoring dashboard: the metric records can now include the endpoint and server_name information, which corresponds to the resolved address and the error stack traces in the component logs. Relates: ES-12696
1 parent 68ccba3 commit 574efab

File tree

4 files changed

+80
-16
lines changed

4 files changed

+80
-16
lines changed

server/src/main/java/org/elasticsearch/transport/ProxyConnectionStrategy.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ protected boolean shouldOpenMoreConnections() {
107107

108108
@Override
109109
protected boolean strategyMustBeRebuilt(LinkedProjectConfig config) {
110-
assert config instanceof ProxyLinkedProjectConfig : "expected config to be of type " + ProxyConnectionStrategy.class;
110+
assert config instanceof ProxyLinkedProjectConfig : "expected config to be of type " + ProxyLinkedProjectConfig.class;
111111
final var proxyConfig = (ProxyLinkedProjectConfig) config;
112112
return proxyConfig.maxNumConnections() != maxNumConnections
113113
|| configuredAddress.equals(proxyConfig.proxyAddress()) == false
@@ -119,6 +119,14 @@ protected ConnectionStrategy strategyType() {
119119
return ConnectionStrategy.PROXY;
120120
}
121121

122+
@Override
123+
protected void addStrategySpecificConnectionErrorMetricAttributes(Map<String, Object> attributesMap) {
124+
attributesMap.put("endpoint", configuredAddress);
125+
if (configuredServerName != null) {
126+
attributesMap.put("server_name", configuredServerName);
127+
}
128+
}
129+
122130
@Override
123131
protected void connectImpl(ActionListener<Void> listener) {
124132
performProxyConnectionProcess(listener);

server/src/main/java/org/elasticsearch/transport/RemoteConnectionStrategy.java

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import java.net.UnknownHostException;
3232
import java.util.ArrayList;
3333
import java.util.Collections;
34+
import java.util.HashMap;
3435
import java.util.List;
3536
import java.util.Map;
3637
import java.util.Objects;
@@ -237,19 +238,13 @@ private void connectionAttemptCompleted(@Nullable Exception e) {
237238
} else {
238239
logger.warn(msgSupplier, e);
239240
if (connectionAttemptFailures != null) {
240-
connectionAttemptFailures.incrementBy(
241-
1,
242-
Map.of(
243-
"linked_project_id",
244-
linkedProjectId.toString(),
245-
"linked_project_alias",
246-
clusterAlias,
247-
"attempt",
248-
(isInitialAttempt ? ConnectionAttempt.initial : ConnectionAttempt.reconnect).toString(),
249-
"strategy",
250-
strategyType().toString()
251-
)
252-
);
241+
final var attributesMap = new HashMap<String, Object>();
242+
attributesMap.put("linked_project_id", linkedProjectId.toString());
243+
attributesMap.put("linked_project_alias", clusterAlias);
244+
attributesMap.put("attempt", (isInitialAttempt ? ConnectionAttempt.initial : ConnectionAttempt.reconnect).toString());
245+
attributesMap.put("strategy", strategyType().toString());
246+
addStrategySpecificConnectionErrorMetricAttributes(attributesMap);
247+
connectionAttemptFailures.incrementBy(1, attributesMap);
253248
}
254249
}
255250
}
@@ -264,6 +259,11 @@ boolean shouldRebuildConnection(LinkedProjectConfig config) {
264259

265260
protected abstract ConnectionStrategy strategyType();
266261

262+
/**
263+
* Add strategy-specific attributes for a new connection error metric record. The default implementation is a no-op.
264+
*/
265+
protected void addStrategySpecificConnectionErrorMetricAttributes(Map<String, Object> attributesMap) {}
266+
267267
@Override
268268
public void onNodeDisconnected(DiscoveryNode node, @Nullable Exception closeException) {
269269
if (shouldOpenMoreConnections()) {

server/src/test/java/org/elasticsearch/transport/ProxyConnectionStrategyTests.java

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import org.elasticsearch.core.Tuple;
2626
import org.elasticsearch.index.IndexVersion;
2727
import org.elasticsearch.index.IndexVersions;
28+
import org.elasticsearch.telemetry.InstrumentType;
29+
import org.elasticsearch.telemetry.RecordingMeterRegistry;
2830
import org.elasticsearch.test.ESTestCase;
2931
import org.elasticsearch.test.TransportVersionUtils;
3032
import org.elasticsearch.test.junit.annotations.TestLogging;
@@ -360,6 +362,44 @@ public void testConnectFailsWithNonRetryableException() {
360362
}
361363
}
362364

365+
public void testStrategySpecificConnectionErrorMetricAttributesAreAdded() {
366+
try (
367+
MockTransportService transport1 = startTransport("remote", VersionInformation.CURRENT, TransportVersion.current());
368+
MockTransportService localService = MockTransportService.createNewService(
369+
Settings.EMPTY,
370+
VersionInformation.CURRENT,
371+
TransportVersion.current(),
372+
threadPool
373+
)
374+
) {
375+
final var address1 = transport1.boundAddress().publishAddress();
376+
localService.addSendBehavior(address1, (connection, requestId, action, request, options) -> {
377+
throw new ElasticsearchException("non-retryable");
378+
});
379+
localService.start();
380+
localService.acceptIncomingRequests();
381+
382+
final var cfg = proxyStrategyConfig(clusterAlias, 1, address1.toString(), "address1_server_name");
383+
final var connectFuture = new PlainActionFuture<RemoteClusterService.RemoteClusterConnectionStatus>();
384+
localService.getRemoteClusterService().updateRemoteCluster(cfg, false, connectFuture);
385+
final var exception = expectThrows(ElasticsearchException.class, connectFuture::actionGet);
386+
assertThat(exception.getMessage(), containsString("non-retryable"));
387+
388+
assert localService.getTelemetryProvider() != null;
389+
final var meterRegistry = localService.getTelemetryProvider().getMeterRegistry();
390+
assert meterRegistry instanceof RecordingMeterRegistry;
391+
final var metricRecorder = ((RecordingMeterRegistry) meterRegistry).getRecorder();
392+
metricRecorder.collect();
393+
final var counterName = RemoteClusterService.CONNECTION_ATTEMPT_FAILURES_COUNTER_NAME;
394+
final var measurements = metricRecorder.getMeasurements(InstrumentType.LONG_COUNTER, counterName);
395+
assertFalse(measurements.isEmpty());
396+
final var measurement = measurements.getLast();
397+
assertThat(measurement.getLong(), equalTo(1L));
398+
assertThat(measurement.attributes().get("endpoint"), equalTo(address1.toString()));
399+
assertThat(measurement.attributes().get("server_name"), equalTo("address1_server_name"));
400+
}
401+
}
402+
363403
public void testClusterNameValidationPreventConnectingToDifferentClusters() throws Exception {
364404
Settings otherSettings = Settings.builder().put("cluster.name", "otherCluster").build();
365405

@@ -600,13 +640,16 @@ public void testProxyStrategyWillNeedToBeRebuiltIfNumOfSocketsOrAddressesOrServe
600640
assertFalse(strategy.shouldRebuildConnection(RemoteClusterSettings.toConfig(clusterAlias, noChange)));
601641
Settings addressesChanged = Settings.builder()
602642
.put(modeSetting.getKey(), "proxy")
603-
.put(addressesSetting.getKey(), remoteAddress.toString())
643+
.put(addressesSetting.getKey(), "foobar:8080")
644+
.put(socketConnections.getKey(), numOfConnections)
645+
.put(serverName.getKey(), "server-name")
604646
.build();
605647
assertTrue(strategy.shouldRebuildConnection(RemoteClusterSettings.toConfig(clusterAlias, addressesChanged)));
606648
Settings socketsChanged = Settings.builder()
607649
.put(modeSetting.getKey(), "proxy")
608650
.put(addressesSetting.getKey(), remoteAddress.toString())
609651
.put(socketConnections.getKey(), numOfConnections + 1)
652+
.put(serverName.getKey(), "server-name")
610653
.build();
611654
assertTrue(strategy.shouldRebuildConnection(RemoteClusterSettings.toConfig(clusterAlias, socketsChanged)));
612655
Settings serverNameChange = Settings.builder()

server/src/test/java/org/elasticsearch/transport/RemoteConnectionStrategyTests.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.elasticsearch.threadpool.TestThreadPool;
3131
import org.elasticsearch.threadpool.ThreadPool;
3232

33+
import java.util.Map;
3334
import java.util.Set;
3435

3536
import static org.elasticsearch.test.MockLog.assertThatLogger;
@@ -267,7 +268,13 @@ public void testConnectionAttemptMetricsAndLogging() {
267268
final var measurement = measurements.getLast();
268269
assertThat(measurement.getLong(), equalTo(1L));
269270
final var attributes = measurement.attributes();
270-
final var keySet = Set.of("linked_project_id", "linked_project_alias", "attempt", "strategy");
271+
final var keySet = Set.of(
272+
"linked_project_id",
273+
"linked_project_alias",
274+
"attempt",
275+
"strategy",
276+
"fake_metric_attribute_name"
277+
);
271278
final var expectedAttemptType = isInitialConnectAttempt
272279
? RemoteConnectionStrategy.ConnectionAttempt.initial
273280
: RemoteConnectionStrategy.ConnectionAttempt.reconnect;
@@ -276,6 +283,7 @@ public void testConnectionAttemptMetricsAndLogging() {
276283
assertThat(attributes.get("linked_project_alias"), equalTo(alias));
277284
assertThat(attributes.get("attempt"), equalTo(expectedAttemptType.toString()));
278285
assertThat(attributes.get("strategy"), equalTo(strategy.strategyType().toString()));
286+
assertThat(attributes.get("fake_metric_attribute_name"), equalTo("fake_metric_attribute_value"));
279287
}
280288
}
281289
}
@@ -390,5 +398,10 @@ protected void connectImpl(ActionListener<Void> listener) {
390398
protected RemoteConnectionInfo.ModeInfo getModeInfo() {
391399
return null;
392400
}
401+
402+
@Override
403+
protected void addStrategySpecificConnectionErrorMetricAttributes(Map<String, Object> attributeMap) {
404+
attributeMap.put("fake_metric_attribute_name", "fake_metric_attribute_value");
405+
}
393406
}
394407
}

0 commit comments

Comments
 (0)