Skip to content

Commit 826fbf9

Browse files
authored
add option for automatic JMX retry (#3511)
1 parent c0a9382 commit 826fbf9

File tree

4 files changed

+211
-71
lines changed

4 files changed

+211
-71
lines changed

CHANGELOG.asciidoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ Use subheadings with the "=====" level for adding notes for unreleased changes:
3535
===== Features
3636
* Added a configuration option to use queues in names of spring-rabbit transactions - {pull}3424[#3424]
3737
38+
[float]
39+
===== Features
40+
* Add option to retry JMX metrics capture in case of an exception - {pull}3511[#3511]
41+
3842
[float]
3943
===== Bug fixes
4044
* Add support to CLI attach download for new agent signature for 1.46.0+ - {pull}3513[#3513]

apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,16 @@
1818
*/
1919
package co.elastic.apm.agent.jmx;
2020

21+
import co.elastic.apm.agent.tracer.configuration.TimeDuration;
22+
import co.elastic.apm.agent.tracer.configuration.TimeDurationValueConverter;
2123
import org.stagemonitor.configuration.ConfigurationOption;
2224
import org.stagemonitor.configuration.ConfigurationOptionProvider;
2325

2426
import java.util.Collections;
2527
import java.util.List;
2628

29+
import static co.elastic.apm.agent.tracer.configuration.RangeValidator.isNotInRange;
30+
2731
public class JmxConfiguration extends ConfigurationOptionProvider {
2832

2933
private ConfigurationOption<List<JmxMetric>> captureJmxMetrics = ConfigurationOption.<List<JmxMetric>>builder(JmxMetric.TokenValueConverter.INSTANCE, List.class)
@@ -137,4 +141,15 @@ public class JmxConfiguration extends ConfigurationOptionProvider {
137141
/**
 * Returns the {@code captureJmxMetrics} configuration option, whose value is a list of {@link JmxMetric} entries.
 */
ConfigurationOption<List<JmxMetric>> getCaptureJmxMetrics() {
138142
return captureJmxMetrics;
139143
}
144+
145+
private final ConfigurationOption<TimeDuration> faildRetryInterval = TimeDurationValueConverter.durationOption("m")
146+
.key("jmx_failed_retry_interval")
147+
.tags("internal")
148+
.description("If set to a value greater or equal to 1m, the agent will retry failed JMX metric registrations.")
149+
.addValidator(isNotInRange(TimeDuration.of("1ms"), TimeDuration.of("59s")))
150+
.buildWithDefault(TimeDuration.of("0m"));
151+
152+
/**
 * Returns the {@code jmx_failed_retry_interval} configuration option.
 * <p>
 * The option defaults to {@code 0m}; per its description, a value of at least {@code 1m} makes the agent
 * retry failed JMX metric registrations.
 *
 * @return the retry-interval option (the spelling "Faild" is part of the existing accessor name)
 */
public ConfigurationOption<TimeDuration> getFaildRetryInterval() {
153+
return faildRetryInterval;
154+
}
140155
}

apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java

Lines changed: 88 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
import co.elastic.apm.agent.metrics.DoubleSupplier;
2424
import co.elastic.apm.agent.metrics.Labels;
2525
import co.elastic.apm.agent.metrics.MetricRegistry;
26-
import co.elastic.apm.agent.tracer.GlobalLocks;
26+
import co.elastic.apm.agent.sdk.internal.util.ExecutorUtils;
27+
import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils;
2728
import co.elastic.apm.agent.sdk.logging.Logger;
2829
import co.elastic.apm.agent.sdk.logging.LoggerFactory;
29-
import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils;
30+
import co.elastic.apm.agent.tracer.GlobalLocks;
31+
import co.elastic.apm.agent.tracer.configuration.TimeDuration;
3032
import org.stagemonitor.configuration.ConfigurationOption;
3133

3234
import javax.annotation.Nullable;
@@ -54,6 +56,7 @@
5456
import java.util.List;
5557
import java.util.Objects;
5658
import java.util.Set;
59+
import java.util.concurrent.ScheduledExecutorService;
5760
import java.util.concurrent.TimeUnit;
5861

5962
public class JmxMetricTracker extends AbstractLifecycleListener {
@@ -68,9 +71,17 @@ public class JmxMetricTracker extends AbstractLifecycleListener {
6871
@Nullable
6972
private volatile NotificationListener listener;
7073

74+
private final List<JmxMetric> failedMetrics;
75+
76+
@Nullable
77+
private ScheduledExecutorService retryExecutor;
78+
7179
/**
 * Creates the tracker from the tracer's {@link JmxConfiguration} and metric registry,
 * starting with an empty list of failed metrics.
 *
 * @param tracer tracer providing the JMX configuration and the metric registry
 */
public JmxMetricTracker(ElasticApmTracer tracer) {
7280
jmxConfiguration = tracer.getConfig(JmxConfiguration.class);
7381
metricRegistry = tracer.getMetricRegistry();
82+
83+
// using a synchronized list so that a single add to the list does not require explicit synchronization;
// compound operations (copy-then-clear, clear-then-refill) still lock on the list explicitly
84+
failedMetrics = Collections.synchronizedList(new ArrayList<JmxMetric>());
7485
}
7586

7687
@Override
@@ -175,19 +186,52 @@ synchronized void init(final MBeanServer platformMBeanServer) {
175186
jmxConfiguration.getCaptureJmxMetrics().addChangeListener(new ConfigurationOption.ChangeListener<List<JmxMetric>>() {
176187
@Override
177188
public void onChange(ConfigurationOption<?> configurationOption, List<JmxMetric> oldValue, List<JmxMetric> newValue) {
178-
List<JmxMetricRegistration> oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer);
179-
List<JmxMetricRegistration> newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer);
189+
List<JmxMetric> registrationErrors = new ArrayList<JmxMetric>(); // those are not needed
190+
List<JmxMetricRegistration> oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer, registrationErrors);
191+
192+
List<JmxMetricRegistration> newRegistrations;
193+
synchronized (failedMetrics) {
194+
failedMetrics.clear();
195+
newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer, failedMetrics);
196+
}
197+
180198

181199
for (JmxMetricRegistration addedRegistration : removeAll(oldRegistrations, newRegistrations)) {
182200
addedRegistration.register(platformMBeanServer, metricRegistry);
183201
}
184202
for (JmxMetricRegistration deletedRegistration : removeAll(newRegistrations, oldRegistrations)) {
185203
deletedRegistration.unregister(metricRegistry);
186204
}
187-
188205
}
189206
});
190-
register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer);
207+
208+
ConfigurationOption<TimeDuration> failedRetryConfig = jmxConfiguration.getFaildRetryInterval();
209+
if (!failedRetryConfig.isDefault()) {
210+
long retryMillis = failedRetryConfig.getValue().getMillis();
211+
if (retryExecutor != null) {
212+
ExecutorUtils.shutdownAndWaitTermination(retryExecutor);
213+
}
214+
215+
retryExecutor = ExecutorUtils.createSingleThreadSchedulingDaemonPool("jmx-retry");
216+
retryExecutor.scheduleAtFixedRate(new Runnable() {
217+
@Override
218+
public void run() {
219+
retryFailedJmx(platformMBeanServer);
220+
}
221+
}, retryMillis, retryMillis, TimeUnit.MILLISECONDS);
222+
}
223+
224+
register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer, failedMetrics);
225+
}
226+
227+
// package-private for testing
228+
/**
 * Re-attempts registration of the JMX metrics currently recorded in {@code failedMetrics}.
 * <p>
 * While holding the monitor of the failed-metrics list, the list is copied, cleared and the copy is passed
 * to {@code register} with the same list as the failure sink, so metrics that fail again are recorded anew.
 *
 * @param platformMBeanServer MBean server handed to {@code register} for the retry
 */
void retryFailedJmx(MBeanServer platformMBeanServer) {
229+
List<JmxMetric> failed = JmxMetricTracker.this.failedMetrics;
230+
// lock on the list itself so the copy + clear + re-register sequence is not interleaved
// with other blocks synchronizing on the same list
synchronized (failed) {
231+
// snapshot first: the live list is emptied and then refilled with whatever fails again
List<JmxMetric> toRetry = new ArrayList<>(failed);
232+
failed.clear();
233+
register(toRetry, platformMBeanServer, failed);
234+
}
191235
}
192236

193237
private void registerMBeanNotificationListener(final MBeanServer server) {
@@ -217,7 +261,7 @@ private void addMBean(ObjectName mBeanName, JmxMetric jmxMetric) {
217261
ObjectName metricName = jmxMetric.getObjectName();
218262
if (metricName.apply(mBeanName) || matchesJbossStatisticsPool(mBeanName, metricName, server)) {
219263
logger.debug("MBean added at runtime: {}", jmxMetric.getObjectName());
220-
register(Collections.singletonList(jmxMetric), server);
264+
register(Collections.singletonList(jmxMetric), server, failedMetrics);
221265
}
222266
}
223267

@@ -280,28 +324,36 @@ private static <T> List<T> removeAll(List<T> removeFromThis, List<T> toRemove) {
280324
return result;
281325
}
282326

283-
private void register(List<JmxMetric> jmxMetrics, MBeanServer server) {
284-
for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server)) {
327+
/**
 * Compiles the registrations for the given metrics and registers each of them with the metric registry.
 *
 * @param jmxMetrics    JMX metrics to register
 * @param server        MBean server used to compile and register the metrics
 * @param failedMetrics list receiving the metrics whose compilation threw an exception (out)
 */
private void register(List<JmxMetric> jmxMetrics, MBeanServer server, List<JmxMetric> failedMetrics) {
328+
for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server, failedMetrics)) {
285329
registration.register(server, metricRegistry);
286330
}
287331
}
288332

289333
/**
290334
* A single {@link JmxMetric} can yield multiple {@link JmxMetricRegistration}s if the {@link JmxMetric} contains multiple attributes
335+
*
336+
* @param jmxMetrics JMX metrics to register
337+
* @param server MBean server
338+
* @param failedMetrics list of JMX metrics that failed to register (out)
* @return the registrations of all metrics that were compiled without an exception
291339
*/
292-
private List<JmxMetricRegistration> compileJmxMetricRegistrations(List<JmxMetric> jmxMetrics, MBeanServer server) {
293-
List<JmxMetricRegistration> registrations = new ArrayList<>();
340+
private List<JmxMetricRegistration> compileJmxMetricRegistrations(List<JmxMetric> jmxMetrics, MBeanServer server, List<JmxMetric> failedMetrics) {
341+
List<JmxMetricRegistration> globalRegistrations = new ArrayList<>();
294342
for (JmxMetric jmxMetric : jmxMetrics) {
343+
// collect into a per-metric list first: it is only merged into the result once the whole metric
// was processed without an exception, so a metric that fails part-way contributes nothing
List<JmxMetricRegistration> metricRegistrations = new ArrayList<>();
295344
try {
296-
addJmxMetricRegistration(jmxMetric, registrations, server);
345+
addJmxMetricRegistration(jmxMetric, metricRegistrations, server);
346+
globalRegistrations.addAll(metricRegistrations);
297347
} catch (Exception e) {
348+
// record the metric in the caller-provided list in addition to logging the failure
failedMetrics.add(jmxMetric);
298349
logger.error("Failed to register JMX metric {}", jmxMetric.toString(), e);
299350
}
351+
300352
}
301-
return registrations;
353+
return globalRegistrations;
302354
}
303355

304-
private static void addJmxMetricRegistration(final JmxMetric jmxMetric, List<JmxMetricRegistration> registrations, MBeanServer server) throws JMException {
356+
private void addJmxMetricRegistration(final JmxMetric jmxMetric, List<JmxMetricRegistration> registrations, MBeanServer server) throws JMException {
305357
Set<ObjectInstance> mbeans = server.queryMBeans(jmxMetric.getObjectName(), null);
306358
if (!mbeans.isEmpty()) {
307359
logger.debug("Found mbeans for object name {}", jmxMetric.getObjectName());
@@ -355,20 +407,21 @@ private static String metricPrepend(Labels labels) {
355407
return "";
356408
}
357409

358-
private static void addJmxMetricRegistration(JmxMetric jmxMetric, List<JmxMetricRegistration> registrations, ObjectName objectName, Object value, JmxMetric.Attribute attribute, String attributeName, String metricPrepend) throws AttributeNotFoundException {
410+
private void addJmxMetricRegistration(JmxMetric jmxMetric, List<JmxMetricRegistration> registrations, ObjectName objectName, Object value, JmxMetric.Attribute attribute, String attributeName, @Nullable String metricPrepend) throws AttributeNotFoundException {
411+
String effectiveAttributeName = metricPrepend == null ? attributeName : metricPrepend + attributeName;
412+
boolean unsubscribeOnError = jmxConfiguration.getFaildRetryInterval().isDefault();
359413
if (value instanceof Number) {
360414
logger.debug("Found number attribute {}={}", attribute.getJmxAttributeName(), value);
361415
registrations.add(
362416
new JmxMetricRegistration(
363417
attribute.getMetricName(
364-
metricPrepend == null ?
365-
attributeName :
366-
metricPrepend + attributeName
418+
effectiveAttributeName
367419
),
368420
attribute.getLabels(objectName),
369421
attributeName,
370422
null,
371-
objectName
423+
objectName,
424+
unsubscribeOnError
372425
)
373426
);
374427
} else if (value instanceof CompositeData) {
@@ -380,14 +433,12 @@ private static void addJmxMetricRegistration(JmxMetric jmxMetric, List<JmxMetric
380433
new JmxMetricRegistration(
381434
attribute.getCompositeMetricName(
382435
key,
383-
metricPrepend == null ?
384-
attributeName :
385-
metricPrepend + attributeName
386-
),
436+
effectiveAttributeName),
387437
attribute.getLabels(objectName),
388438
attributeName,
389439
key,
390-
objectName
440+
objectName,
441+
unsubscribeOnError
391442
)
392443
);
393444
} else {
@@ -411,13 +462,15 @@ static class JmxMetricRegistration {
411462
@Nullable
412463
private final String compositeDataKey;
413464
private final ObjectName objectName;
465+
private final boolean unsubscribeOnError;
414466

415-
private JmxMetricRegistration(String metricName, Labels labels, String jmxAttribute, @Nullable String compositeDataKey, ObjectName objectName) {
467+
/**
 * Creates a registration describing one metric value read from a JMX attribute.
 *
 * @param metricName         name of the metric
 * @param labels             metric labels; an immutable copy is stored
 * @param jmxAttribute       name of the JMX attribute to read
 * @param compositeDataKey   key to read within a {@link CompositeData} attribute value, or {@code null} when the
 *                           attribute value itself is the number
 * @param objectName         object name of the MBean that owns the attribute
 * @param unsubscribeOnError when {@code true}, the metric is unregistered once reading it fails with an
 *                           {@link InstanceNotFoundException} or {@link AttributeNotFoundException}
 */
private JmxMetricRegistration(String metricName, Labels labels, String jmxAttribute, @Nullable String compositeDataKey, ObjectName objectName, boolean unsubscribeOnError) {
416468
this.metricName = metricName;
417469
this.labels = labels.immutableCopy();
418470
this.jmxAttribute = jmxAttribute;
419471
this.compositeDataKey = compositeDataKey;
420472
this.objectName = objectName;
473+
this.unsubscribeOnError = unsubscribeOnError;
421474
}
422475

423476

@@ -427,13 +480,17 @@ void register(final MBeanServer server, final MetricRegistry metricRegistry) {
427480
@Override
428481
public double get() {
429482
try {
483+
double value;
430484
if (compositeDataKey == null) {
431-
return ((Number) server.getAttribute(objectName, jmxAttribute)).doubleValue();
485+
value = ((Number) server.getAttribute(objectName, jmxAttribute)).doubleValue();
432486
} else {
433-
return ((Number) ((CompositeData) server.getAttribute(objectName, jmxAttribute)).get(compositeDataKey)).doubleValue();
487+
value = ((Number) ((CompositeData) server.getAttribute(objectName, jmxAttribute)).get(compositeDataKey)).doubleValue();
434488
}
489+
return value;
435490
} catch (InstanceNotFoundException | AttributeNotFoundException e) {
436-
unregister(metricRegistry);
491+
if (unsubscribeOnError) {
492+
unregister(metricRegistry);
493+
}
437494
return Double.NaN;
438495
} catch (Exception e) {
439496
return Double.NaN;
@@ -473,5 +530,8 @@ public void stop() throws Exception {
473530
if (logManagerPropertyPoller != null) {
474531
logManagerPropertyPoller.interrupt();
475532
}
533+
if (retryExecutor != null) {
534+
ExecutorUtils.shutdownAndWaitTermination(retryExecutor);
535+
}
476536
}
477537
}

0 commit comments

Comments
 (0)