Skip to content

Commit 0c372fb

Browse files
committed
Rework crash-tracking to allow auto-injection of JVM arguments for error log capture
1 parent 233e0ce commit 0c372fb

File tree

32 files changed

+950
-380
lines changed

32 files changed

+950
-380
lines changed

dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java

Lines changed: 103 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import static datadog.environment.JavaVirtualMachine.isJavaVersionAtLeast;
44
import static datadog.environment.JavaVirtualMachine.isOracleJDK8;
55
import static datadog.trace.api.ConfigDefaults.DEFAULT_STARTUP_LOGS_ENABLED;
6+
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
67
import static datadog.trace.bootstrap.Library.WILDFLY;
78
import static datadog.trace.bootstrap.Library.detectLibraries;
89
import static datadog.trace.util.AgentThreadFactory.AgentThread.JMX_STARTUP;
@@ -21,6 +22,7 @@
2122
import datadog.trace.api.appsec.AppSecEventTracker;
2223
import datadog.trace.api.config.AppSecConfig;
2324
import datadog.trace.api.config.CiVisibilityConfig;
25+
import datadog.trace.api.config.CrashTrackingConfig;
2426
import datadog.trace.api.config.CwsConfig;
2527
import datadog.trace.api.config.DebuggerConfig;
2628
import datadog.trace.api.config.GeneralConfig;
@@ -38,6 +40,7 @@
3840
import datadog.trace.api.profiling.ProfilingEnablement;
3941
import datadog.trace.api.scopemanager.ScopeListener;
4042
import datadog.trace.bootstrap.benchmark.StaticEventLogger;
43+
import datadog.trace.bootstrap.config.provider.ConfigProvider;
4144
import datadog.trace.bootstrap.config.provider.StableConfigSource;
4245
import datadog.trace.bootstrap.instrumentation.api.AgentTracer;
4346
import datadog.trace.bootstrap.instrumentation.api.AgentTracer.TracerAPI;
@@ -54,6 +57,7 @@
5457
import java.net.URISyntaxException;
5558
import java.net.URL;
5659
import java.security.CodeSource;
60+
import java.util.Arrays;
5761
import java.util.EnumSet;
5862
import java.util.concurrent.TimeUnit;
5963
import java.util.concurrent.atomic.AtomicBoolean;
@@ -95,6 +99,9 @@ private enum AgentFeature {
9599
TRACING(TraceInstrumentationConfig.TRACE_ENABLED, true),
96100
JMXFETCH(JmxFetchConfig.JMX_FETCH_ENABLED, true),
97101
STARTUP_LOGS(GeneralConfig.STARTUP_LOGS_ENABLED, DEFAULT_STARTUP_LOGS_ENABLED),
102+
CRASH_TRACKING(
103+
CrashTrackingConfig.CRASH_TRACKING_ENABLED,
104+
CrashTrackingConfig.CRASH_TRACKING_ENABLED_DEFAULT),
98105
PROFILING(ProfilingConfig.PROFILING_ENABLED, false),
99106
APPSEC(AppSecConfig.APPSEC_ENABLED, false),
100107
IAST(IastConfig.IAST_ENABLED, false),
@@ -146,9 +153,11 @@ public boolean isEnabledByDefault() {
146153
private static ClassLoader AGENT_CLASSLOADER = null;
147154

148155
private static volatile Runnable PROFILER_INIT_AFTER_JMX = null;
156+
private static volatile Runnable CRASHTRACKER_INIT_AFTER_JMX = null;
149157

150158
private static boolean jmxFetchEnabled = true;
151159
private static boolean profilingEnabled = false;
160+
private static boolean crashTrackingEnabled = false;
152161
private static boolean appSecEnabled;
153162
private static boolean appSecFullyDisabled;
154163
private static boolean remoteConfigEnabled = true;
@@ -276,6 +285,7 @@ public static void start(
276285

277286
jmxFetchEnabled = isFeatureEnabled(AgentFeature.JMXFETCH);
278287
profilingEnabled = isFeatureEnabled(AgentFeature.PROFILING);
288+
crashTrackingEnabled = isFeatureEnabled(AgentFeature.CRASH_TRACKING);
279289
usmEnabled = isFeatureEnabled(AgentFeature.USM);
280290
appSecEnabled = isFeatureEnabled(AgentFeature.APPSEC);
281291
appSecFullyDisabled = isFullyDisabled(AgentFeature.APPSEC);
@@ -303,13 +313,7 @@ public static void start(
303313
// Profiling can not run early on Oracle JDK 8 because it will cause JFR initialization
304314
// deadlock.
305315
// Oracle JDK 8 JFR controller requires JMX so register an 'after-jmx-initialized' callback.
306-
PROFILER_INIT_AFTER_JMX =
307-
new Runnable() {
308-
@Override
309-
public void run() {
310-
startProfilingAgent(false, inst);
311-
}
312-
};
316+
PROFILER_INIT_AFTER_JMX = () -> startProfilingAgent(false, inst);
313317
}
314318
}
315319

@@ -322,6 +326,12 @@ public void run() {
322326
* when it will happen after the class transformers were added.
323327
*/
324328
AgentTaskScheduler.initialize();
329+
330+
// We need to run the crashtracking initialization after all the config has been resolved and
331+
// task scheduler initialized
332+
if (crashTrackingEnabled) {
333+
startCrashTracking();
334+
}
325335
startDatadogAgent(initTelemetry, inst);
326336

327337
final EnumSet<Library> libraries = detectLibraries(log);
@@ -715,6 +725,26 @@ private static synchronized void installDatadogTracer(
715725
StaticEventLogger.end("GlobalTracer");
716726
}
717727

728+
private static void startCrashTracking() {
729+
if (Platform.isJavaVersionAtLeast(9)) {
730+
// it is safe to initialize crashtracking early
731+
// since it can take 100ms+ to initialize the native library we will defer the initialization
732+
// ... unless we request early start with the debug config flag
733+
if (ConfigProvider.getInstance()
734+
.getBoolean(
735+
CrashTrackingConfig.CRASH_TRACKING_START_EARLY,
736+
CrashTrackingConfig.CRASH_TRACKING_START_EARLY_DEFAULT)) {
737+
initializeCrashTrackingDefault();
738+
} else {
739+
AgentTaskScheduler.INSTANCE.execute(Agent::initializeCrashTrackingDefault);
740+
}
741+
} else {
742+
// for Java 8 we are relying on JMX to give us the process PID
743+
// we need to delay the crash tracking initialization until JMX is available
744+
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTracking;
745+
}
746+
}
747+
718748
private static void scheduleJmxStart(final int jmxStartDelay) {
719749
if (jmxStartDelay > 0) {
720750
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
@@ -738,25 +768,33 @@ private static synchronized void startJmx() {
738768
if (jmxStarting.getAndSet(true)) {
739769
return; // another thread is already in startJmx
740770
}
741-
// error tracking initialization relies on JMX being available
742-
initializeErrorTracking();
743771
if (jmxFetchEnabled) {
744772
startJmxFetch();
745773
}
746774
initializeJmxSystemAccessProvider(AGENT_CLASSLOADER);
775+
if (crashTrackingEnabled && CRASHTRACKER_INIT_AFTER_JMX != null) {
776+
try {
777+
CRASHTRACKER_INIT_AFTER_JMX.run();
778+
} finally {
779+
CRASHTRACKER_INIT_AFTER_JMX = null;
780+
}
781+
}
747782
if (profilingEnabled) {
748783
registerDeadlockDetectionEvent();
749784
registerSmapEntryEvent();
750785
if (PROFILER_INIT_AFTER_JMX != null) {
751-
if (getJmxStartDelay() == 0) {
752-
log.debug("Waiting for profiler initialization");
753-
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
754-
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
755-
} else {
756-
log.debug("Initializing profiler");
757-
PROFILER_INIT_AFTER_JMX.run();
786+
try {
787+
if (getJmxStartDelay() == 0) {
788+
log.debug("Waiting for profiler initialization");
789+
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
790+
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
791+
} else {
792+
log.debug("Initializing profiler");
793+
PROFILER_INIT_AFTER_JMX.run();
794+
}
795+
} finally {
796+
PROFILER_INIT_AFTER_JMX = null;
758797
}
759-
PROFILER_INIT_AFTER_JMX = null;
760798
}
761799
}
762800
}
@@ -998,16 +1036,55 @@ private static void stopTelemetry() {
9981036
}
9991037
}
10001038

1001-
private static void initializeErrorTracking() {
1039+
private static void initializeDelayedCrashTracking() {
1040+
initializeCrashTracking(true, isCrashTrackingAutoconfigEnabled());
1041+
}
1042+
1043+
private static void initializeDelayedCrashTrackingOnlyJmx() {
1044+
initializeCrashTracking(true, false);
1045+
}
1046+
1047+
private static void initializeCrashTrackingDefault() {
1048+
initializeCrashTracking(false, isCrashTrackingAutoconfigEnabled());
1049+
}
1050+
1051+
private static boolean isCrashTrackingAutoconfigEnabled() {
1052+
return !ConfigProvider.getInstance()
1053+
.getBoolean(
1054+
CrashTrackingConfig.CRASH_TRACKING_DISABLE_AUTOCONFIG,
1055+
CrashTrackingConfig.CRASH_TRACKING_DISABLE_AUTOCONFIG_DEFAULT);
1056+
}
1057+
1058+
private static void initializeCrashTracking(boolean delayed, boolean checkNative) {
10021059
if (JavaVirtualMachine.isJ9()) {
10031060
// TODO currently crash tracking is supported only for HotSpot based JVMs
10041061
return;
10051062
}
1063+
log.debug("Initializing crashtracking");
10061064
try {
1007-
Class<?> clz = AGENT_CLASSLOADER.loadClass("com.datadog.crashtracking.ScriptInitializer");
1008-
clz.getMethod("initialize").invoke(null);
1065+
Class<?> clz = AGENT_CLASSLOADER.loadClass("datadog.crashtracking.Initializer");
1066+
// first try to use the JVMAccess using the native library; unless `checkNative` is false
1067+
boolean rslt =
1068+
checkNative && (boolean) clz.getMethod("initialize", boolean.class).invoke(null, false);
1069+
if (!rslt) {
1070+
if (delayed) {
1071+
// already delayed initialization, so no need to reschedule it again
1072+
// just call initialize and force JMX
1073+
rslt = (boolean) clz.getMethod("initialize", boolean.class).invoke(null, true);
1074+
} else {
1075+
// delayed initialization, so we need to reschedule it and mark as delayed but do not
1076+
// re-check the native library
1077+
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTrackingOnlyJmx;
1078+
}
1079+
}
1080+
if (rslt) {
1081+
log.debug("Crashtracking initialized");
1082+
} else {
1083+
log.debug(
1084+
SEND_TELEMETRY, "Crashtracking failed to initialize. No additional details available.");
1085+
}
10091086
} catch (Throwable t) {
1010-
log.debug("Unable to initialize crash uploader", t);
1087+
log.debug(SEND_TELEMETRY, "Unable to initialize crashtracking", t);
10111088
}
10121089
}
10131090

@@ -1106,8 +1183,11 @@ public void withTracer(TracerAPI tracer) {
11061183
}
11071184
});
11081185
}
1109-
} catch (final Throwable ex) {
1110-
log.error("Throwable thrown while starting profiling agent", ex);
1186+
} catch (final Throwable t) {
1187+
log.error(
1188+
SEND_TELEMETRY,
1189+
"Throwable thrown while starting profiling agent "
1190+
+ Arrays.toString(t.getCause().getStackTrace()));
11111191
} finally {
11121192
Thread.currentThread().setContextClassLoader(contextLoader);
11131193
}

0 commit comments

Comments
 (0)