Skip to content

Commit 08f50c2

Browse files
MattAlp and jbachorik authored
Automatically register crashtracking via native extensions (#8851)
Co-authored-by: Jaroslav Bachorik <[email protected]>
1 parent 9b2d628 commit 08f50c2

File tree

50 files changed

+1062
-438
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+1062
-438
lines changed

dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java

Lines changed: 123 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static datadog.environment.JavaVirtualMachine.isJavaVersionAtLeast;
44
import static datadog.environment.JavaVirtualMachine.isOracleJDK8;
55
import static datadog.trace.api.ConfigDefaults.DEFAULT_STARTUP_LOGS_ENABLED;
6+
import static datadog.trace.api.telemetry.LogCollector.SEND_TELEMETRY;
67
import static datadog.trace.bootstrap.Library.WILDFLY;
78
import static datadog.trace.bootstrap.Library.detectLibraries;
89
import static datadog.trace.util.AgentThreadFactory.AgentThread.JMX_STARTUP;
@@ -21,6 +22,7 @@
2122
import datadog.trace.api.appsec.AppSecEventTracker;
2223
import datadog.trace.api.config.AppSecConfig;
2324
import datadog.trace.api.config.CiVisibilityConfig;
25+
import datadog.trace.api.config.CrashTrackingConfig;
2426
import datadog.trace.api.config.CwsConfig;
2527
import datadog.trace.api.config.DebuggerConfig;
2628
import datadog.trace.api.config.GeneralConfig;
@@ -56,6 +58,7 @@
5658
import java.net.URISyntaxException;
5759
import java.net.URL;
5860
import java.security.CodeSource;
61+
import java.util.Arrays;
5962
import java.util.EnumSet;
6063
import java.util.concurrent.TimeUnit;
6164
import java.util.concurrent.atomic.AtomicBoolean;
@@ -97,6 +100,9 @@ private enum AgentFeature {
97100
TRACING(TraceInstrumentationConfig.TRACE_ENABLED, true),
98101
JMXFETCH(JmxFetchConfig.JMX_FETCH_ENABLED, true),
99102
STARTUP_LOGS(GeneralConfig.STARTUP_LOGS_ENABLED, DEFAULT_STARTUP_LOGS_ENABLED),
103+
CRASH_TRACKING(
104+
CrashTrackingConfig.CRASH_TRACKING_ENABLED,
105+
CrashTrackingConfig.CRASH_TRACKING_ENABLED_DEFAULT),
100106
PROFILING(ProfilingConfig.PROFILING_ENABLED, false),
101107
APPSEC(AppSecConfig.APPSEC_ENABLED, false),
102108
IAST(IastConfig.IAST_ENABLED, false),
@@ -150,9 +156,11 @@ public boolean isEnabledByDefault() {
150156
private static ClassLoader AGENT_CLASSLOADER = null;
151157

152158
private static volatile Runnable PROFILER_INIT_AFTER_JMX = null;
159+
private static volatile Runnable CRASHTRACKER_INIT_AFTER_JMX = null;
153160

154161
private static boolean jmxFetchEnabled = true;
155162
private static boolean profilingEnabled = false;
163+
private static boolean crashTrackingEnabled = false;
156164
private static boolean appSecEnabled;
157165
private static boolean appSecFullyDisabled;
158166
private static boolean remoteConfigEnabled = true;
@@ -282,6 +290,7 @@ public static void start(
282290

283291
jmxFetchEnabled = isFeatureEnabled(AgentFeature.JMXFETCH);
284292
profilingEnabled = isFeatureEnabled(AgentFeature.PROFILING);
293+
crashTrackingEnabled = isFeatureEnabled(AgentFeature.CRASH_TRACKING);
285294
usmEnabled = isFeatureEnabled(AgentFeature.USM);
286295
appSecEnabled = isFeatureEnabled(AgentFeature.APPSEC);
287296
appSecFullyDisabled = isFullyDisabled(AgentFeature.APPSEC);
@@ -328,13 +337,7 @@ public static void start(
328337
// Profiling can not run early on Oracle JDK 8 because it will cause JFR initialization
329338
// deadlock.
330339
// Oracle JDK 8 JFR controller requires JMX so register an 'after-jmx-initialized' callback.
331-
PROFILER_INIT_AFTER_JMX =
332-
new Runnable() {
333-
@Override
334-
public void run() {
335-
startProfilingAgent(false, inst);
336-
}
337-
};
340+
PROFILER_INIT_AFTER_JMX = () -> startProfilingAgent(false, inst);
338341
}
339342
}
340343

@@ -347,6 +350,14 @@ public void run() {
347350
* when it will happen after the class transformers were added.
348351
*/
349352
AgentTaskScheduler.initialize();
353+
354+
// We need to run the crashtracking initialization after all the config has been resolved and
355+
// task scheduler initialized
356+
if (crashTrackingEnabled) {
357+
StaticEventLogger.begin("crashtracking");
358+
startCrashTracking();
359+
StaticEventLogger.end("crashtracking");
360+
}
350361
startDatadogAgent(initTelemetry, inst);
351362

352363
final EnumSet<Library> libraries = detectLibraries(log);
@@ -741,6 +752,29 @@ private static synchronized void installDatadogTracer(
741752
StaticEventLogger.end("GlobalTracer");
742753
}
743754

755+
private static void startCrashTracking() {
756+
if (isJavaVersionAtLeast(9)) {
757+
// it is safe to initialize crashtracking early
758+
// since it can take 100ms+ to initialize the native library we will defer the initialization
759+
// ... unless we request early start with the debug config flag
760+
boolean forceEarlyStart = CrashTrackingConfig.CRASH_TRACKING_START_EARLY_DEFAULT;
761+
String forceEarlyStartStr =
762+
ddGetProperty("dd." + CrashTrackingConfig.CRASH_TRACKING_START_EARLY);
763+
if (forceEarlyStartStr != null) {
764+
forceEarlyStart = Boolean.parseBoolean(forceEarlyStartStr);
765+
}
766+
if (forceEarlyStart) {
767+
initializeCrashTrackingDefault();
768+
} else {
769+
AgentTaskScheduler.INSTANCE.execute(Agent::initializeCrashTrackingDefault);
770+
}
771+
} else {
772+
// for Java 8 we are relying on JMX to give us the process PID
773+
// we need to delay the crash tracking initialization until JMX is available
774+
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTracking;
775+
}
776+
}
777+
744778
private static void scheduleJmxStart(final int jmxStartDelay) {
745779
if (jmxStartDelay > 0) {
746780
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
@@ -764,25 +798,41 @@ private static synchronized void startJmx() {
764798
if (jmxStarting.getAndSet(true)) {
765799
return; // another thread is already in startJmx
766800
}
767-
// error tracking initialization relies on JMX being available
768-
initializeErrorTracking();
769801
if (jmxFetchEnabled) {
770802
startJmxFetch();
771803
}
772804
initializeJmxSystemAccessProvider(AGENT_CLASSLOADER);
805+
if (crashTrackingEnabled && CRASHTRACKER_INIT_AFTER_JMX != null) {
806+
try {
807+
CRASHTRACKER_INIT_AFTER_JMX.run();
808+
} finally {
809+
CRASHTRACKER_INIT_AFTER_JMX = null;
810+
}
811+
}
773812
if (profilingEnabled) {
774813
registerDeadlockDetectionEvent();
775814
registerSmapEntryEvent();
776815
if (PROFILER_INIT_AFTER_JMX != null) {
777-
if (getJmxStartDelay() == 0) {
778-
log.debug("Waiting for profiler initialization");
779-
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
780-
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
781-
} else {
782-
log.debug("Initializing profiler");
783-
PROFILER_INIT_AFTER_JMX.run();
816+
try {
817+
/*
818+
When getJmxStartDelay() is set to 0 we will attempt to initialize the JMX subsystem as soon as available.
819+
But, this can cause issues with JFR as it needs some 'grace period' after JMX is ready. That's why we are
820+
re-scheduling the profiler initialization code just a tad later.
821+
822+
If the jmx start delay is set, we are already delayed relative to the jmx init so we can just plainly
823+
run the initialization code.
824+
*/
825+
if (getJmxStartDelay() == 0) {
826+
log.debug("Waiting for profiler initialization");
827+
AgentTaskScheduler.INSTANCE.scheduleWithJitter(
828+
PROFILER_INIT_AFTER_JMX, 500, TimeUnit.MILLISECONDS);
829+
} else {
830+
log.debug("Initializing profiler");
831+
PROFILER_INIT_AFTER_JMX.run();
832+
}
833+
} finally {
834+
PROFILER_INIT_AFTER_JMX = null;
784835
}
785-
PROFILER_INIT_AFTER_JMX = null;
786836
}
787837
}
788838
}
@@ -1042,16 +1092,63 @@ private static void stopTelemetry() {
10421092
}
10431093
}
10441094

1045-
private static void initializeErrorTracking() {
1095+
private static void initializeDelayedCrashTracking() {
1096+
initializeCrashTracking(true, isCrashTrackingAutoconfigEnabled());
1097+
}
1098+
1099+
private static void initializeDelayedCrashTrackingOnlyJmx() {
1100+
initializeCrashTracking(true, false);
1101+
}
1102+
1103+
private static void initializeCrashTrackingDefault() {
1104+
initializeCrashTracking(false, isCrashTrackingAutoconfigEnabled());
1105+
}
1106+
1107+
private static boolean isCrashTrackingAutoconfigEnabled() {
1108+
String enabledVal = ddGetProperty("dd." + CrashTrackingConfig.CRASH_TRACKING_ENABLE_AUTOCONFIG);
1109+
boolean enabled = CrashTrackingConfig.CRASH_TRACKING_ENABLE_AUTOCONFIG_DEFAULT;
1110+
if (enabledVal != null) {
1111+
enabled = Boolean.parseBoolean(enabledVal);
1112+
} else {
1113+
// If the property is not set, then we check if profiling is enabled
1114+
enabled = profilingEnabled;
1115+
}
1116+
return enabled;
1117+
}
1118+
1119+
private static void initializeCrashTracking(boolean delayed, boolean checkNative) {
10461120
if (JavaVirtualMachine.isJ9()) {
10471121
// TODO currently crash tracking is supported only for HotSpot based JVMs
10481122
return;
10491123
}
1124+
log.debug("Initializing crashtracking");
10501125
try {
1051-
Class<?> clz = AGENT_CLASSLOADER.loadClass("com.datadog.crashtracking.ScriptInitializer");
1052-
clz.getMethod("initialize").invoke(null);
1126+
Class<?> clz = AGENT_CLASSLOADER.loadClass("datadog.crashtracking.Initializer");
1127+
// first try to use the JVMAccess using the native library; unless `checkNative` is false
1128+
Boolean rslt =
1129+
checkNative && (Boolean) clz.getMethod("initialize", boolean.class).invoke(null, false);
1130+
if (!rslt) {
1131+
if (delayed) {
1132+
// already delayed initialization, so no need to reschedule it again
1133+
// just call initialize and force JMX
1134+
rslt = (Boolean) clz.getMethod("initialize", boolean.class).invoke(null, true);
1135+
} else {
1136+
// delayed initialization, so we need to reschedule it and mark as delayed but do not
1137+
// re-check the native library
1138+
CRASHTRACKER_INIT_AFTER_JMX = Agent::initializeDelayedCrashTrackingOnlyJmx;
1139+
rslt = null; // we will initialize it later
1140+
}
1141+
}
1142+
if (rslt == null) {
1143+
log.debug("Crashtracking initialization delayed until JMX is available");
1144+
} else if (rslt) {
1145+
log.debug("Crashtracking initialized");
1146+
} else {
1147+
log.debug(
1148+
SEND_TELEMETRY, "Crashtracking failed to initialize. No additional details available.");
1149+
}
10531150
} catch (Throwable t) {
1054-
log.debug("Unable to initialize crash uploader", t);
1151+
log.debug(SEND_TELEMETRY, "Unable to initialize crashtracking", t);
10551152
}
10561153
}
10571154

@@ -1150,8 +1247,11 @@ public void withTracer(TracerAPI tracer) {
11501247
}
11511248
});
11521249
}
1153-
} catch (final Throwable ex) {
1154-
log.error("Throwable thrown while starting profiling agent", ex);
1250+
} catch (final Throwable t) {
1251+
log.error(
1252+
SEND_TELEMETRY,
1253+
"Throwable thrown while starting profiling agent "
1254+
+ Arrays.toString(t.getCause().getStackTrace()));
11551255
} finally {
11561256
Thread.currentThread().setContextClassLoader(contextLoader);
11571257
}

dd-java-agent/agent-crashtracking/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ dependencies {
1616
implementation project(':internal-api')
1717
implementation project(':utils:container-utils')
1818
implementation project(':utils:version-utils')
19+
implementation project(path: ':dd-java-agent:ddprof-lib', configuration: 'shadow')
1920

2021
implementation libs.okhttp
2122
implementation libs.moshi

0 commit comments

Comments
 (0)