|
| 1 | +package datadog.gradle.plugin.dump |
| 2 | + |
| 3 | +import org.gradle.api.Plugin |
| 4 | +import org.gradle.api.Project |
| 5 | +import org.gradle.api.Task |
| 6 | +import org.gradle.api.model.ObjectFactory |
| 7 | +import org.gradle.api.provider.Property |
| 8 | +import org.gradle.api.provider.Provider |
| 9 | +import org.gradle.api.services.BuildService |
| 10 | +import org.gradle.api.services.BuildServiceParameters |
| 11 | +import org.gradle.api.tasks.testing.Test |
| 12 | +import org.gradle.kotlin.dsl.extra |
| 13 | +import org.gradle.kotlin.dsl.withType |
| 14 | +import java.io.File |
| 15 | +import java.io.IOException |
| 16 | +import java.lang.ProcessBuilder.Redirect |
| 17 | +import java.time.Duration |
| 18 | +import java.util.concurrent.Executors |
| 19 | +import java.util.concurrent.ScheduledExecutorService |
| 20 | +import java.util.concurrent.ScheduledFuture |
| 21 | +import java.util.concurrent.TimeUnit |
| 22 | +import javax.inject.Inject |
| 23 | + |
| 24 | +/** |
| 25 | + * Plugin to collect thread and heap dumps for hanged tests. |
| 26 | + */ |
| 27 | +class DumpHangedTestPlugin : Plugin<Project> { |
| 28 | + companion object { |
| 29 | + private const val DUMP_FUTURE_KEY = "dumping_future" |
| 30 | + } |
| 31 | + |
| 32 | + /** Plugin properties */ |
| 33 | + abstract class DumpHangedTestProperties @Inject constructor(objects: ObjectFactory) { |
| 34 | + // Time offset (in seconds) before a test reaches its timeout at which dumps should be started. |
| 35 | + // Defaults to 60 seconds. |
| 36 | + val dumpOffset: Property<Long> = objects.property(Long::class.java) |
| 37 | + } |
| 38 | + |
| 39 | + /** Executor wrapped with proper Gradle lifecycle. */ |
| 40 | + abstract class DumpSchedulerService : BuildService<BuildServiceParameters.None>, AutoCloseable { |
| 41 | + private val executor: ScheduledExecutorService = |
| 42 | + Executors.newSingleThreadScheduledExecutor { r -> Thread(r, "hanged-test-dump").apply { isDaemon = true } } |
| 43 | + |
| 44 | + fun schedule(task: () -> Unit, delay: Duration): ScheduledFuture<*> = |
| 45 | + executor.schedule(task, delay.toMillis(), TimeUnit.MILLISECONDS) |
| 46 | + |
| 47 | + override fun close() { |
| 48 | + executor.shutdownNow() |
| 49 | + } |
| 50 | + } |
| 51 | + |
| 52 | + override fun apply(project: Project) { |
| 53 | + if (project.rootProject != project) { |
| 54 | + return |
| 55 | + } |
| 56 | + |
| 57 | + val scheduler = project.gradle.sharedServices |
| 58 | + .registerIfAbsent("dumpHangedTestScheduler", DumpSchedulerService::class.java) |
| 59 | + |
| 60 | + // Create plugin properties. |
| 61 | + val props = project.extensions.create("dumpHangedTest", DumpHangedTestProperties::class.java) |
| 62 | + |
| 63 | + fun configure(p: Project) { |
| 64 | + p.tasks.withType<Test>().configureEach { |
| 65 | + doFirst { schedule(this, scheduler, props) } |
| 66 | + doLast { cleanup(this) } |
| 67 | + } |
| 68 | + } |
| 69 | + |
| 70 | + configure(project) |
| 71 | + |
| 72 | + project.subprojects(::configure) |
| 73 | + } |
| 74 | + |
| 75 | + private fun schedule(t: Task, scheduler: Provider<DumpSchedulerService>, props: DumpHangedTestProperties) { |
| 76 | + val taskName = t.path |
| 77 | + |
| 78 | + if (t.extra.has(DUMP_FUTURE_KEY)) { |
| 79 | + t.logger.info("Taking dumps already scheduled for $taskName") |
| 80 | + return |
| 81 | + } |
| 82 | + |
| 83 | + val dumpOffset = props.dumpOffset.getOrElse(60) |
| 84 | + val delay = t.timeout.map { it.minusSeconds(dumpOffset) }.orNull |
| 85 | + |
| 86 | + if (delay == null || delay.seconds < 0) { |
| 87 | + t.logger.info("Taking dumps has invalid timeout configured for $taskName") |
| 88 | + return |
| 89 | + } |
| 90 | + |
| 91 | + val future = scheduler.get().schedule({ |
| 92 | + t.logger.quiet("Taking dumps after ${delay.seconds} seconds delay for $taskName") |
| 93 | + |
| 94 | + takeDump(t) |
| 95 | + }, delay) |
| 96 | + |
| 97 | + t.extra.set(DUMP_FUTURE_KEY, future) |
| 98 | + } |
| 99 | + |
| 100 | + private fun takeDump(t: Task) { |
| 101 | + try { |
| 102 | + // Use Gradle's build dir and adjust for CI artifacts collection if needed. |
| 103 | + val dumpsDir: File = t.project.layout.buildDirectory |
| 104 | + .dir("dumps") |
| 105 | + .map { dir -> |
| 106 | + if (t.project.providers.environmentVariable("CI").isPresent) { |
| 107 | + // Move reports into the folder collected by the collect_reports.sh script. |
| 108 | + File( |
| 109 | + dir.asFile.absolutePath.replace( |
| 110 | + "dd-trace-java/dd-java-agent", |
| 111 | + "dd-trace-java/workspace/dd-java-agent" |
| 112 | + ) |
| 113 | + ) |
| 114 | + } else { |
| 115 | + dir.asFile |
| 116 | + } |
| 117 | + } |
| 118 | + .get() |
| 119 | + |
| 120 | + dumpsDir.mkdirs() |
| 121 | + |
| 122 | + fun file(name: String, ext: String = "log") = |
| 123 | + File(dumpsDir, "$name-${System.currentTimeMillis()}.$ext") |
| 124 | + |
| 125 | + // For simplicity, use `0` as the PID, which collects all thread dumps across JVMs. |
| 126 | + val allThreadsFile = file("all-thread-dumps") |
| 127 | + runCmd(Redirect.to(allThreadsFile), "jcmd", "0", "Thread.print", "-l") |
| 128 | + |
| 129 | + // Collect all JVMs pids. |
| 130 | + val allJavaProcessesFile = file("all-java-processes") |
| 131 | + runCmd(Redirect.to(allJavaProcessesFile), "jcmd", "-l") |
| 132 | + |
| 133 | + // Collect pids for 'Gradle Test Executor'. |
| 134 | + val pids = allJavaProcessesFile.readLines() |
| 135 | + .filter { it.contains("Gradle Test Executor") } |
| 136 | + .map { it.substringBefore(' ') } |
| 137 | + |
| 138 | + pids.forEach { pid -> |
| 139 | + // Collect heap dump by pid. |
| 140 | + val heapDumpPath = file("${pid}-heap-dump", "hprof").absolutePath |
| 141 | + runCmd(Redirect.INHERIT, "jcmd", pid, "GC.heap_dump", heapDumpPath) |
| 142 | + |
| 143 | + // Collect thread dump by pid. |
| 144 | + val threadDumpFile = file("${pid}-thread-dump") |
| 145 | + runCmd(Redirect.to(threadDumpFile), "jcmd", pid, "Thread.print", "-l") |
| 146 | + } |
| 147 | + } catch (e: Throwable) { |
| 148 | + t.logger.warn("Taking dumps failed with error: ${e.message}, for ${t.path}") |
| 149 | + } |
| 150 | + } |
| 151 | + |
| 152 | + private fun cleanup(t: Task) { |
| 153 | + val future = t.extra |
| 154 | + .takeIf { it.has(DUMP_FUTURE_KEY) } |
| 155 | + ?.get(DUMP_FUTURE_KEY) as? ScheduledFuture<*> |
| 156 | + |
| 157 | + if (future != null && !future.isDone) { |
| 158 | + t.logger.info("Taking dump canceled with remaining delay of ${future.getDelay(TimeUnit.SECONDS)} seconds for ${t.path}") |
| 159 | + future.cancel(false) |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + private fun runCmd( |
| 164 | + redirectTo: Redirect, |
| 165 | + vararg args: String |
| 166 | + ) { |
| 167 | + val exitCode = ProcessBuilder(*args) |
| 168 | + .redirectErrorStream(true) |
| 169 | + .redirectOutput(redirectTo) |
| 170 | + .start() |
| 171 | + .waitFor() |
| 172 | + |
| 173 | + if (exitCode != 0) { |
| 174 | + throw IOException("Process failed: ${args.joinToString(" ")}, exit code: $exitCode") |
| 175 | + } |
| 176 | + } |
| 177 | +} |
0 commit comments