Skip to content

Commit 2b7a97c

Browse files
feat: Improving error handling (#7427)
Try to capture and explain as many initialization errors as possible. Avoid logging stack traces in log events. Split crash tracking and OOME notifier initialization for readability.
1 parent 8638837 commit 2b7a97c

File tree

4 files changed

+365
-263
lines changed

4 files changed

+365
-263
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package com.datadog.crashtracking;
2+
3+
import static com.datadog.crashtracking.ScriptInitializer.LOG;
4+
import static com.datadog.crashtracking.ScriptInitializer.RWXRWXRWX;
5+
import static com.datadog.crashtracking.ScriptInitializer.R_XR_XR_X;
6+
import static com.datadog.crashtracking.ScriptInitializer.findAgentJar;
7+
import static com.datadog.crashtracking.ScriptInitializer.getCrashUploaderTemplate;
8+
import static com.datadog.crashtracking.ScriptInitializer.writeConfig;
9+
import static java.nio.file.attribute.PosixFilePermissions.asFileAttribute;
10+
import static java.nio.file.attribute.PosixFilePermissions.fromString;
11+
import static java.util.Locale.ROOT;
12+
13+
import datadog.trace.util.PidHelper;
14+
import datadog.trace.util.Strings;
15+
import java.io.BufferedReader;
16+
import java.io.BufferedWriter;
17+
import java.io.IOException;
18+
import java.io.InputStream;
19+
import java.io.InputStreamReader;
20+
import java.nio.file.FileAlreadyExistsException;
21+
import java.nio.file.Files;
22+
import java.nio.file.Path;
23+
import java.nio.file.Paths;
24+
25+
public final class CrashUploaderScriptInitializer {
26+
private static final String SETUP_FAILURE_MESSAGE = "Crash tracking will not work properly.";
27+
28+
private CrashUploaderScriptInitializer() {}
29+
30+
// @VisibleForTests
31+
static void initialize(String onErrorVal, String onErrorFile) {
32+
if (onErrorVal == null || onErrorVal.isEmpty()) {
33+
LOG.debug("'-XX:OnError' argument was not provided. Crash tracking is disabled.");
34+
return;
35+
}
36+
if (onErrorFile == null || onErrorFile.isEmpty()) {
37+
onErrorFile = System.getProperty("user.dir") + "/hs_err_pid" + PidHelper.getPid() + ".log";
38+
LOG.debug("No -XX:ErrorFile value, defaulting to {}", onErrorFile);
39+
} else {
40+
onErrorFile = Strings.replace(onErrorFile, "%p", PidHelper.getPid());
41+
}
42+
43+
String agentJar = findAgentJar();
44+
if (agentJar == null) {
45+
LOG.warn("Unable to locate the agent jar. {}", SETUP_FAILURE_MESSAGE);
46+
return;
47+
}
48+
49+
Path scriptPath = Paths.get(onErrorVal.replace(" %p", ""));
50+
boolean isDDCrashUploader =
51+
scriptPath.getFileName().toString().toLowerCase(ROOT).contains("dd_crash_uploader");
52+
if (isDDCrashUploader && !copyCrashUploaderScript(scriptPath, onErrorFile, agentJar)) {
53+
return;
54+
}
55+
56+
writeConfig(scriptPath, "agent", agentJar, "hs_err", onErrorFile);
57+
}
58+
59+
private static boolean copyCrashUploaderScript(
60+
Path scriptPath, String onErrorFile, String agentJar) {
61+
Path scriptDirectory = scriptPath.getParent();
62+
try {
63+
Files.createDirectories(scriptDirectory, asFileAttribute(fromString(RWXRWXRWX)));
64+
} catch (UnsupportedOperationException e) {
65+
LOG.warn(
66+
"Unsupported permissions {} for {}. {}",
67+
RWXRWXRWX,
68+
scriptDirectory,
69+
SETUP_FAILURE_MESSAGE);
70+
return false;
71+
} catch (FileAlreadyExistsException ignored) {
72+
// can be safely ignored; if the folder exists we will just reuse it
73+
if (!Files.isWritable(scriptDirectory)) {
74+
LOG.warn("Read only directory {}. {}", scriptDirectory, SETUP_FAILURE_MESSAGE);
75+
return false;
76+
}
77+
} catch (IOException e) {
78+
LOG.warn(
79+
"Failed to create writable crash tracking script folder {}. {}",
80+
scriptDirectory,
81+
SETUP_FAILURE_MESSAGE);
82+
return false;
83+
}
84+
try {
85+
LOG.debug("Writing crash uploader script: {}", scriptPath);
86+
writeCrashUploaderScript(getCrashUploaderTemplate(), scriptPath, agentJar, onErrorFile);
87+
} catch (IOException e) {
88+
LOG.warn("Failed to copy crash tracking script {}. {}", scriptPath, SETUP_FAILURE_MESSAGE);
89+
return false;
90+
}
91+
return true;
92+
}
93+
94+
private static void writeCrashUploaderScript(
95+
InputStream template, Path scriptPath, String execClass, String crashFile)
96+
throws IOException {
97+
if (!Files.exists(scriptPath)) {
98+
try (BufferedReader br = new BufferedReader(new InputStreamReader(template));
99+
BufferedWriter bw = Files.newBufferedWriter(scriptPath)) {
100+
String line;
101+
while ((line = br.readLine()) != null) {
102+
bw.write(template(line, execClass, crashFile));
103+
bw.newLine();
104+
}
105+
}
106+
Files.setPosixFilePermissions(scriptPath, fromString(R_XR_XR_X));
107+
}
108+
}
109+
110+
private static String template(String line, String execClass, String crashFile) {
111+
line = Strings.replace(line, "!AGENT_JAR!", execClass);
112+
if (crashFile != null) {
113+
line = Strings.replace(line, "!JAVA_ERROR_FILE!", crashFile);
114+
}
115+
return line;
116+
}
117+
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
package com.datadog.crashtracking;
2+
3+
import static com.datadog.crashtracking.ScriptInitializer.LOG;
4+
import static com.datadog.crashtracking.ScriptInitializer.PID_PREFIX;
5+
import static com.datadog.crashtracking.ScriptInitializer.RWXRWXRWX;
6+
import static com.datadog.crashtracking.ScriptInitializer.R_XR_XR_X;
7+
import static com.datadog.crashtracking.ScriptInitializer.findAgentJar;
8+
import static com.datadog.crashtracking.ScriptInitializer.getOomeNotifierTemplate;
9+
import static com.datadog.crashtracking.ScriptInitializer.writeConfig;
10+
import static java.nio.file.FileVisitResult.CONTINUE;
11+
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
12+
import static java.nio.file.attribute.PosixFilePermissions.asFileAttribute;
13+
import static java.nio.file.attribute.PosixFilePermissions.fromString;
14+
import static java.util.regex.Pattern.CASE_INSENSITIVE;
15+
16+
import datadog.trace.api.Config;
17+
import datadog.trace.util.PidHelper;
18+
import java.io.IOException;
19+
import java.nio.file.FileAlreadyExistsException;
20+
import java.nio.file.FileVisitResult;
21+
import java.nio.file.FileVisitor;
22+
import java.nio.file.Files;
23+
import java.nio.file.Path;
24+
import java.nio.file.Paths;
25+
import java.nio.file.attribute.BasicFileAttributes;
26+
import java.util.Set;
27+
import java.util.regex.Matcher;
28+
import java.util.regex.Pattern;
29+
import java.util.stream.Collectors;
30+
31+
public final class OOMENotifierScriptInitializer {
32+
private static final Pattern OOME_NOTIFIER_SCRIPT_PATTERN =
33+
Pattern.compile("(.*?dd_oome_notifier[.](sh|bat))\\s+(%p)", CASE_INSENSITIVE);
34+
35+
private OOMENotifierScriptInitializer() {}
36+
37+
// @VisibleForTests
38+
static void initialize(String onOutOfMemoryVal) {
39+
if (onOutOfMemoryVal == null || onOutOfMemoryVal.isEmpty()) {
40+
LOG.debug("'-XX:OnOutOfMemoryError' argument was not provided. OOME tracking is disabled.");
41+
return;
42+
}
43+
Path scriptPath = getOOMEScripPath(onOutOfMemoryVal);
44+
if (scriptPath == null) {
45+
LOG.info(
46+
"OOME notifier script value ({}) does not follow the expected format: <path>/dd_ome_notifier.(sh|bat) %p. OOME tracking is disabled.",
47+
onOutOfMemoryVal);
48+
return;
49+
}
50+
String agentJar = findAgentJar();
51+
if (agentJar == null) {
52+
LOG.warn("Unable to locate the agent jar. OOME notification will not work properly.");
53+
return;
54+
}
55+
if (!copyOOMEscript(scriptPath)) {
56+
return;
57+
}
58+
String tags = getTags();
59+
writeConfig(scriptPath, "agent", agentJar, "tags", tags);
60+
}
61+
62+
private static String getTags() {
63+
return Config.get().getMergedJmxTags().entrySet().stream()
64+
.map(e -> e.getKey() + ":" + e.getValue())
65+
.collect(Collectors.joining(","));
66+
}
67+
68+
private static Path getOOMEScripPath(String onOutOfMemoryVal) {
69+
Matcher m = OOME_NOTIFIER_SCRIPT_PATTERN.matcher(onOutOfMemoryVal);
70+
if (!m.find()) {
71+
return null;
72+
}
73+
return Paths.get(m.group(1));
74+
}
75+
76+
private static boolean copyOOMEscript(Path scriptPath) {
77+
Path scriptDirectory = scriptPath.getParent();
78+
79+
// cleanup all stale process-specific generated files in the parent folder of the given OOME
80+
// notifier script
81+
ScriptCleanupVisitor.run(scriptDirectory);
82+
83+
try {
84+
Files.createDirectories(scriptDirectory, asFileAttribute(fromString(RWXRWXRWX)));
85+
} catch (UnsupportedOperationException e) {
86+
LOG.warn(
87+
"Unsupported permissions {} for {}. OOME notification will not work properly.",
88+
RWXRWXRWX,
89+
scriptDirectory);
90+
return false;
91+
} catch (FileAlreadyExistsException ignored) {
92+
// can be safely ignored; if the folder exists we will just reuse it
93+
if (!Files.isWritable(scriptDirectory)) {
94+
LOG.warn(
95+
"Read only directory {}. OOME notification will not work properly.", scriptDirectory);
96+
return false;
97+
}
98+
} catch (IOException e) {
99+
LOG.warn(
100+
"Failed to create writable OOME script folder {}. OOME notification will not work properly.",
101+
scriptDirectory);
102+
return false;
103+
}
104+
105+
try {
106+
Files.copy(getOomeNotifierTemplate(), scriptPath, REPLACE_EXISTING);
107+
Files.setPosixFilePermissions(scriptPath, fromString(R_XR_XR_X));
108+
} catch (IOException e) {
109+
LOG.warn(
110+
"Failed to copy OOME script {}. OOME notification will not work properly.", scriptPath);
111+
return false;
112+
}
113+
return true;
114+
}
115+
116+
private static class ScriptCleanupVisitor implements FileVisitor<Path> {
117+
private static final Pattern PID_PATTERN = Pattern.compile(".*?" + PID_PREFIX + "(\\d+)");
118+
119+
private final Set<String> pidSet = PidHelper.getJavaPids();
120+
121+
static void run(Path dir) {
122+
try {
123+
if (Files.exists(dir)) {
124+
Files.walkFileTree(dir, new ScriptCleanupVisitor());
125+
}
126+
} catch (IOException e) {
127+
LOG.warn("Failed cleaning up process specific files in {}", dir, e);
128+
}
129+
}
130+
131+
private ScriptCleanupVisitor() {}
132+
133+
@Override
134+
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) {
135+
return CONTINUE;
136+
}
137+
138+
@Override
139+
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
140+
String fileName = file.getFileName().toString();
141+
Matcher matcher = PID_PATTERN.matcher(fileName);
142+
if (matcher.find()) {
143+
String pid = matcher.group(1);
144+
if (pid != null && !pid.equals(PidHelper.getPid()) && !this.pidSet.contains(pid)) {
145+
LOG.debug("Cleaning process specific file {}", file);
146+
Files.delete(file);
147+
}
148+
}
149+
return CONTINUE;
150+
}
151+
152+
@Override
153+
public FileVisitResult visitFileFailed(Path file, IOException exc) {
154+
LOG.debug("Failed to delete file {}", file, exc);
155+
return CONTINUE;
156+
}
157+
158+
@Override
159+
public FileVisitResult postVisitDirectory(Path dir, IOException exc) {
160+
return CONTINUE;
161+
}
162+
}
163+
}

0 commit comments

Comments
 (0)