Skip to content

Commit d8b78d5

Browse files
authored
Merge pull request #33 from paypal/issue-32-sanitized-string-charset-java8
fix wrong charset of sanitized strings in Java8 heap dumps
2 parents 6c9a63a + d0682cf commit d8b78d5

File tree

3 files changed

+57
-18
lines changed

3 files changed

+57
-18
lines changed

src/main/java/com/paypal/heapdumptool/sanitizer/HeapDumpSanitizer.java

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import java.io.IOException;
1010
import java.io.InputStream;
1111
import java.io.OutputStream;
12+
import java.io.UnsupportedEncodingException;
1213
import java.nio.charset.StandardCharsets;
1314
import java.util.Collection;
1415
import java.util.HashMap;
@@ -68,6 +69,7 @@ public class HeapDumpSanitizer {
6869
private final Map<String, ClassObject> classNameToClassObjectsMap = new HashMap<>();
6970
private final Set<Long> excludeStringObjectIds = new HashSet<>();
7071
private final Set<Long> excludeStringValueArrayObjectIds = new HashSet<>();
72+
private boolean isLikelyJdk9Plus;
7173

7274
public void setInputStream(final InputStream inputStream) {
7375
this.inputStream = inputStream;
@@ -260,6 +262,10 @@ private void copyHeapDumpClassDump(final Pipe pipe, final long classObjectId) th
260262
final String fieldName = stringIdToStringMap.getOrDefault(fieldNameStringId, "");
261263
final BasicType basicType = BasicType.findByU1Code(fieldType).orElseThrow(IllegalStateException::new);
262264
classObject.fields.add(new Field(fieldName, basicType));
265+
266+
if (isStringClass(classObjectId) && STRING_CODER_FIELD.equals(fieldName)) {
267+
isLikelyJdk9Plus = true;
268+
}
263269
}
264270
}
265271

@@ -457,13 +463,25 @@ private boolean shouldApplyArraySanitization(final long objectId, final int elem
457463

458464
/**
 * Skips {@code numBytes} of input via {@code pipe.skipInput} and then asks the pipe to copy the
 * same number of bytes from a stream built over the sanitization text bytes.
 *
 * NOTE(review): this is a rendered diff. The line under the "461-" marker is the removed
 * hard-coded UTF-8 encoding and the line under the "466+" marker is its replacement, so either
 * version of the file declares {@code replacementData} exactly once.
 *
 * NOTE(review): presumably InfiniteCircularInputStream repeats the replacement bytes as often as
 * needed to supply numBytes; confirm against that class.
 */
private void applySanitization(final Pipe pipe, final long numBytes) throws IOException {
459465
pipe.skipInput(numBytes);
466+
final byte[] replacementData = getSanitizationTextBytes();
460467

461-
final byte[] replacementData = sanitizeCommand.getSanitizationText().getBytes(StandardCharsets.UTF_8);
462468
try (final InputStream replacementDataStream = new InfiniteCircularInputStream(replacementData)) {
463469
pipe.copyFrom(replacementDataStream, numBytes);
464470
}
465471
}
466472

473+
/**
 * Encodes the configured sanitization text as bytes.
 *
 * When the command reports that charset auto-detection is off, the text is encoded with the
 * charset name configured on the command. Otherwise the charset is picked from
 * {@code isLikelyJdk9Plus}: UTF-8 when the flag is set, UTF-16BE when it is not.
 *
 * NOTE(review): presumably UTF-16BE is used for pre-JDK 9 dumps because String values there are
 * stored as char arrays; confirm against the heap dump format.
 *
 * @return the sanitization text encoded in the selected charset
 * @throws UnsupportedEncodingException from {@code String.getBytes(String)} when the explicitly
 *         configured charset name is not supported
 */
private byte[] getSanitizationTextBytes() throws UnsupportedEncodingException {
474+
if (!sanitizeCommand.isSanitizationTextCharsetAutoDetect()) {
475+
final String sanitizationTextCharset = sanitizeCommand.getSanitizationTextCharset();
476+
return sanitizeCommand.getSanitizationText().getBytes(sanitizationTextCharset);
477+
}
478+
479+
if (isLikelyJdk9Plus) {
480+
return sanitizeCommand.getSanitizationText().getBytes(StandardCharsets.UTF_8);
481+
}
482+
return sanitizeCommand.getSanitizationText().getBytes(StandardCharsets.UTF_16BE);
483+
}
484+
467485
private static boolean isLatin1(final String input) {
468486
for (final char c : input.toCharArray()) {
469487
if (c > 0xFF) {

src/main/java/com/paypal/heapdumptool/sanitizer/SanitizeOrCaptureCommandBase.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public abstract class SanitizeOrCaptureCommandBase implements CliCommand {
2828
@Option(names = {"-d", DOCKER_REGISTRY_OPTION}, description = "docker registry hostname for bootstrapping heap-dump-tool docker image")
2929
private String dockerRegistry;
3030

31-
@Option(names = {"-a", "--tar-input"}, description = "Treat input as tar archive")
31+
@Option(names = {"-a", "--tar-input"}, description = "Treat input as tar archive", arity = "1")
3232
private boolean tarInput;
3333

3434
@Option(names = {"-e", "--exclude-string-fields"},
@@ -47,15 +47,29 @@ public abstract class SanitizeOrCaptureCommandBase implements CliCommand {
4747
// match, all strings would be displayed as "*"
4848
private boolean forceMatchStringCoder;
4949

50-
@Option(names = {"-s", "--sanitize-byte-char-arrays-only"}, description = "Sanitize byte/char arrays only", defaultValue = "true", showDefaultValue = ALWAYS)
50+
@Option(names = {"-s", "--sanitize-byte-char-arrays-only"},
51+
description = "Sanitize byte/char arrays only",
52+
defaultValue = "true",
53+
arity = "1",
54+
showDefaultValue = ALWAYS)
5155
private boolean sanitizeByteCharArraysOnly = true;
5256

53-
@Option(names = {"-S", "--sanitize-arrays-only"}, description = "Sanitize arrays only", defaultValue = "false", showDefaultValue = ALWAYS)
57+
@Option(names = {"-S", "--sanitize-arrays-only"},
58+
description = "Sanitize arrays only",
59+
arity = "1",
60+
defaultValue = "false",
61+
showDefaultValue = ALWAYS)
5462
private boolean sanitizeArraysOnly;
5563

5664
@Option(names = {"-t", "--text"}, description = "Sanitization text to replace with", defaultValue = "\\0", showDefaultValue = ALWAYS)
5765
private String sanitizationText = "\\0";
5866

67+
// Charset name used to encode the sanitization text. The "<auto-detect>" value is both the
// option's declared default and the field initializer, so the two must be kept in sync.
@Option(names = {"-T", "--text-charset"},
68+
description = "Sanitization text charset",
69+
defaultValue = "<auto-detect>",
70+
showDefaultValue = ALWAYS)
71+
private String sanitizationTextCharset = "<auto-detect>";
72+
5973
private StringFieldMap excludeStringFieldMap;
6074

6175
@Option(names = {"-b", "--buffer-size"}, description = "Buffer size for reading and writing", defaultValue = "100MB", showDefaultValue = ALWAYS)
@@ -113,6 +127,18 @@ public void setSanitizationText(final String sanitizationText) {
113127
this.sanitizationText = StringEscapeUtils.unescapeJava(sanitizationText);
114128
}
115129

130+
/**
 * Reports whether the configured charset equals the charset of a freshly constructed
 * {@code SanitizeCommand}, i.e. whether it is still at its initial value.
 *
 * NOTE(review): this allocates a new SanitizeCommand on every call only to read its initial
 * charset; comparing against a shared constant would avoid that. Confirm SanitizeCommand does
 * not change the initial value before refactoring.
 *
 * @return true when the configured charset matches the initial value of a new SanitizeCommand
 */
public boolean isSanitizationTextCharsetAutoDetect() {
131+
return new SanitizeCommand().getSanitizationTextCharset().equals(getSanitizationTextCharset());
132+
}
133+
134+
/**
 * @return the configured sanitization text charset value, returned as stored
 */
public String getSanitizationTextCharset() {
135+
return sanitizationTextCharset;
136+
}
137+
138+
/**
 * Stores the sanitization text charset value as given; no validation is done here.
 *
 * @param sanitizationTextCharset the charset value to store
 */
public void setSanitizationTextCharset(final String sanitizationTextCharset) {
139+
this.sanitizationTextCharset = sanitizationTextCharset;
140+
}
141+
116142
/**
 * @return the current value of the forceMatchStringCoder flag
 */
public boolean isForceMatchStringCoder() {
117143
return forceMatchStringCoder;
118144
}

src/test/java/com/paypal/heapdumptool/sanitizer/HeapDumpSanitizerTest.java

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020

2121
import java.io.IOException;
2222
import java.nio.ByteBuffer;
23+
import java.nio.charset.Charset;
2324
import java.nio.file.Files;
2425
import java.nio.file.Path;
25-
import java.nio.file.Paths;
2626
import java.util.ArrayList;
2727
import java.util.List;
2828
import java.util.concurrent.ThreadLocalRandom;
@@ -33,17 +33,16 @@
3333
import static com.paypal.heapdumptool.fixture.ByteArrayTool.lengthen;
3434
import static com.paypal.heapdumptool.fixture.ByteArrayTool.nCopiesLongToBytes;
3535
import static java.nio.ByteOrder.BIG_ENDIAN;
36+
import static java.nio.charset.StandardCharsets.UTF_16BE;
3637
import static java.nio.charset.StandardCharsets.UTF_8;
3738
import static java.util.Arrays.asList;
3839
import static org.apache.commons.io.FileUtils.byteCountToDisplaySize;
3940
import static org.apache.commons.lang3.ArrayUtils.EMPTY_BYTE_ARRAY;
4041
import static org.apache.commons.lang3.ArrayUtils.EMPTY_STRING_ARRAY;
41-
import static org.apache.commons.lang3.JavaVersion.JAVA_9;
42-
import static org.apache.commons.lang3.JavaVersion.JAVA_RECENT;
43-
import static org.apache.commons.lang3.SystemUtils.isJavaVersionAtLeast;
42+
import static org.apache.commons.lang3.JavaVersion.JAVA_1_8;
43+
import static org.apache.commons.lang3.SystemUtils.isJavaVersionAtMost;
4444
import static org.assertj.core.api.Assertions.assertThat;
4545
import static org.assertj.core.api.Assertions.assertThatCode;
46-
import static org.assertj.core.api.Assumptions.assumeThat;
4746

4847
@TestMethodOrder(Random.class)
4948
class HeapDumpSanitizerTest {
@@ -215,11 +214,6 @@ void testSanitizeArraysOnly() throws Exception {
215214

216215
@Test
217216
void testThreadNameExcludedFromSanitization() throws Exception {
218-
// verify java 8 manually ...
219-
assumeThat(isJavaVersionAtLeast(JAVA_9))
220-
.withFailMessage(JAVA_RECENT + "")
221-
.isTrue();
222-
223217
// "xN-classified-value" with each letter incremented by 1
224218
final String x2ClassifiedValue = "y3.dmbttjgjfe.wbmvf";
225219
final String x5ClassifiedValue = "y6.dmbttjgjfe.wbmvf";
@@ -228,12 +222,13 @@ void testThreadNameExcludedFromSanitization() throws Exception {
228222
thread.setDaemon(true);
229223
thread.setName(adjustLetters(x2ClassifiedValue));
230224

225+
final Charset charset = isJavaVersionAtMost(JAVA_1_8) ? UTF_16BE : UTF_8;
231226
final byte[] sanitizedHeapDump = loadSanitizedHeapDump();
232227
assertThat(sanitizedHeapDump)
233228
.withFailMessage("threadGroupName " + threadGroup.getName())
234-
.containsSequence(butLast(threadGroup.getName()).getBytes(UTF_8))
229+
.containsSequence(butLast(threadGroup.getName()).getBytes(charset))
235230
.withFailMessage("threadName " + thread.getName())
236-
.containsSequence(butLast(thread.getName()).getBytes(UTF_8));
231+
.containsSequence(butLast(thread.getName()).getBytes(charset));
237232
}
238233

239234
private String butLast(final String input) {
@@ -257,12 +252,12 @@ void testThreadNameIncludedInSanitization() throws Exception {
257252

258253
// 0xDEADBEEF
259254
// Builds the value at runtime from 0xDEADBEEE plus a parsed "1" instead of writing the target
// constant literally.
// The hex literal is an int, so it is negative; both the removed ("260-") and the added ("255+")
// return lines yield the same sign-extended long whose low 32 bits are 0xDEADBEEF.
// NOTE(review): presumably the indirection keeps the target constant out of the compiled test
// class so that only the test's own data matches in the heap dump; confirm.
private long deadcow() {
260-
return 0xDEADBEEE + Long.parseLong("1");
255+
return 0xDEADBEEE + Integer.parseInt("1");
261256
}
262257

263258
// 0xCAFEBABE
264259
// Builds the value at runtime from 0xCAFEBABD plus a parsed "1" instead of writing the target
// constant literally.
// The hex literal is an int, so it is negative; both the removed ("265-") and the added ("260+")
// return lines yield the same sign-extended long whose low 32 bits are 0xCAFEBABE.
// The added line also normalizes the literal prefix from "0X" to "0x".
private long cafegirl() {
265-
return 0XCAFEBABD + Long.parseLong("1");
260+
return 0xCAFEBABD + Integer.parseInt("1");
266261
}
267262

268263
private void verifyDoesNotContainsSequence(final byte[] big, final byte[] small) {

0 commit comments

Comments
 (0)