Skip to content

Commit 1f70837

Browse files
committed
fix: Enforce UTF-8 for AppMap I/O and add encoding regression tests
This commit ensures consistent UTF-8 character encoding for AppMap data across file system operations and HTTP responses, preventing corruption on systems where the default charset is not UTF-8 (e.g., Windows-1252). Key changes: - `RecordingSession.java`: Enforce `StandardCharsets.UTF_8` when creating writers for AppMap files, replacing reliance on default `FileWriter`. - `Recording.java`: Explicitly use `StandardCharsets.UTF_8` in `readFully` to correctly decode AppMap content during retrieval. - `ServletRequest.java`: Set `Content-Type: application/json; charset=UTF-8` for remote recording responses and calculate `Content-Length` based on UTF-8 byte size rather than string length. - `agent/test/encoding/`: Add a comprehensive regression test suite (`encoding.bats`, `UnicodeTest.java`, `ReadFullyTest.java`) to verify encoding handling for both reading and writing operations under non-UTF-8 environment settings.
1 parent 78d2523 commit 1f70837

File tree

8 files changed

+172
-14
lines changed

8 files changed

+172
-14
lines changed

agent/src/main/java/com/appland/appmap/process/hooks/remoterecording/ServletRequest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.io.IOException;
44
import java.io.PrintWriter;
5+
import java.nio.charset.StandardCharsets;
56

67
import com.appland.appmap.record.Recording;
78
import com.appland.appmap.reflect.HttpServletRequest;
@@ -29,8 +30,8 @@ public void setStatus(int status) {
2930
}
3031

3132
public void writeJson(String responseJson) throws IOException {
32-
res.setContentType("application/json");
33-
res.setContentLength(responseJson.length());
33+
res.setContentType("application/json; charset=UTF-8");
34+
res.setContentLength(responseJson.getBytes(StandardCharsets.UTF_8).length);
3435
res.setStatus(HttpServletResponse.SC_OK);
3536

3637
PrintWriter writer = res.getWriter();
@@ -39,7 +40,7 @@ public void writeJson(String responseJson) throws IOException {
3940
}
4041

4142
public void writeRecording(Recording recording) throws IOException {
42-
res.setContentType("application/json");
43+
res.setContentType("application/json; charset=UTF-8");
4344
res.setContentLength(recording.size());
4445
recording.readFully(true, res.getWriter());
4546
}

agent/src/main/java/com/appland/appmap/record/Recording.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55

66
import java.io.File;
77
import java.io.FileInputStream;
8-
import java.io.FileReader;
98
import java.io.IOException;
109
import java.io.InputStream;
10+
import java.io.InputStreamReader;
1111
import java.io.Reader;
1212
import java.io.Writer;
13+
import java.nio.charset.StandardCharsets;
1314
import java.nio.file.Files;
1415
import java.nio.file.Path;
1516
import java.nio.file.Paths;
@@ -100,7 +101,7 @@ public Path moveTo(Path targetPath) {
100101
}
101102

102103
public void readFully(boolean delete, Writer writer) throws IOException {
103-
try (final Reader reader = new FileReader(this.file)) {
104+
try (final Reader reader = new InputStreamReader(new FileInputStream(this.file), StandardCharsets.UTF_8)) {
104105
char[] buffer = new char[2048];
105106
int bytesRead;
106107
while ((bytesRead = reader.read(buffer)) != -1) {

agent/src/main/java/com/appland/appmap/record/RecordingSession.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
package com.appland.appmap.record;
22

3-
import java.io.File;
4-
import java.io.FileWriter;
5-
import java.io.IOException;
6-
import java.io.OutputStream;
7-
import java.io.OutputStreamWriter;
8-
import java.io.RandomAccessFile;
9-
import java.io.Writer;
3+
import java.io.*;
4+
import java.nio.charset.StandardCharsets;
105
import java.nio.file.Files;
116
import java.nio.file.Path;
127
import java.nio.file.StandardCopyOption;
@@ -98,7 +93,7 @@ public synchronized Recording checkpoint() {
9893
public void write(int b) throws IOException {
9994
raf.write(b);
10095
}
101-
});
96+
}, StandardCharsets.UTF_8);
10297
raf.seek(targetPath.toFile().length());
10398

10499
if ( eventReceived ) {
@@ -162,7 +157,7 @@ void start() {
162157
try {
163158
this.tmpPath = Files.createTempFile(null, ".appmap.json");
164159
this.tmpPath.toFile().deleteOnExit();
165-
this.serializer = AppMapSerializer.open(new FileWriter(this.tmpPath.toFile()));
160+
this.serializer = AppMapSerializer.open(new OutputStreamWriter(new FileOutputStream(this.tmpPath.toFile()), StandardCharsets.UTF_8));
166161
} catch (IOException e) {
167162
this.tmpPath = null;
168163
this.serializer = null;
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package test.pkg;
2+
3+
import com.appland.appmap.config.AppMapConfig;
4+
import com.appland.appmap.record.Recording;
5+
6+
import java.io.File;
7+
import java.io.FileOutputStream;
8+
import java.io.IOException;
9+
import java.io.OutputStreamWriter;
10+
import java.io.Writer;
11+
import java.nio.charset.StandardCharsets;
12+
import java.nio.file.FileSystems;
13+
14+
public class ReadFullyTest {
15+
public static void main(String[] args) {
16+
try {
17+
runTest();
18+
} catch (Exception e) {
19+
e.printStackTrace();
20+
System.exit(1);
21+
}
22+
}
23+
24+
public static void runTest() throws IOException {
25+
// Initialize AppMapConfig
26+
AppMapConfig.initialize(FileSystems.getDefault());
27+
28+
// 1. Create a dummy AppMap file with known UTF-8 content
29+
String content = "Check: \u26A0\uFE0F \u041F\u0440\u0438\u0432\u0435\u0442";
30+
File tempFile = File.createTempFile("readfully", ".appmap.json");
31+
tempFile.deleteOnExit();
32+
33+
try (Writer fw = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
34+
fw.write(content);
35+
}
36+
37+
// 2. Create a Recording object pointing to it
38+
Recording recording = new Recording("test", tempFile);
39+
40+
// 3. Call readFully and write to stdout using UTF-8
41+
// This validates that readFully correctly reads the UTF-8 file bytes into characters
42+
// regardless of the system's default encoding (which we will set to something else in BATS).
43+
Writer stdoutWriter = new OutputStreamWriter(System.out, StandardCharsets.UTF_8);
44+
recording.readFully(false, stdoutWriter);
45+
stdoutWriter.flush();
46+
}
47+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package test.pkg;
2+
3+
import java.nio.file.Files;
4+
import java.nio.file.Paths;
5+
import java.io.IOException;
6+
import java.io.UnsupportedEncodingException;
7+
8+
public class UnicodeTest {
9+
public static String echo(String input) {
10+
return input;
11+
}
12+
13+
public static byte[] echoBytes(byte[] input) {
14+
return input.clone();
15+
}
16+
17+
public static void main(String[] args) {
18+
try {
19+
runTest();
20+
} catch (IOException e) {
21+
e.printStackTrace();
22+
// exit 1
23+
System.exit(1);
24+
}
25+
}
26+
27+
public static void runTest() throws IOException {
28+
byte[] allBytes = Files.readAllBytes(Paths.get("encoding_test.cp1252"));
29+
30+
String allString = new String(allBytes, "Cp1252");
31+
String echoedString = echo(allString);
32+
33+
// print out the echoed string
34+
System.out.println(echoedString);
35+
36+
byte[] echoedBytes = echoBytes(allBytes);
37+
// print out the echoed bytes as hex
38+
for (byte b : echoedBytes) {
39+
System.out.printf("%02X ", b);
40+
}
41+
}
42+
}

agent/test/encoding/appmap.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name: encoding
2+
packages:
3+
- path: test.pkg

agent/test/encoding/encoding.bats

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env bats
2+
# shellcheck disable=SC2164
3+
4+
load '../helper'
5+
6+
sep="$JAVA_PATH_SEPARATOR"
7+
AGENT_JAR="$(find_agent_jar)"
8+
java_cmd="java -cp ${BATS_TEST_DIRNAME}/build -javaagent:'${AGENT_JAR}'"
9+
10+
setup() {
11+
cd "${BATS_TEST_DIRNAME}"
12+
13+
mkdir -p build
14+
# Compile tests. Output to test/encoding so package structure 'pkg' works.
15+
# We need to compile both UnicodeTest.java and pkg/Target.java.
16+
javac -d ./build UnicodeTest.java
17+
18+
# Compile ReadFullyTest, requiring the agent jar on the classpath
19+
javac -cp "${AGENT_JAR}" -d ./build ReadFullyTest.java
20+
21+
rm -rf "${BATS_TEST_DIRNAME}/tmp/appmap"
22+
_configure_logging
23+
}
24+
25+
@test "AppMap file encoding with Windows-1252" {
26+
# Run with windows-1252 encoding.
27+
# We assert that the generated file is valid UTF-8 and contains the correct characters,
28+
# even though the JVM default encoding is Windows-1252.
29+
local cmd="${java_cmd} -Dfile.encoding=windows-1252 -Dappmap.recording.auto=true test.pkg.UnicodeTest"
30+
[[ $BATS_VERBOSE_RUN == 1 ]] && echo "cmd: $cmd" >&3
31+
32+
eval "$cmd"
33+
34+
# Verify the output file exists — it should be the only AppMap file generated, with random name
35+
# so glob for tmp/appmap/java/*.appmap.json
36+
appmap_file=$(ls tmp/appmap/java/*.appmap.json)
37+
[ -f "$appmap_file" ]
38+
39+
# Verify it is valid JSON
40+
jq . "$appmap_file" > /dev/null
41+
42+
# Verify it is valid UTF-8
43+
iconv -f UTF-8 -t UTF-8 "$appmap_file" > /dev/null
44+
45+
# Verify it contains the expected Unicode characters
46+
grep -q "Euro: €, Accent: é, Quote: „" "$appmap_file"
47+
}
48+
49+
@test "Recording.readFully works with Windows-1252 default encoding" {
50+
# Run ReadFullyTest with windows-1252 default encoding.
51+
# We also need to add the agent jar to the classpath so it can find the Recording class.
52+
local cmd="java -cp ${BATS_TEST_DIRNAME}/build${sep}${AGENT_JAR} -Dfile.encoding=windows-1252 test.pkg.ReadFullyTest"
53+
[[ $BATS_VERBOSE_RUN == 1 ]] && echo "cmd: $cmd" >&3
54+
55+
run eval "$cmd"
56+
57+
[ "$status" -eq 0 ]
58+
[[ "$output" == *"Check: ⚠️ Привет"* ]]
59+
}
60+
61+
teardown() {
62+
rm -rf tmp
63+
rm -rf build
64+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Euro: �, Accent: �, Quote: �
2+
---SEPARATOR---
3+
:��7��ǞI�
4+
---BINARY---
5+
ޭ��

0 commit comments

Comments
 (0)