Skip to content

Commit 8a19bc2

Browse files
committed
fix: Enforce UTF-8 for AppMap I/O and add encoding regression tests
This commit ensures consistent UTF-8 character encoding for AppMap data across file system operations and HTTP responses, preventing corruption on systems where the default charset is not UTF-8 (e.g., Windows-1252). Key changes: - `RecordingSession.java`: Enforce `StandardCharsets.UTF_8` when creating writers for AppMap files, replacing reliance on default `FileWriter`. - `Recording.java`: Explicitly use `StandardCharsets.UTF_8` in `readFully` to correctly decode AppMap content during retrieval. - `ServletRequest.java`: Set `Content-Type: application/json; charset=UTF-8` for remote recording responses and calculate `Content-Length` based on UTF-8 byte size rather than string length. - `agent/test/encoding/`: Add a comprehensive regression test suite (`encoding.bats`, `UnicodeTest.java`, `ReadFullyTest.java`) to verify encoding handling for both reading and writing operations under non-UTF-8 environment settings.
1 parent db459a4 commit 8a19bc2

File tree

8 files changed

+170
-8
lines changed

8 files changed

+170
-8
lines changed

agent/src/main/java/com/appland/appmap/process/hooks/remoterecording/ServletRequest.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import java.io.IOException;
44
import java.io.PrintWriter;
5+
import java.nio.charset.StandardCharsets;
56

67
import com.appland.appmap.record.Recording;
78
import com.appland.appmap.reflect.HttpServletRequest;
@@ -29,8 +30,8 @@ public void setStatus(int status) {
2930
}
3031

3132
public void writeJson(String responseJson) throws IOException {
32-
res.setContentType("application/json");
33-
res.setContentLength(responseJson.length());
33+
res.setContentType("application/json; charset=UTF-8");
34+
res.setContentLength(responseJson.getBytes(StandardCharsets.UTF_8).length);
3435
res.setStatus(HttpServletResponse.SC_OK);
3536

3637
PrintWriter writer = res.getWriter();
@@ -39,7 +40,7 @@ public void writeJson(String responseJson) throws IOException {
3940
}
4041

4142
public void writeRecording(Recording recording) throws IOException {
42-
res.setContentType("application/json");
43+
res.setContentType("application/json; charset=UTF-8");
4344
res.setContentLength(recording.size());
4445
recording.readFully(true, res.getWriter());
4546
}

agent/src/main/java/com/appland/appmap/record/Recording.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55

66
import java.io.File;
77
import java.io.FileInputStream;
8-
import java.io.FileReader;
98
import java.io.IOException;
109
import java.io.InputStream;
10+
import java.io.InputStreamReader;
1111
import java.io.Reader;
1212
import java.io.Writer;
13+
import java.nio.charset.StandardCharsets;
1314
import java.nio.file.Files;
1415
import java.nio.file.Path;
1516
import java.nio.file.Paths;
@@ -100,7 +101,7 @@ public Path moveTo(Path targetPath) {
100101
}
101102

102103
public void readFully(boolean delete, Writer writer) throws IOException {
103-
try (final Reader reader = new FileReader(this.file)) {
104+
try (final Reader reader = new InputStreamReader(new FileInputStream(this.file), StandardCharsets.UTF_8)) {
104105
char[] buffer = new char[2048];
105106
int bytesRead;
106107
while ((bytesRead = reader.read(buffer)) != -1) {

agent/src/main/java/com/appland/appmap/record/RecordingSession.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
package com.appland.appmap.record;
22

33
import java.io.File;
4-
import java.io.FileWriter;
4+
import java.io.FileOutputStream;
55
import java.io.IOException;
66
import java.io.OutputStream;
77
import java.io.OutputStreamWriter;
88
import java.io.RandomAccessFile;
99
import java.io.Writer;
10+
import java.nio.charset.StandardCharsets;
1011
import java.nio.file.Files;
1112
import java.nio.file.Path;
1213
import java.nio.file.StandardCopyOption;
@@ -98,7 +99,7 @@ public synchronized Recording checkpoint() {
9899
public void write(int b) throws IOException {
99100
raf.write(b);
100101
}
101-
});
102+
}, StandardCharsets.UTF_8);
102103
raf.seek(targetPath.toFile().length());
103104

104105
if ( eventReceived ) {
@@ -162,7 +163,7 @@ void start() {
162163
try {
163164
this.tmpPath = Files.createTempFile(null, ".appmap.json");
164165
this.tmpPath.toFile().deleteOnExit();
165-
this.serializer = AppMapSerializer.open(new FileWriter(this.tmpPath.toFile()));
166+
this.serializer = AppMapSerializer.open(new OutputStreamWriter(new FileOutputStream(this.tmpPath.toFile()), StandardCharsets.UTF_8));
166167
} catch (IOException e) {
167168
this.tmpPath = null;
168169
this.serializer = null;
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package test.pkg;
2+
3+
import com.appland.appmap.config.AppMapConfig;
4+
import com.appland.appmap.record.Recording;
5+
6+
import java.io.File;
7+
import java.io.FileOutputStream;
8+
import java.io.IOException;
9+
import java.io.OutputStreamWriter;
10+
import java.io.Writer;
11+
import java.nio.charset.StandardCharsets;
12+
import java.nio.file.FileSystems;
13+
14+
public class ReadFullyTest {
15+
public static void main(String[] args) {
16+
try {
17+
runTest();
18+
} catch (Exception e) {
19+
e.printStackTrace();
20+
System.exit(1);
21+
}
22+
}
23+
24+
public static void runTest() throws IOException {
25+
// Initialize AppMapConfig
26+
AppMapConfig.initialize(FileSystems.getDefault());
27+
28+
// 1. Create a dummy AppMap file with known UTF-8 content
29+
String content = "Check: \u26A0\uFE0F \u041F\u0440\u0438\u0432\u0435\u0442";
30+
File tempFile = File.createTempFile("readfully", ".appmap.json");
31+
tempFile.deleteOnExit();
32+
33+
try (Writer fw = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
34+
fw.write(content);
35+
}
36+
37+
// 2. Create a Recording object pointing to it
38+
Recording recording = new Recording("test", tempFile);
39+
40+
// 3. Call readFully and write to stdout using UTF-8
41+
// This validates that readFully correctly reads the UTF-8 file bytes into characters
42+
// regardless of the system's default encoding (which we will set to something else in BATS).
43+
Writer stdoutWriter = new OutputStreamWriter(System.out, StandardCharsets.UTF_8);
44+
recording.readFully(false, stdoutWriter);
45+
stdoutWriter.flush();
46+
}
47+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package test.pkg;
2+
3+
import java.nio.file.Files;
4+
import java.nio.file.Paths;
5+
import java.io.IOException;
6+
7+
public class UnicodeTest {
8+
public static String echo(String input) {
9+
return input;
10+
}
11+
12+
public static byte[] echoBytes(byte[] input) {
13+
return input.clone();
14+
}
15+
16+
public static void main(String[] args) {
17+
try {
18+
runTest();
19+
} catch (IOException e) {
20+
e.printStackTrace();
21+
// exit 1
22+
System.exit(1);
23+
}
24+
}
25+
26+
public static void runTest() throws IOException {
27+
byte[] allBytes = Files.readAllBytes(Paths.get("encoding_test.cp1252"));
28+
29+
String allString = new String(allBytes, "Cp1252");
30+
String echoedString = echo(allString);
31+
32+
// print out the echoed string
33+
System.out.println(echoedString);
34+
35+
byte[] echoedBytes = echoBytes(allBytes);
36+
// print out the echoed bytes as hex
37+
for (byte b : echoedBytes) {
38+
System.out.printf("%02X ", b);
39+
}
40+
}
41+
}

agent/test/encoding/appmap.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
name: encoding
2+
packages:
3+
- path: test.pkg

agent/test/encoding/encoding.bats

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env bats
2+
# shellcheck disable=SC2164
3+
4+
load '../helper'
5+
6+
sep="$JAVA_PATH_SEPARATOR"
7+
AGENT_JAR="$(find_agent_jar)"
8+
java_cmd="java -cp ${BATS_TEST_DIRNAME}/build -javaagent:'${AGENT_JAR}'"
9+
10+
setup() {
11+
cd "${BATS_TEST_DIRNAME}"
12+
13+
mkdir -p build
14+
# Compile tests. Output to build so package structure 'test/pkg' works.
15+
javac -d ./build UnicodeTest.java
16+
17+
# Require the agent jar on the classpath to find the Recording class.
18+
javac -cp "${AGENT_JAR}" -d ./build ReadFullyTest.java
19+
20+
rm -rf "${BATS_TEST_DIRNAME}/tmp/appmap"
21+
_configure_logging
22+
}
23+
24+
@test "AppMap file encoding with Windows-1252" {
25+
# Run with windows-1252 encoding.
26+
# We assert that the generated file is valid UTF-8 and contains the correct characters,
27+
# even though the JVM default encoding is Windows-1252.
28+
local cmd="${java_cmd} -Dfile.encoding=windows-1252 -Dappmap.recording.auto=true test.pkg.UnicodeTest"
29+
[[ $BATS_VERBOSE_RUN == 1 ]] && echo "cmd: $cmd" >&3
30+
31+
eval "$cmd"
32+
33+
# Verify the output file exists — it should be the only AppMap file generated, with random name
34+
# so glob for tmp/appmap/java/*.appmap.json
35+
appmap_file=$(ls tmp/appmap/java/*.appmap.json)
36+
[ -f "$appmap_file" ]
37+
38+
# Verify it is valid JSON
39+
jq . "$appmap_file" > /dev/null
40+
41+
# Verify it is valid UTF-8
42+
iconv -f UTF-8 -t UTF-8 "$appmap_file" > /dev/null
43+
44+
# Verify it contains the expected Unicode characters
45+
grep -q "Euro: €, Accent: é, Quote: „" "$appmap_file"
46+
}
47+
48+
@test "Recording.readFully works with Windows-1252 default encoding" {
49+
# Run ReadFullyTest with windows-1252 default encoding.
50+
# We also need to add the agent jar to the classpath so it can find the Recording class.
51+
local cmd="java -cp ${BATS_TEST_DIRNAME}/build${sep}${AGENT_JAR} -Dfile.encoding=windows-1252 test.pkg.ReadFullyTest"
52+
[[ $BATS_VERBOSE_RUN == 1 ]] && echo "cmd: $cmd" >&3
53+
54+
run eval "$cmd"
55+
56+
[ "$status" -eq 0 ]
57+
[[ "$output" == *"Check: ⚠️ Привет"* ]]
58+
}
59+
60+
teardown() {
61+
rm -rf tmp
62+
rm -rf build
63+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Euro: �, Accent: �, Quote: �
2+
---SEPARATOR---
3+
:��7��ǞI�
4+
---BINARY---
5+
ޭ��

0 commit comments

Comments
 (0)