Skip to content

Commit 10118a2

Browse files
committed
test: add tests for truncated utf8 sequences
If a source gives us an invalid utf8 sequence, ensure that it is replaced by a Unicode replacement character as expected. One of the semver-major changes in node 8 was to change string_decoder behaviour to better align with the implementation in v8 itself. Now that we have tests for the handling of incomplete utf8 sequences, this actually affects us. To account for this, use a simple version check to determine the expected string output of our bad-utf8 tests. For background see nodejs/node@24ef1e6775 Signed-off-by: Ryan Graham <[email protected]>
1 parent f3ae232 commit 10118a2

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

test/fixtures/basic.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
one
22
two
33
three
4+
45
four

test/test-bad-utf8.js

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Test: a truncated (incomplete) utf8 sequence at the end of the input is
// emitted as Unicode replacement character(s) followed by a newline.
var tap = require('tap');
2+
var Log = require('../');
3+
4+
tap.test('truncated utf8', function(t) {
5+
var slt = Log();
6+
// Four complete lines followed by the first 3 bytes of a 4-byte sequence.
var input = [
7+
Buffer.from('good line\n'),
8+
Buffer.from('good line\n'),
9+
Buffer.from('good line\n'),
10+
Buffer.from('good line\n'),
11+
Buffer.from([
12+
// an incomplete utf8 sequence (3/4 bytes)
13+
0xf0, // byte 1 of 4 marker
14+
0xbf, // byte 2 of 4 marker
15+
0xbf, // byte 3 of 4 marker
16+
]),
17+
];
18+
// Default expectation: the whole truncated sequence collapses into a single
// replacement character (0xef 0xbf 0xbd is U+FFFD encoded as utf8).
var expected = Buffer.concat([
19+
Buffer.from('good line\n'),
20+
Buffer.from('good line\n'),
21+
Buffer.from('good line\n'),
22+
Buffer.from('good line\n'),
23+
Buffer.from([
24+
0xef, 0xbf, 0xbd, // single replacement character
25+
0x0a, // trailing newline added by strong-log-transformer
26+
]),
27+
]);
28+
var received = '';
29+
30+
// Version strings starting with "v4." or "v6." get a different expectation.
if (/^v(4|6)\./.test(process.version)) {
31+
expected = Buffer.concat([
32+
Buffer.from('good line\n'),
33+
Buffer.from('good line\n'),
34+
Buffer.from('good line\n'),
35+
Buffer.from('good line\n'),
36+
Buffer.from([
37+
// prior to node 8 each byte of an invalid utf8 sequence would be
38+
// replaced by a Unicode replacement character (U+FFFD). For more details, see
39+
// https://github.com/nodejs/node/commit/24ef1e6775
40+
0xef, 0xbf, 0xbd, // replacement character
41+
0xef, 0xbf, 0xbd, // replacement character
42+
0xef, 0xbf, 0xbd, // replacement character
43+
0x0a, // trailing newline added by strong-log-transformer
44+
]),
45+
]);
46+
}
47+
// Accumulate every emitted chunk into one string; Buffer chunks are decoded
// as utf8, other non-null chunks are appended as-is.
slt.on('data', function(buf) {
48+
t.comment(buf);
49+
if (Buffer.isBuffer(buf)) {
50+
received += buf.toString('utf8');
51+
} else if (buf !== null) {
52+
received += buf;
53+
}
54+
});
55+
// Once the stream ends, compare the accumulated output with the expected
// bytes decoded as a utf8 string.
slt.on('end', function() {
56+
var expectedStr = expected.toString('utf8');
57+
t.same(received, expectedStr, 'output is input + trailing newline');
58+
t.end();
59+
});
60+
// Write all input as one concatenated chunk, then end the stream.
slt.write(Buffer.concat(input));
61+
slt.end();
62+
});

0 commit comments

Comments
 (0)