Skip to content

Commit cc96bf5

Browse files
committed
Encoder: invalid UTF-8 sequences are replaced with U+FFFD [Closes #76]
1 parent 5163bfd commit cc96bf5

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

src/Neon/Encoder.php

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,14 @@ public function valueToNode(mixed $val, bool $blockMode = false): Node
65 65
$node->items = $this->arrayToNodes($val, $blockMode);
66 66
return $node;
67 67

68-
} elseif (is_string($val) && Lexer::requiresDelimiters($val)) {
69-
return new Node\StringNode($val, $this->indentation);
68+
} elseif (is_string($val)) {
69+
if (preg_match('//u', $val) === false) {
70+
trigger_error('Invalid UTF-8 sequence in string, replaced with U+FFFD', E_USER_WARNING);
71+
$val = json_decode(json_encode($val, JSON_INVALID_UTF8_SUBSTITUTE));
72+
}
73+
return Lexer::requiresDelimiters($val)
74+
? new Node\StringNode($val, $this->indentation)
75+
: new Node\LiteralNode($val);
70 76

71 77
} else {
72 78
return new Node\LiteralNode($val);

tests/Neon/Encoder.phpt

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,3 +185,10 @@ Assert::same(
185 185
"inner:\n msg: '''\n string\n with newline\n '''\n\n",
186 186
Neon::encode(['inner' => ['msg' => "string\nwith newline"]], true, ' '),
187 187
);
188+
189+
190+
// Invalid UTF-8 is replaced with U+FFFD and triggers warning
191+
Assert::error(function () {
192+
$result = Neon::encode("\x80");
193+
Assert::same("\u{FFFD}", $result);
194+
}, E_USER_WARNING, 'Invalid UTF-8 sequence in string, replaced with U+FFFD');

0 commit comments

Comments
 (0)