Skip to content

Commit 19941d7

Browse files
committed
Dumper: improved encoding of strings
1 parent fc97959 commit 19941d7

File tree

2 files changed

+41
-18
lines changed

2 files changed

+41
-18
lines changed

src/PhpGenerator/Dumper.php

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,25 +64,44 @@ private function dumpVar(&$var, array $parents = [], int $level = 0, int $column
6464
}
6565

6666

67-
/**
 * Exports a string as a PHP string literal.
 *
 * Valid UTF-8 input has every character of Unicode category "C" and every
 * backslash escaped; any other input has the bytes 0x00-0x1F, 0x7F-0xFF and
 * every backslash escaped. If backslash doubling turns out to be the only
 * escaping that happened, the string is emitted single-quoted from the
 * original value; otherwise it is emitted double-quoted, with `"` and `$`
 * additionally escaped.
 *
 * @param  string  $s  raw value to export
 * @return string  PHP source code of the literal, including its quotes
 */
private function dumpString(string $s): string
{
    // Characters that have a short, readable escape in double-quoted strings.
    static $named = [
        "\r" => '\r',
        "\n" => '\n',
        "\t" => '\t',
        "\e" => '\e',
        '\\' => '\\\\',
    ];

    // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
    $utf8 = preg_match('##u', $s);
    $pattern = $utf8 ? '#[\p{C}\\\\]#u' : '#[\x00-\x1F\x7F-\xFF\\\\]#';

    $escaped = preg_replace_callback(
        $pattern,
        function ($match) use ($named) {
            $char = $match[0];

            if (isset($named[$char])) {
                return $named[$char];
            }

            // A one-byte match becomes a two-digit upper-case hex escape.
            if (strlen($char) === 1) {
                return '\x' . str_pad(strtoupper(dechex(ord($char))), 2, '0', STR_PAD_LEFT);
            }

            // A multi-byte match becomes a Unicode code point escape.
            return '\u{' . strtoupper(ltrim(dechex(self::utf8Ord($char)), '0')) . '}';
        },
        $s
    );

    // Undoing the backslash doubling yields the input again only when nothing else was escaped.
    if ($s === str_replace('\\\\', '\\', $escaped)) {
        // Single quotes: escape the quote itself and any backslash that
        // precedes a quote, another backslash, or the end of the string.
        return "'" . preg_replace('#\'|\\\\(?=[\'\\\\]|$)#D', '\\\\$0', $s) . "'";
    }

    return '"' . addcslashes($escaped, '"$') . '"';
}
92+
93+
/**
 * Returns the Unicode code point of the UTF-8 encoded character at the start of $c.
 *
 * The sequence length is chosen from the first byte alone; the bytes are not
 * validated. Each subtracted constant equals the combined marker bits of the
 * lead byte and its continuation bytes after shifting
 * (e.g. 0x3080 = (0xC0 << 6) + 0x80).
 *
 * @param  string  $c  non-empty string holding one UTF-8 sequence of 1-4 bytes
 * @return int  code point
 */
private static function utf8Ord(string $c): int
{
    $lead = ord($c[0]);

    // One byte: the byte value is the code point.
    if ($lead < 0x80) {
        return $lead;
    }

    // Two bytes.
    if ($lead < 0xE0) {
        return ($lead << 6) + ord($c[1]) - 0x3080;
    }

    // Three bytes.
    if ($lead < 0xF0) {
        return ($lead << 12) + (ord($c[1]) << 6) + ord($c[2]) - 0xE2080;
    }

    // Four bytes.
    return ($lead << 18) + (ord($c[1]) << 12) + (ord($c[2]) << 6) + ord($c[3]) - 0x3C82080;
}
87106

88107

tests/PhpGenerator/Dumper.dump().phpt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ Assert::same('false', $dumper->dump(false));
3232

3333
Assert::same("''", $dumper->dump(''));
3434
Assert::same("'Hello'", $dumper->dump('Hello'));
35-
Assert::same('"\t\n\t"', $dumper->dump("\t\n\t"));
35+
Assert::same('"\t\n\r\e"', $dumper->dump("\t\n\r\e"));
36+
Assert::same('"\u{FEFF}"', $dumper->dump("\xEF\xBB\xBF")); // BOM
37+
Assert::same('\'$"\\\\\'', $dumper->dump('$"\\'));
38+
Assert::same('\'$"\\ \x00\'', $dumper->dump('$"\\ \x00')); // no escape
39+
Assert::same('"\\$\\"\\\\ \x00"', $dumper->dump("$\"\\ \x00"));
3640
Assert::same(
3741
"'I\u{F1}t\u{EB}rn\u{E2}ti\u{F4}n\u{E0}liz\u{E6}ti\u{F8}n'",
3842
$dumper->dump("I\u{F1}t\u{EB}rn\u{E2}ti\u{F4}n\u{E0}liz\u{E6}ti\u{F8}n") // Iñtërnâtiônàlizætiøn

0 commit comments

Comments
 (0)