Skip to content

Commit 4bd4376

Browse files
committed
Dumper: improved encoding of strings
1 parent b54326b commit 4bd4376

File tree

3 files changed

+63
-33
lines changed

3 files changed

+63
-33
lines changed

src/Framework/Dumper.php

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,6 @@ class Dumper
3333
*/
3434
public static function toLine($var): string
3535
{
36-
static $table;
37-
if ($table === null) {
38-
foreach (array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF")) as $ch) {
39-
$table[$ch] = '\x' . str_pad(dechex(ord($ch)), 2, '0', STR_PAD_LEFT);
40-
}
41-
$table['\\'] = '\\\\';
42-
$table["\r"] = '\r';
43-
$table["\n"] = '\n';
44-
$table["\t"] = '\t';
45-
}
46-
4736
if (is_bool($var)) {
4837
return $var ? 'TRUE' : 'FALSE';
4938

@@ -62,9 +51,7 @@ public static function toLine($var): string
6251
} elseif (strlen($var) > self::$maxLength) {
6352
$var = substr($var, 0, self::$maxLength) . '...';
6453
}
65-
return preg_match('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{10FFFF}]#u', $var) || preg_last_error()
66-
? '"' . strtr($var, $table) . '"'
67-
: "'$var'";
54+
return self::encodeString($var, false);
6855

6956
} elseif (is_array($var)) {
7057
$out = '';
@@ -146,20 +133,10 @@ private static function _toPhp(&$var, array &$list = [], int $level = 0, int &$l
146133
} elseif ($var === null) {
147134
return 'null';
148135

149-
} elseif (is_string($var) && (preg_match('#[^\x09\x20-\x7E\xA0-\x{10FFFF}]#u', $var) || preg_last_error())) {
150-
static $table;
151-
if ($table === null) {
152-
foreach (array_merge(range("\x00", "\x1F"), range("\x7F", "\xFF")) as $ch) {
153-
$table[$ch] = '\x' . str_pad(dechex(ord($ch)), 2, '0', STR_PAD_LEFT);
154-
}
155-
$table['\\'] = '\\\\';
156-
$table["\r"] = '\r';
157-
$table["\n"] = '\n';
158-
$table["\t"] = '\t';
159-
$table['$'] = '\$';
160-
$table['"'] = '\"';
161-
}
162-
return '"' . strtr($var, $table) . '"';
136+
} elseif (is_string($var)) {
137+
$res = self::encodeString($var, true);
138+
$line += substr_count($res, "\n");
139+
return $res;
163140

164141
} elseif (is_array($var)) {
165142
$space = str_repeat("\t", $level);
@@ -242,9 +219,56 @@ private static function _toPhp(&$var, array &$list = [], int $level = 0, int &$l
242219
return '/* resource ' . get_resource_type($var) . ' */';
243220

244221
} else {
245-
$res = var_export($var, true);
246-
$line += substr_count($res, "\n");
247-
return $res;
222+
return var_export($var, true);
223+
}
224+
}
225+
226+
227+
/**
 * Encodes a string as a quoted literal, escaping characters that are not safely printable.
 *
 * Backslashes are doubled and tabs are kept verbatim in both modes. With $asPhp true,
 * CR, LF and ESC are written as \r, \n and \e; with $asPhp false, CR and LF are kept
 * verbatim. Any other matched character is written as \xHH when it is a single byte,
 * or as \u{HEX} when it is a multi-byte UTF-8 sequence.
 *
 * @param  string  $s      string to encode
 * @param  bool    $asPhp  selects which table of special-case replacements is used
 * @return string  single-quoted literal when nothing except backslashes had to be escaped,
 *                 otherwise a double-quoted literal with " and $ backslash-escaped as well
 */
private static function encodeString(string $s, bool $asPhp): string
228+
{
229+
// Replacement tables indexed by $asPhp (PHP casts the bool keys to int 1 / 0).
static $specials = [
230+
true => [
231+
"\r" => '\r',
232+
"\n" => '\n',
233+
"\t" => "\t",
234+
"\e" => '\e',
235+
'\\' => '\\\\',
236+
],
237+
false => [
238+
"\r" => "\r",
239+
"\n" => "\n",
240+
"\t" => "\t",
241+
'\\' => '\\\\',
242+
],
243+
];
244+
$special = $specials[$asPhp];
245+
// An empty pattern with the /u modifier only matches when $s is valid UTF-8.
$utf8 = preg_match('##u', $s);
246+
// Valid UTF-8: match Unicode "Other" category characters (\p{C}) and backslashes.
// Otherwise: match bytes 0x00-0x1F and 0x7F-0xFF and backslashes, one byte at a time.
$escaped = preg_replace_callback(
247+
$utf8 ? '#[\p{C}\\\\]#u' : '#[\x00-\x1F\x7F-\xFF\\\\]#',
248+
function ($m) use ($special) {
249+
return $special[$m[0]] ?? (strlen($m[0]) === 1
250+
? '\x' . str_pad(strtoupper(dechex(ord($m[0]))), 2, '0', STR_PAD_LEFT) . ''
251+
: '\u{' . strtoupper(ltrim(dechex(self::utf8Ord($m[0])), '0')) . '}');
252+
},
253+
$s
254+
);
255+
// If undoing the backslash doubling gives back $s, no escape sequence was introduced,
// so a single-quoted literal suffices: there only ' needs a backslash, plus any
// backslash that precedes ', another backslash or the end of the string.
return $s === str_replace('\\\\', '\\', $escaped)
256+
? "'" . preg_replace('#\'|\\\\(?=[\'\\\\]|$)#D', '\\\\$0', $s) . "'"
257+
: '"' . addcslashes($escaped, '"$') . '"';
258+
}
259+
260+
261+
/**
 * Returns the Unicode code point of the UTF-8 encoded character at the start of $c.
 *
 * The sequence length is chosen from the lead byte alone; continuation bytes are not
 * validated. NOTE(review): assumes $c is a well-formed UTF-8 sequence - the visible
 * caller only passes multi-byte matches from a /u pattern on a string that already
 * passed the UTF-8 validity check.
 *
 * @param  string  $c  UTF-8 encoded character (1 to 4 bytes)
 * @return int     code point
 */
private static function utf8Ord(string $c): int
262+
{
263+
$ord0 = ord($c[0]);
264+
if ($ord0 < 0x80) {
265+
return $ord0;
266+
} elseif ($ord0 < 0xE0) {
267+
// 2 bytes; 0x3080 = (0xC0 << 6) + 0x80 removes the lead and continuation marker bits
return ($ord0 << 6) + ord($c[1]) - 0x3080;
268+
} elseif ($ord0 < 0xF0) {
269+
// 3 bytes; 0xE2080 = (0xE0 << 12) + (0x80 << 6) + 0x80
return ($ord0 << 12) + (ord($c[1]) << 6) + ord($c[2]) - 0xE2080;
270+
} else {
271+
// 4 bytes; 0x3C82080 = (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
return ($ord0 << 18) + (ord($c[1]) << 12) + (ord($c[2]) << 6) + ord($c[3]) - 0x3C82080;
248272
}
249273
}
250274

tests/Framework/Dumper.toLine.phpt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ Assert::match("''", Dumper::toLine(''));
2222
Assert::match("' '", Dumper::toLine(' '));
2323
Assert::match("'0'", Dumper::toLine('0'));
2424
Assert::match('"\\x00"', Dumper::toLine("\x00"));
25+
Assert::match('"\u{FEFF}"', Dumper::toLine("\xEF\xBB\xBF")); // BOM
2526
Assert::match("' '", Dumper::toLine("\t"));
26-
Assert::match('"\\xff"', Dumper::toLine("\xFF"));
27+
Assert::match('"\\xFF"', Dumper::toLine("\xFF"));
2728
Assert::match("'multi\nline'", Dumper::toLine("multi\nline"));
2829
Assert::match("'Iñtërnâtiônàlizætiøn'", Dumper::toLine("I\xc3\xb1t\xc3\xabrn\xc3\xa2ti\xc3\xb4n\xc3\xa0liz\xc3\xa6ti\xc3\xb8n"));
2930
Assert::match('resource(stream)', Dumper::toLine(fopen(__FILE__, 'r')));

tests/Framework/Dumper.toPhp.phpt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,9 @@ Assert::match("''", Dumper::toPhp(''));
2929
Assert::match("' '", Dumper::toPhp(' '));
3030
Assert::match("'0'", Dumper::toPhp('0'));
3131
Assert::match('"\\x00"', Dumper::toPhp("\x00"));
32+
Assert::match('"\u{FEFF}"', Dumper::toPhp("\xEF\xBB\xBF")); // BOM
3233
Assert::match("' '", Dumper::toPhp("\t"));
33-
Assert::match('"\\xff"', Dumper::toPhp("\xFF"));
34+
Assert::match('"\\xFF"', Dumper::toPhp("\xFF"));
3435
Assert::match('"multi\nline"', Dumper::toPhp("multi\nline"));
3536
Assert::match("'Iñtërnâtiônàlizætiøn'", Dumper::toPhp("I\xc3\xb1t\xc3\xabrn\xc3\xa2ti\xc3\xb4n\xc3\xa0liz\xc3\xa6ti\xc3\xb8n"));
3637
Assert::match('[
@@ -41,6 +42,10 @@ Assert::match('[
4142
[1 => 1, 2, 3, 4, 5, 6, 7, \'abcdefgh\'],
4243
]', Dumper::toPhp([1, 'hello', "\r" => [], [1, 2], [1 => 1, 2, 3, 4, 5, 6, 7, 'abcdefgh']]));
4344

45+
Assert::match('\'$"\\\\\'', Dumper::toPhp('$"\\'));
46+
Assert::match('\'$"\\ \x00\'', Dumper::toPhp('$"\\ \x00'));
47+
Assert::match('"\\$\\"\\\\ \x00"', Dumper::toPhp("$\"\\ \x00"));
48+
4449
Assert::match('/* resource stream */', Dumper::toPhp(fopen(__FILE__, 'r')));
4550
Assert::match('(object) /* #%a% */ []', Dumper::toPhp((object) null));
4651
Assert::match("(object) /* #%a% */ [

0 commit comments

Comments
 (0)