Skip to content

Commit 05100ea

Browse files
committed
Refactor parseString
The regex is fragile, so use the per-character parsing algorithm from the spec instead.
1 parent 7e044d8 commit 05100ea

File tree

2 files changed

+67
-17
lines changed

2 files changed

+67
-17
lines changed

src/Parser.php

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -247,29 +247,36 @@ private static function parseNumber(string &$string)
247247

248248
private static function parseString(string &$string): string
249249
{
250-
if (preg_match('/^"([\x00-\x7F]*)"/i', $string, $matches)) {
251-
$string = substr($string, strlen($matches[1]) + 2);
250+
// parseString is only called if the first character is a double quote, so
251+
// there is no need to validate it here.
252+
$string = substr($string, 1);
252253

253-
// Newlines and Tabs are not allowed; string cannot end in escape character.
254-
if (preg_match('/(?<!\\\)\\\([nt]|$)/', $matches[1])) {
255-
throw new ParseException('Invalid whitespace in string');
256-
}
257-
// Only quotes and backslashes should be escaped.
258-
if (preg_match_all('/(?<!\\\)\\\./', $matches[1], $quoted_matches, PREG_PATTERN_ORDER)) {
259-
foreach ($quoted_matches[0] as $quoted_match) {
260-
if (!in_array($quoted_match, ['\\"', '\\\\'])) {
261-
throw new ParseException('Invalid escaped character in string');
262-
}
254+
$output_string = '';
255+
256+
while (strlen($string)) {
257+
$char = $string[0];
258+
$string = substr($string, 1);
259+
260+
if ($char == '\\') {
261+
if ($string == '') {
262+
throw new ParseException("Invalid end of string");
263263
}
264+
265+
$char = $string[0];
266+
$string = substr($string, 1);
267+
if ($char != '"' && $char != '\\') {
268+
throw new ParseException('Invalid escaped character in string');
269+
}
270+
} elseif ($char == '"') {
271+
return $output_string;
272+
} elseif (ord($char) <= 0x1f || ord($char) >= 0x7f) {
273+
throw new ParseException('Invalid character in string');
264274
}
265275

266-
// Unescape quotes and backslashes.
267-
$output_string = preg_replace('/\\\(["\\\])/', '$1', $matches[1]);
268-
} else {
269-
throw new ParseException('Invalid character in string');
276+
$output_string .= $char;
270277
}
271278

272-
return $output_string;
279+
throw new ParseException("Invalid end of string");
273280
}
274281

275282
private static function parseToken(string &$string): Token

tests/ParseListTest.php

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
<?php
2+
3+
namespace gapple\Tests\StructuredFields;
4+
5+
use gapple\StructuredFields\Parser;
6+
use PHPUnit\Framework\TestCase;
7+
8+
class ParseListTest extends TestCase
9+
{
10+
11+
public function multipleStringProvider()
12+
{
13+
$dataset = [];
14+
15+
$dataset[] = [
16+
'raw' => '"one", 1, 42;towel;panic=?0, "two"',
17+
'expected' => [
18+
['one', (object) []],
19+
[1, (object) []],
20+
[42, (object) ['towel' => true, 'panic' => false]],
21+
['two', (object) []],
22+
]
23+
];
24+
25+
$dataset[] = [
26+
'raw' => '"\"Not\\\A;Brand";v="99", "Chromium";v="86"',
27+
'expected' => [
28+
['"Not\\A;Brand', (object) ['v' => "99"]],
29+
['Chromium', (object) ['v' => "86"]],
30+
],
31+
];
32+
33+
return $dataset;
34+
}
35+
36+
/**
37+
* @dataProvider multipleStringProvider
38+
*/
39+
public function testListWithMultipleStrings($raw, $expected)
40+
{
41+
$this->assertEquals($expected, Parser::parseList($raw));
42+
}
43+
}

0 commit comments

Comments
 (0)