Skip to content

Commit 7772dd9

Browse files
Merge branch '2.8' into 3.2
* 2.8:
  - [DI] Fix PhpDumper generated doc block
  - #20411 fix Yaml parsing for very long quoted strings
  - [Doctrine Bridge] fix priority for doctrine event listeners
  - Use PHP functions as array_map callbacks when possible
  - [Validator] revert wrong Phpdoc change
  - Use proper line endings
2 parents 093e416 + 8cb74a2 commit 7772dd9

File tree

3 files changed

+81
-43
lines changed

3 files changed

+81
-43
lines changed

Inline.php

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -210,9 +210,9 @@ public static function dump($value, $flags = 0)
210210
case Escaper::requiresDoubleQuoting($value):
211211
return Escaper::escapeWithDoubleQuotes($value);
212212
case Escaper::requiresSingleQuoting($value):
213-
case preg_match('{^[0-9]+[_0-9]*$}', $value):
214-
case preg_match(self::getHexRegex(), $value):
215-
case preg_match(self::getTimestampRegex(), $value):
213+
case Parser::preg_match('{^[0-9]+[_0-9]*$}', $value):
214+
case Parser::preg_match(self::getHexRegex(), $value):
215+
case Parser::preg_match(self::getTimestampRegex(), $value):
216216
return Escaper::escapeWithSingleQuotes($value);
217217
default:
218218
return $value;
@@ -306,10 +306,10 @@ public static function parseScalar($scalar, $flags = 0, $delimiters = null, $str
306306
$i += strlen($output);
307307

308308
// remove comments
309-
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
309+
if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
310310
$output = substr($output, 0, $match[0][1]);
311311
}
312-
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
312+
} elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
313313
$output = $match[1];
314314
$i += strlen($output);
315315
} else {
@@ -345,7 +345,7 @@ public static function parseScalar($scalar, $flags = 0, $delimiters = null, $str
345345
*/
346346
private static function parseQuotedScalar($scalar, &$i)
347347
{
348-
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
348+
if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
349349
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
350350
}
351351

@@ -614,7 +614,7 @@ private static function evaluateScalar($scalar, $flags, $references = array())
614614
return;
615615
case 0 === strpos($scalar, '!!float '):
616616
return (float) substr($scalar, 8);
617-
case preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar):
617+
case Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar):
618618
$scalar = str_replace('_', '', (string) $scalar);
619619
// omitting the break / return as integers are handled in the next case
620620
case ctype_digit($scalar):
@@ -628,7 +628,7 @@ private static function evaluateScalar($scalar, $flags, $references = array())
628628

629629
return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
630630
case is_numeric($scalar):
631-
case preg_match(self::getHexRegex(), $scalar):
631+
case Parser::preg_match(self::getHexRegex(), $scalar):
632632
$scalar = str_replace('_', '', $scalar);
633633

634634
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
@@ -639,14 +639,14 @@ private static function evaluateScalar($scalar, $flags, $references = array())
639639
return log(0);
640640
case 0 === strpos($scalar, '!!binary '):
641641
return self::evaluateBinaryScalar(substr($scalar, 9));
642-
case preg_match('/^(-|\+)?[0-9][0-9,]*(\.[0-9_]+)?$/', $scalar):
643-
case preg_match('/^(-|\+)?[0-9][0-9_]*(\.[0-9_]+)?$/', $scalar):
642+
case Parser::preg_match('/^(-|\+)?[0-9][0-9,]*(\.[0-9_]+)?$/', $scalar):
643+
case Parser::preg_match('/^(-|\+)?[0-9][0-9_]*(\.[0-9_]+)?$/', $scalar):
644644
if (false !== strpos($scalar, ',')) {
645645
@trigger_error('Using the comma as a group separator for floats is deprecated since version 3.2 and will be removed in 4.0.', E_USER_DEPRECATED);
646646
}
647647

648648
return (float) str_replace(array(',', '_'), '', $scalar);
649-
case preg_match(self::getTimestampRegex(), $scalar):
649+
case Parser::preg_match(self::getTimestampRegex(), $scalar):
650650
if (Yaml::PARSE_DATETIME & $flags) {
651651
// When no timezone is provided in the parsed date, YAML spec says we must assume UTC.
652652
return new \DateTime($scalar, new \DateTimeZone('UTC'));
@@ -679,7 +679,7 @@ public static function evaluateBinaryScalar($scalar)
679679
throw new ParseException(sprintf('The normalized base64 encoded data (data without whitespace characters) length must be a multiple of four (%d bytes given).', strlen($parsedBinaryData)));
680680
}
681681

682-
if (!preg_match('#^[A-Z0-9+/]+={0,2}$#i', $parsedBinaryData)) {
682+
if (!Parser::preg_match('#^[A-Z0-9+/]+={0,2}$#i', $parsedBinaryData)) {
683683
throw new ParseException(sprintf('The base64 encoded data (%s) contains invalid characters.', $parsedBinaryData));
684684
}
685685

Parser.php

Lines changed: 57 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ public function parse($value, $flags = 0)
8484
}
8585
}
8686

87-
if (!preg_match('//u', $value)) {
87+
if (false === preg_match('//u', $value)) {
8888
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
8989
}
9090
$this->currentLineNb = -1;
@@ -115,13 +115,13 @@ public function parse($value, $flags = 0)
115115
}
116116

117117
$isRef = $mergeNode = false;
118-
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
118+
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
119119
if ($context && 'mapping' == $context) {
120120
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
121121
}
122122
$context = 'sequence';
123123

124-
if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
124+
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
125125
$isRef = $matches['ref'];
126126
$values['value'] = $matches['value'];
127127
}
@@ -131,7 +131,7 @@ public function parse($value, $flags = 0)
131131
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $flags);
132132
} else {
133133
if (isset($values['leadspaces'])
134-
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
134+
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
135135
) {
136136
// this is a compact notation element, add to next block and parse
137137
$block = $values['value'];
@@ -147,7 +147,10 @@ public function parse($value, $flags = 0)
147147
if ($isRef) {
148148
$this->refs[$isRef] = end($data);
149149
}
150-
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
150+
} elseif (
151+
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
152+
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
153+
) {
151154
if ($context && 'sequence' == $context) {
152155
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
153156
}
@@ -215,7 +218,7 @@ public function parse($value, $flags = 0)
215218
$data += $parsed; // array union
216219
}
217220
}
218-
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
221+
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
219222
$isRef = $matches['ref'];
220223
$values['value'] = $matches['value'];
221224
}
@@ -283,27 +286,7 @@ public function parse($value, $flags = 0)
283286
return $value;
284287
}
285288

286-
switch (preg_last_error()) {
287-
case PREG_INTERNAL_ERROR:
288-
$error = 'Internal PCRE error.';
289-
break;
290-
case PREG_BACKTRACK_LIMIT_ERROR:
291-
$error = 'pcre.backtrack_limit reached.';
292-
break;
293-
case PREG_RECURSION_LIMIT_ERROR:
294-
$error = 'pcre.recursion_limit reached.';
295-
break;
296-
case PREG_BAD_UTF8_ERROR:
297-
$error = 'Malformed UTF-8 data.';
298-
break;
299-
case PREG_BAD_UTF8_OFFSET_ERROR:
300-
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
301-
break;
302-
default:
303-
$error = 'Unable to parse.';
304-
}
305-
306-
throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
289+
throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
307290
}
308291
}
309292

@@ -546,7 +529,7 @@ private function parseValue($value, $flags, $context)
546529
return $this->refs[$value];
547530
}
548531

549-
if (preg_match('/^'.self::TAG_PATTERN.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
532+
if (self::preg_match('/^'.self::TAG_PATTERN.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
550533
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
551534

552535
$data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
@@ -628,7 +611,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
628611

629612
// determine indentation if not specified
630613
if (0 === $indentation) {
631-
if (preg_match('/^ +/', $this->currentLine, $matches)) {
614+
if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
632615
$indentation = strlen($matches[0]);
633616
}
634617
}
@@ -639,7 +622,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
639622
while (
640623
$notEOF && (
641624
$isCurrentLineBlank ||
642-
preg_match($pattern, $this->currentLine, $matches)
625+
self::preg_match($pattern, $this->currentLine, $matches)
643626
)
644627
) {
645628
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
@@ -862,6 +845,49 @@ private function isStringUnIndentedCollectionItem()
862845
*/
863846
private function isBlockScalarHeader()
864847
{
865-
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
848+
return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
849+
}
850+
851+
/**
 * Runs the native `preg_match()` and turns a PCRE failure into a ParseException.
 *
 * The native function reports an engine failure by returning false. Routing
 * every match through this wrapper lets the YAML code use the result in a
 * plain boolean context without testing for false at each call site.
 *
 * @param string     $pattern The regular expression, forwarded unchanged
 * @param string     $subject The string to match against
 * @param array|null $matches Filled by reference with the captured groups
 * @param int        $flags   Flags forwarded to preg_match()
 * @param int        $offset  Start offset forwarded to preg_match()
 *
 * @return int The value returned by preg_match() when it is not false
 *
 * @throws ParseException when preg_match() returns false
 *
 * @see preg_last_error()
 *
 * @internal
 */
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
{
    $result = preg_match($pattern, $subject, $matches, $flags, $offset);
    if (false !== $result) {
        return $result;
    }

    // Map the PCRE error code to its message; any other code falls back to the generic one.
    $messages = array(
        PREG_INTERNAL_ERROR => 'Internal PCRE error.',
        PREG_BACKTRACK_LIMIT_ERROR => 'pcre.backtrack_limit reached.',
        PREG_RECURSION_LIMIT_ERROR => 'pcre.recursion_limit reached.',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data.',
        PREG_BAD_UTF8_OFFSET_ERROR => 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.',
    );
    $code = preg_last_error();

    throw new ParseException(isset($messages[$code]) ? $messages[$code] : 'Error.');
}
867893
}

Tests/ParserTest.php

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
class ParserTest extends TestCase
2020
{
21+
/** @var Parser */
2122
protected $parser;
2223

2324
protected function setUp()
@@ -1478,6 +1479,17 @@ public function testParseMultiLineUnquotedString()
14781479

14791480
$this->assertSame(array('foo' => 'bar baz foobar foo', 'bar' => 'baz'), $this->parser->parse($yaml));
14801481
}
1482+
1483+
/**
 * Dumps a mapping whose single value is a very long, space-separated string
 * and checks that parsing the dumped YAML yields an equal array.
 */
public function testCanParseVeryLongValue()
{
    $expected = array('x' => str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000));

    $parsed = $this->parser->parse(Yaml::dump($expected));

    $this->assertEquals($expected, $parsed);
}
14811493
}
14821494

14831495
class B

0 commit comments

Comments
 (0)