Skip to content

Commit 85fb014

Browse files
Merge branch '3.1'
* 3.1: [VarDumper] Fix source links with latest Twig versions [DomCrawler] Optimize DomCrawler::relativize() [HttpKernel] Fix source links with latest Twig versions [DomCrawler] Allow pipe (|) character in link tags when using XPath expressions
2 parents 241ffb0 + 59eee3c commit 85fb014

File tree

2 files changed

+49
-14
lines changed

2 files changed

+49
-14
lines changed

Crawler.php

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -970,29 +970,54 @@ private function relativize($xpath)
970970
{
971971
$expressions = array();
972972

973-
$unionPattern = '/\|(?![^\[]*\])/';
974973
// An expression which will never match to replace expressions which cannot match in the crawler
975974
// We cannot simply drop
976975
$nonMatchingExpression = 'a[name() = "b"]';
977976

978-
// Split any unions into individual expressions.
979-
foreach (preg_split($unionPattern, $xpath) as $expression) {
980-
$expression = trim($expression);
981-
$parenthesis = '';
977+
$xpathLen = strlen($xpath);
978+
$openedBrackets = 0;
979+
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
980+
981+
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
982+
$i += strcspn($xpath, '"\'[]|', $i);
983+
984+
if ($i < $xpathLen) {
985+
switch ($xpath[$i]) {
986+
case '"':
987+
case "'":
988+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
989+
return $xpath; // The XPath expression is invalid
990+
}
991+
continue 2;
992+
case '[':
993+
++$openedBrackets;
994+
continue 2;
995+
case ']':
996+
--$openedBrackets;
997+
continue 2;
998+
}
999+
}
1000+
if ($openedBrackets) {
1001+
continue;
1002+
}
9821003

983-
// If the union is inside some braces, we need to preserve the opening braces and apply
984-
// the change only inside it.
985-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
986-
$parenthesis = $matches[0];
987-
$expression = substr($expression, strlen($parenthesis));
1004+
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
1005+
// If the union is inside some braces, we need to preserve the opening braces and apply
1006+
// the change only inside it.
1007+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
1008+
$parenthesis = substr($xpath, $startPosition, $j);
1009+
$startPosition += $j;
1010+
} else {
1011+
$parenthesis = '';
9881012
}
1013+
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
9891014

9901015
if (0 === strpos($expression, 'self::*/')) {
9911016
$expression = './'.substr($expression, 8);
9921017
}
9931018

9941019
// add prefix before absolute element selector
995-
if (empty($expression)) {
1020+
if ('' === $expression) {
9961021
$expression = $nonMatchingExpression;
9971022
} elseif (0 === strpos($expression, '//')) {
9981023
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -1005,17 +1030,24 @@ private function relativize($xpath)
10051030
} elseif ('/' === $expression[0] || '.' === $expression[0] || 0 === strpos($expression, 'self::')) {
10061031
$expression = $nonMatchingExpression;
10071032
} elseif (0 === strpos($expression, 'descendant::')) {
1008-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
1033+
$expression = 'descendant-or-self::'.substr($expression, 12);
10091034
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
10101035
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
10111036
$expression = $nonMatchingExpression;
10121037
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
10131038
$expression = 'self::'.$expression;
10141039
}
10151040
$expressions[] = $parenthesis.$expression;
1041+
1042+
if ($i === $xpathLen) {
1043+
return implode(' | ', $expressions);
1044+
}
1045+
1046+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
1047+
$startPosition = $i + 1;
10161048
}
10171049

1018-
return implode(' | ', $expressions);
1050+
return $xpath; // The XPath expression is invalid
10191051
}
10201052

10211053
/**

Tests/CrawlerTest.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@ public function testFilterXpathComplexQueries()
430430
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
431431
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
432432
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
433+
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
433434
}
434435

435436
public function testFilterXPath()
@@ -596,7 +597,7 @@ public function testFilterXPathWithSelfAxes()
596597

597598
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
598599
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
599-
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
600+
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
600601
}
601602

602603
public function testFilter()
@@ -1124,6 +1125,8 @@ public function createTestCrawler($uri = null)
11241125
11251126
<a href="?get=param">GetLink</a>
11261127
1128+
<a href="/example">Klausi|Claudiu</a>
1129+
11271130
<form action="foo" id="FooFormId">
11281131
<input type="text" value="TextValue" name="TextName" />
11291132
<input type="submit" value="FooValue" name="FooName" id="FooId" />

0 commit comments

Comments
 (0)