Skip to content

Commit 59eee3c

Browse files
Merge branch '2.8' into 3.1
* 2.8: [VarDumper] Fix source links with latest Twig versions; [DomCrawler] Optimize DomCrawler::relativize(); [HttpKernel] Fix source links with latest Twig versions; [DomCrawler] Allow pipe (|) character in link tags when using XPath expressions
2 parents bb7395e + a94f3fe commit 59eee3c

File tree

2 files changed

+49
-14
lines changed

2 files changed

+49
-14
lines changed

Crawler.php

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -940,29 +940,54 @@ private function relativize($xpath)
940940
{
941941
$expressions = array();
942942

943-
$unionPattern = '/\|(?![^\[]*\])/';
944943
// An expression which will never match to replace expressions which cannot match in the crawler
945944
// We cannot simply drop such expressions, so they are replaced with this one instead
946945
$nonMatchingExpression = 'a[name() = "b"]';
947946

948-
// Split any unions into individual expressions.
949-
foreach (preg_split($unionPattern, $xpath) as $expression) {
950-
$expression = trim($expression);
951-
$parenthesis = '';
947+
$xpathLen = strlen($xpath);
948+
$openedBrackets = 0;
949+
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
950+
951+
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
952+
$i += strcspn($xpath, '"\'[]|', $i);
953+
954+
if ($i < $xpathLen) {
955+
switch ($xpath[$i]) {
956+
case '"':
957+
case "'":
958+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
959+
return $xpath; // The XPath expression is invalid
960+
}
961+
continue 2;
962+
case '[':
963+
++$openedBrackets;
964+
continue 2;
965+
case ']':
966+
--$openedBrackets;
967+
continue 2;
968+
}
969+
}
970+
if ($openedBrackets) {
971+
continue;
972+
}
952973

953-
// If the union is inside some braces, we need to preserve the opening braces and apply
954-
// the change only inside it.
955-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
956-
$parenthesis = $matches[0];
957-
$expression = substr($expression, strlen($parenthesis));
974+
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
975+
// If the union is inside some braces, we need to preserve the opening braces and apply
976+
// the change only inside it.
977+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
978+
$parenthesis = substr($xpath, $startPosition, $j);
979+
$startPosition += $j;
980+
} else {
981+
$parenthesis = '';
958982
}
983+
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
959984

960985
if (0 === strpos($expression, 'self::*/')) {
961986
$expression = './'.substr($expression, 8);
962987
}
963988

964989
// add prefix before absolute element selector
965-
if (empty($expression)) {
990+
if ('' === $expression) {
966991
$expression = $nonMatchingExpression;
967992
} elseif (0 === strpos($expression, '//')) {
968993
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -975,17 +1000,24 @@ private function relativize($xpath)
9751000
} elseif ('/' === $expression[0] || '.' === $expression[0] || 0 === strpos($expression, 'self::')) {
9761001
$expression = $nonMatchingExpression;
9771002
} elseif (0 === strpos($expression, 'descendant::')) {
978-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
1003+
$expression = 'descendant-or-self::'.substr($expression, 12);
9791004
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
9801005
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
9811006
$expression = $nonMatchingExpression;
9821007
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
9831008
$expression = 'self::'.$expression;
9841009
}
9851010
$expressions[] = $parenthesis.$expression;
1011+
1012+
if ($i === $xpathLen) {
1013+
return implode(' | ', $expressions);
1014+
}
1015+
1016+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
1017+
$startPosition = $i + 1;
9861018
}
9871019

988-
return implode(' | ', $expressions);
1020+
return $xpath; // The XPath expression is invalid
9891021
}
9901022

9911023
/**

Tests/CrawlerTest.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@ public function testFilterXpathComplexQueries()
430430
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
431431
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
432432
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
433+
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
433434
}
434435

435436
public function testFilterXPath()
@@ -596,7 +597,7 @@ public function testFilterXPathWithSelfAxes()
596597

597598
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
598599
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
599-
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
600+
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
600601
}
601602

602603
public function testFilter()
@@ -1079,6 +1080,8 @@ public function createTestCrawler($uri = null)
10791080
10801081
<a href="?get=param">GetLink</a>
10811082
1083+
<a href="/example">Klausi|Claudiu</a>
1084+
10821085
<form action="foo" id="FooFormId">
10831086
<input type="text" value="TextValue" name="TextName" />
10841087
<input type="submit" value="FooValue" name="FooName" id="FooId" />

0 commit comments

Comments
 (0)