Skip to content

Commit 0469ea8

Browse files
committed
bug symfony#13145 [DomCrawler] Fix behaviour with <base> tag (dkop, WouterJ)
This PR was merged into the 2.3 branch.

Discussion
----------

[DomCrawler] Fix behaviour with <base> tag

Finishes symfony#12283

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | symfony#12283, symfony#12143, symfony#12144
| License       | MIT
| Doc PR        | -

Commits
-------

91447e8 Make fabbot happy
1d35e48 Clean up testing
61f22d7 [DomCrawler] fixed bug symfony#12143
2 parents 459b8b6 + 91447e8 commit 0469ea8

File tree

2 files changed

+62
-30
lines changed

2 files changed

+62
-30
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -23,21 +23,27 @@
2323
class Crawler extends \SplObjectStorage
2424
{
2525
/**
26-
* @var string The current URI or the base href value
26+
* @var string The current URI
2727
*/
2828
protected $uri;
2929

30+
/**
31+
* @var string The base href value
32+
*/
33+
private $baseHref;
34+
3035
/**
3136
* Constructor.
3237
*
3338
* @param mixed $node A Node to use as the base for the crawling
34-
* @param string $uri The current URI or the base href value
35-
*
39+
* @param string $currentUri The current URI
40+
* @param string $baseHref The base href value
3641
* @api
3742
*/
38-
public function __construct($node = null, $uri = null)
43+
public function __construct($node = null, $currentUri = null, $baseHref = null)
3944
{
40-
$this->uri = $uri;
45+
$this->uri = $currentUri;
46+
$this->baseHref = $baseHref ?: $currentUri;
4147

4248
$this->add($node);
4349
}
@@ -176,13 +182,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')
176182

177183
$baseHref = current($base);
178184
if (count($base) && !empty($baseHref)) {
179-
if ($this->uri) {
185+
if ($this->baseHref) {
180186
$linkNode = $dom->createElement('a');
181187
$linkNode->setAttribute('href', $baseHref);
182-
$link = new Link($linkNode, $this->uri);
183-
$this->uri = $link->getUri();
188+
$link = new Link($linkNode, $this->baseHref);
189+
$this->baseHref = $link->getUri();
184190
} else {
185-
$this->uri = $baseHref;
191+
$this->baseHref = $baseHref;
186192
}
187193
}
188194
}
@@ -294,11 +300,11 @@ public function eq($position)
294300
{
295301
foreach ($this as $i => $node) {
296302
if ($i == $position) {
297-
return new static($node, $this->uri);
303+
return new static($node, $this->uri, $this->baseHref);
298304
}
299305
}
300306

301-
return new static(null, $this->uri);
307+
return new static(null, $this->uri, $this->baseHref);
302308
}
303309

304310
/**
@@ -323,7 +329,7 @@ public function each(\Closure $closure)
323329
{
324330
$data = array();
325331
foreach ($this as $i => $node) {
326-
$data[] = $closure(new static($node, $this->uri), $i);
332+
$data[] = $closure(new static($node, $this->uri, $this->baseHref), $i);
327333
}
328334

329335
return $data;
@@ -344,12 +350,12 @@ public function reduce(\Closure $closure)
344350
{
345351
$nodes = array();
346352
foreach ($this as $i => $node) {
347-
if (false !== $closure(new static($node, $this->uri), $i)) {
353+
if (false !== $closure(new static($node, $this->uri, $this->baseHref), $i)) {
348354
$nodes[] = $node;
349355
}
350356
}
351357

352-
return new static($nodes, $this->uri);
358+
return new static($nodes, $this->uri, $this->baseHref);
353359
}
354360

355361
/**
@@ -391,7 +397,7 @@ public function siblings()
391397
throw new \InvalidArgumentException('The current node list is empty.');
392398
}
393399

394-
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri);
400+
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri, $this->baseHref);
395401
}
396402

397403
/**
@@ -409,7 +415,7 @@ public function nextAll()
409415
throw new \InvalidArgumentException('The current node list is empty.');
410416
}
411417

412-
return new static($this->sibling($this->getNode(0)), $this->uri);
418+
return new static($this->sibling($this->getNode(0)), $this->uri, $this->baseHref);
413419
}
414420

415421
/**
@@ -427,7 +433,7 @@ public function previousAll()
427433
throw new \InvalidArgumentException('The current node list is empty.');
428434
}
429435

430-
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri);
436+
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri, $this->baseHref);
431437
}
432438

433439
/**
@@ -454,7 +460,7 @@ public function parents()
454460
}
455461
}
456462

457-
return new static($nodes, $this->uri);
463+
return new static($nodes, $this->uri, $this->baseHref);
458464
}
459465

460466
/**
@@ -474,7 +480,7 @@ public function children()
474480

475481
$node = $this->getNode(0)->firstChild;
476482

477-
return new static($node ? $this->sibling($node) : array(), $this->uri);
483+
return new static($node ? $this->sibling($node) : array(), $this->uri, $this->baseHref);
478484
}
479485

480486
/**
@@ -601,7 +607,7 @@ public function filterXPath($xpath)
601607

602608
// If we dropped all expressions in the XPath while preparing it, there would be no match
603609
if ('' === $xpath) {
604-
return new static(null, $this->uri);
610+
return new static(null, $this->uri, $this->baseHref);
605611
}
606612

607613
return $this->filterRelativeXPath($xpath);
@@ -687,7 +693,7 @@ public function link($method = 'get')
687693

688694
$node = $this->getNode(0);
689695

690-
return new Link($node, $this->uri, $method);
696+
return new Link($node, $this->baseHref, $method);
691697
}
692698

693699
/**
@@ -701,7 +707,7 @@ public function links()
701707
{
702708
$links = array();
703709
foreach ($this as $node) {
704-
$links[] = new Link($node, $this->uri, 'get');
710+
$links[] = new Link($node, $this->baseHref, 'get');
705711
}
706712

707713
return $links;
@@ -792,7 +798,7 @@ public static function xpathLiteral($s)
792798
*/
793799
private function filterRelativeXPath($xpath)
794800
{
795-
$crawler = new static(null, $this->uri);
801+
$crawler = new static(null, $this->uri, $this->baseHref);
796802

797803
foreach ($this as $node) {
798804
$domxpath = new \DOMXPath($node->ownerDocument);

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -823,16 +823,42 @@ public function testParents()
823823
}
824824
}
825825

826-
public function testBaseTag()
826+
/**
827+
* @dataProvider getBaseTagData
828+
*/
829+
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = null)
827830
{
828-
$crawler = new Crawler('<html><base href="http://base.com"><a href="link"></a></html>');
829-
$this->assertEquals('http://base.com/link', $crawler->filterXPath('//a')->link()->getUri());
831+
$crawler = new Crawler('<html><base href="'.$baseValue.'"><a href="'.$linkValue.'"></a></html>', $currentUri);
832+
$this->assertEquals($expectedUri, $crawler->filterXPath('//a')->link()->getUri(), $description);
833+
}
830834

831-
$crawler = new Crawler('<html><base href="//base.com"><a href="link"></a></html>', 'https://domain.com');
832-
$this->assertEquals('https://base.com/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can use a schema-less URL');
835+
public function getBaseTagData()
836+
{
837+
return array(
838+
array('http://base.com', 'link', 'http://base.com/link'),
839+
array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL'),
840+
array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path'),
841+
array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor'),
842+
array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links'),
843+
);
844+
}
833845

834-
$crawler = new Crawler('<html><base href="path/"><a href="link"></a></html>', 'https://domain.com');
835-
$this->assertEquals('https://domain.com/path/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can set a path');
846+
/**
847+
* @dataProvider getBaseTagWithFormData
848+
*/
849+
public function testBaseTagWithForm($baseValue, $actionValue, $expectedUri, $currentUri = null, $description = null)
850+
{
851+
$crawler = new Crawler('<html><base href="'.$baseValue.'"><form method="post" action="'.$actionValue.'"><button type="submit" name="submit"/></form></html>', $currentUri);
852+
$this->assertEquals($expectedUri, $crawler->filterXPath('//button')->form()->getUri(), $description);
853+
}
854+
855+
public function getBaseTagWithFormData()
856+
{
857+
return array(
858+
array('/basepath', '/registration', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and form action'),
859+
array('/basepath', '', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and empty form action'),
860+
array('http://base.com', '', 'http://domain.com/path/form', 'http://domain.com/path/form', '<base> tag does work with a URL and an empty form action'),
861+
);
836862
}
837863

838864
public function createTestCrawler($uri = null)

0 commit comments

Comments
 (0)