Skip to content

Commit 61f22d7

Browse files
Denis Kop
authored and committed
[DomCrawler] fixed bug symfony#12143
1 parent 8d18c98 commit 61f22d7

File tree

2 files changed

+53
-30
lines changed

2 files changed

+53
-30
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -23,21 +23,27 @@
2323
class Crawler extends \SplObjectStorage
2424
{
2525
/**
26-
* @var string The current URI or the base href value
26+
* @var string The current URI
2727
*/
2828
protected $uri;
2929

30+
/**
31+
* @var string The base href value
32+
*/
33+
private $baseHref;
34+
3035
/**
3136
* Constructor.
3237
*
3338
* @param mixed $node A Node to use as the base for the crawling
34-
* @param string $uri The current URI or the base href value
35-
*
39+
* @param string $currentUri The current URI
40+
* @param string $baseHref The base href value
3641
* @api
3742
*/
38-
public function __construct($node = null, $uri = null)
43+
public function __construct($node = null, $currentUri = null, $baseHref = null)
3944
{
40-
$this->uri = $uri;
45+
$this->uri = $currentUri;
46+
$this->baseHref = $baseHref ?: $currentUri;
4147

4248
$this->add($node);
4349
}
@@ -176,13 +182,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')
176182

177183
$baseHref = current($base);
178184
if (count($base) && !empty($baseHref)) {
179-
if ($this->uri) {
185+
if ($this->baseHref) {
180186
$linkNode = $dom->createElement('a');
181187
$linkNode->setAttribute('href', $baseHref);
182-
$link = new Link($linkNode, $this->uri);
183-
$this->uri = $link->getUri();
188+
$link = new Link($linkNode, $this->baseHref);
189+
$this->baseHref = $link->getUri();
184190
} else {
185-
$this->uri = $baseHref;
191+
$this->baseHref = $baseHref;
186192
}
187193
}
188194
}
@@ -294,11 +300,11 @@ public function eq($position)
294300
{
295301
foreach ($this as $i => $node) {
296302
if ($i == $position) {
297-
return new static($node, $this->uri);
303+
return new static($node, $this->uri, $this->baseHref);
298304
}
299305
}
300306

301-
return new static(null, $this->uri);
307+
return new static(null, $this->uri, $this->baseHref);
302308
}
303309

304310
/**
@@ -323,7 +329,7 @@ public function each(\Closure $closure)
323329
{
324330
$data = array();
325331
foreach ($this as $i => $node) {
326-
$data[] = $closure(new static($node, $this->uri), $i);
332+
$data[] = $closure(new static($node, $this->uri, $this->baseHref), $i);
327333
}
328334

329335
return $data;
@@ -344,12 +350,12 @@ public function reduce(\Closure $closure)
344350
{
345351
$nodes = array();
346352
foreach ($this as $i => $node) {
347-
if (false !== $closure(new static($node, $this->uri), $i)) {
353+
if (false !== $closure(new static($node, $this->uri, $this->baseHref), $i)) {
348354
$nodes[] = $node;
349355
}
350356
}
351357

352-
return new static($nodes, $this->uri);
358+
return new static($nodes, $this->uri, $this->baseHref);
353359
}
354360

355361
/**
@@ -391,7 +397,7 @@ public function siblings()
391397
throw new \InvalidArgumentException('The current node list is empty.');
392398
}
393399

394-
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri);
400+
return new static($this->sibling($this->getNode(0)->parentNode->firstChild), $this->uri, $this->baseHref);
395401
}
396402

397403
/**
@@ -409,7 +415,7 @@ public function nextAll()
409415
throw new \InvalidArgumentException('The current node list is empty.');
410416
}
411417

412-
return new static($this->sibling($this->getNode(0)), $this->uri);
418+
return new static($this->sibling($this->getNode(0)), $this->uri, $this->baseHref);
413419
}
414420

415421
/**
@@ -427,7 +433,7 @@ public function previousAll()
427433
throw new \InvalidArgumentException('The current node list is empty.');
428434
}
429435

430-
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri);
436+
return new static($this->sibling($this->getNode(0), 'previousSibling'), $this->uri, $this->baseHref);
431437
}
432438

433439
/**
@@ -454,7 +460,7 @@ public function parents()
454460
}
455461
}
456462

457-
return new static($nodes, $this->uri);
463+
return new static($nodes, $this->uri, $this->baseHref);
458464
}
459465

460466
/**
@@ -474,7 +480,7 @@ public function children()
474480

475481
$node = $this->getNode(0)->firstChild;
476482

477-
return new static($node ? $this->sibling($node) : array(), $this->uri);
483+
return new static($node ? $this->sibling($node) : array(), $this->uri, $this->baseHref);
478484
}
479485

480486
/**
@@ -601,7 +607,7 @@ public function filterXPath($xpath)
601607

602608
// If we dropped all expressions in the XPath while preparing it, there would be no match
603609
if ('' === $xpath) {
604-
return new static(null, $this->uri);
610+
return new static(null, $this->uri, $this->baseHref);
605611
}
606612

607613
return $this->filterRelativeXPath($xpath);
@@ -687,7 +693,7 @@ public function link($method = 'get')
687693

688694
$node = $this->getNode(0);
689695

690-
return new Link($node, $this->uri, $method);
696+
return new Link($node, $this->baseHref, $method);
691697
}
692698

693699
/**
@@ -701,7 +707,7 @@ public function links()
701707
{
702708
$links = array();
703709
foreach ($this as $node) {
704-
$links[] = new Link($node, $this->uri, 'get');
710+
$links[] = new Link($node, $this->baseHref, 'get');
705711
}
706712

707713
return $links;
@@ -792,7 +798,7 @@ public static function xpathLiteral($s)
792798
*/
793799
private function filterRelativeXPath($xpath)
794800
{
795-
$crawler = new static(null, $this->uri);
801+
$crawler = new static(null, $this->uri, $this->baseHref);
796802

797803
foreach ($this as $node) {
798804
$domxpath = new \DOMXPath($node->ownerDocument);

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -824,16 +824,33 @@ public function testParents()
824824
}
825825
}
826826

827-
public function testBaseTag()
827+
/**
828+
* @dataProvider getBaseTagData
829+
*/
830+
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = null)
828831
{
829-
$crawler = new Crawler('<html><base href="http://base.com"><a href="link"></a></html>');
830-
$this->assertEquals('http://base.com/link', $crawler->filterXPath('//a')->link()->getUri());
832+
$crawler = new Crawler('<html><base href="'.$baseValue.'"><a href="'.$linkValue.'"></a></html>', $currentUri);
833+
$this->assertEquals($expectedUri, $crawler->filterXPath('//a')->link()->getUri(), $description);
834+
}
831835

832-
$crawler = new Crawler('<html><base href="//base.com"><a href="link"></a></html>', 'https://domain.com');
833-
$this->assertEquals('https://base.com/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can use a schema-less URL');
836+
public function getBaseTagData()
837+
{
838+
return array(
839+
array('http://base.com', 'link', 'http://base.com/link'),
840+
array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL'),
841+
array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path'),
842+
array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor'),
843+
array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links'),
844+
);
845+
}
846+
847+
public function testBaseTagWithForm()
848+
{
849+
$crawler = new Crawler('<html><base href="/basepath"><form method="post" action="/registration"><button type="submit" name="submit"/></form></html>', 'http://example.com/registration');
850+
$this->assertEquals('http://example.com/registration', $crawler->filterXPath('//button')->form()->getUri());
834851

835-
$crawler = new Crawler('<html><base href="path/"><a href="link"></a></html>', 'https://domain.com');
836-
$this->assertEquals('https://domain.com/path/link', $crawler->filterXPath('//a')->link()->getUri(), '<base> tag can set a path');
852+
$crawler = new Crawler('<html><base href="/basepath"><form method="post"><button type="submit" name="submit"/></form></html>', 'http://example.com/registration');
853+
$this->assertEquals('http://example.com/registration', $crawler->filterXPath('//button')->form()->getUri());
837854
}
838855

839856
public function createTestCrawler($uri = null)

0 commit comments

Comments
 (0)