23
23
class Crawler extends \SplObjectStorage
24
24
{
25
25
/**
26
- * @var string The current URI or the base href value
26
+ * @var string The current URI
27
27
*/
28
28
protected $ uri ;
29
29
30
+ /**
31
+ * @var string The base href value
32
+ */
33
+ private $ baseHref ;
34
+
30
35
/**
31
36
* Constructor.
32
37
*
33
38
* @param mixed $node A Node to use as the base for the crawling
34
- * @param string $uri The current URI or the base href value
35
- *
39
+ * @param string $currentUri The current URI
40
+ * @param string $baseHref The base href value
36
41
* @api
37
42
*/
38
- public function __construct ($ node = null , $ uri = null )
43
+ public function __construct ($ node = null , $ currentUri = null , $ baseHref = null )
39
44
{
40
- $ this ->uri = $ uri ;
45
+ $ this ->uri = $ currentUri ;
46
+ $ this ->baseHref = $ baseHref ?: $ currentUri ;
41
47
42
48
$ this ->add ($ node );
43
49
}
@@ -176,13 +182,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')
176
182
177
183
$ baseHref = current ($ base );
178
184
if (count ($ base ) && !empty ($ baseHref )) {
179
- if ($ this ->uri ) {
185
+ if ($ this ->baseHref ) {
180
186
$ linkNode = $ dom ->createElement ('a ' );
181
187
$ linkNode ->setAttribute ('href ' , $ baseHref );
182
- $ link = new Link ($ linkNode , $ this ->uri );
183
- $ this ->uri = $ link ->getUri ();
188
+ $ link = new Link ($ linkNode , $ this ->baseHref );
189
+ $ this ->baseHref = $ link ->getUri ();
184
190
} else {
185
- $ this ->uri = $ baseHref ;
191
+ $ this ->baseHref = $ baseHref ;
186
192
}
187
193
}
188
194
}
@@ -294,11 +300,11 @@ public function eq($position)
294
300
{
295
301
foreach ($ this as $ i => $ node ) {
296
302
if ($ i == $ position ) {
297
- return new static ($ node , $ this ->uri );
303
+ return new static ($ node , $ this ->uri , $ this -> baseHref );
298
304
}
299
305
}
300
306
301
- return new static (null , $ this ->uri );
307
+ return new static (null , $ this ->uri , $ this -> baseHref );
302
308
}
303
309
304
310
/**
@@ -323,7 +329,7 @@ public function each(\Closure $closure)
323
329
{
324
330
$ data = array ();
325
331
foreach ($ this as $ i => $ node ) {
326
- $ data [] = $ closure (new static ($ node , $ this ->uri ), $ i );
332
+ $ data [] = $ closure (new static ($ node , $ this ->uri , $ this -> baseHref ), $ i );
327
333
}
328
334
329
335
return $ data ;
@@ -344,12 +350,12 @@ public function reduce(\Closure $closure)
344
350
{
345
351
$ nodes = array ();
346
352
foreach ($ this as $ i => $ node ) {
347
- if (false !== $ closure (new static ($ node , $ this ->uri ), $ i )) {
353
+ if (false !== $ closure (new static ($ node , $ this ->uri , $ this -> baseHref ), $ i )) {
348
354
$ nodes [] = $ node ;
349
355
}
350
356
}
351
357
352
- return new static ($ nodes , $ this ->uri );
358
+ return new static ($ nodes , $ this ->uri , $ this -> baseHref );
353
359
}
354
360
355
361
/**
@@ -391,7 +397,7 @@ public function siblings()
391
397
throw new \InvalidArgumentException ('The current node list is empty. ' );
392
398
}
393
399
394
- return new static ($ this ->sibling ($ this ->getNode (0 )->parentNode ->firstChild ), $ this ->uri );
400
+ return new static ($ this ->sibling ($ this ->getNode (0 )->parentNode ->firstChild ), $ this ->uri , $ this -> baseHref );
395
401
}
396
402
397
403
/**
@@ -409,7 +415,7 @@ public function nextAll()
409
415
throw new \InvalidArgumentException ('The current node list is empty. ' );
410
416
}
411
417
412
- return new static ($ this ->sibling ($ this ->getNode (0 )), $ this ->uri );
418
+ return new static ($ this ->sibling ($ this ->getNode (0 )), $ this ->uri , $ this -> baseHref );
413
419
}
414
420
415
421
/**
@@ -427,7 +433,7 @@ public function previousAll()
427
433
throw new \InvalidArgumentException ('The current node list is empty. ' );
428
434
}
429
435
430
- return new static ($ this ->sibling ($ this ->getNode (0 ), 'previousSibling ' ), $ this ->uri );
436
+ return new static ($ this ->sibling ($ this ->getNode (0 ), 'previousSibling ' ), $ this ->uri , $ this -> baseHref );
431
437
}
432
438
433
439
/**
@@ -454,7 +460,7 @@ public function parents()
454
460
}
455
461
}
456
462
457
- return new static ($ nodes , $ this ->uri );
463
+ return new static ($ nodes , $ this ->uri , $ this -> baseHref );
458
464
}
459
465
460
466
/**
@@ -474,7 +480,7 @@ public function children()
474
480
475
481
$ node = $ this ->getNode (0 )->firstChild ;
476
482
477
- return new static ($ node ? $ this ->sibling ($ node ) : array (), $ this ->uri );
483
+ return new static ($ node ? $ this ->sibling ($ node ) : array (), $ this ->uri , $ this -> baseHref );
478
484
}
479
485
480
486
/**
@@ -601,7 +607,7 @@ public function filterXPath($xpath)
601
607
602
608
// If we dropped all expressions in the XPath while preparing it, there would be no match
603
609
if ('' === $ xpath ) {
604
- return new static (null , $ this ->uri );
610
+ return new static (null , $ this ->uri , $ this -> baseHref );
605
611
}
606
612
607
613
return $ this ->filterRelativeXPath ($ xpath );
@@ -687,7 +693,7 @@ public function link($method = 'get')
687
693
688
694
$ node = $ this ->getNode (0 );
689
695
690
- return new Link ($ node , $ this ->uri , $ method );
696
+ return new Link ($ node , $ this ->baseHref , $ method );
691
697
}
692
698
693
699
/**
@@ -701,7 +707,7 @@ public function links()
701
707
{
702
708
$ links = array ();
703
709
foreach ($ this as $ node ) {
704
- $ links [] = new Link ($ node , $ this ->uri , 'get ' );
710
+ $ links [] = new Link ($ node , $ this ->baseHref , 'get ' );
705
711
}
706
712
707
713
return $ links ;
@@ -792,7 +798,7 @@ public static function xpathLiteral($s)
792
798
*/
793
799
private function filterRelativeXPath ($ xpath )
794
800
{
795
- $ crawler = new static (null , $ this ->uri );
801
+ $ crawler = new static (null , $ this ->uri , $ this -> baseHref );
796
802
797
803
foreach ($ this as $ node ) {
798
804
$ domxpath = new \DOMXPath ($ node ->ownerDocument );
0 commit comments