@@ -341,18 +341,20 @@ public function data_provider_sample_documents(): array {
341
341
</body>
342
342
</html>
343
343
' ,
344
- 'open_tags ' => array ( 'HTML ' , 'HEAD ' , 'BODY ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' ),
344
+ 'open_tags ' => array ( 'HTML ' , 'HEAD ' , 'BODY ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , ' DIV ' , ' IMG ' ),
345
345
'xpath_breadcrumbs ' => array (
346
- '/HTML ' => array ( 'HTML ' ),
347
- '/HTML/HEAD ' => array ( 'HTML ' , 'HEAD ' ),
348
- '/HTML/BODY ' => array ( 'HTML ' , 'BODY ' ),
349
- '/HTML/BODY/DIV[@id= \'header \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
346
+ '/HTML ' => array ( 'HTML ' ),
347
+ '/HTML/HEAD ' => array ( 'HTML ' , 'HEAD ' ),
348
+ '/HTML/BODY ' => array ( 'HTML ' , 'BODY ' ),
349
+ '/HTML/BODY/DIV[@id= \'wpadminbar \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
350
+ '/HTML/BODY/DIV[@id= \'wpadminbar \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
351
+ '/HTML/BODY/DIV[@id= \'header \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
350
352
'/HTML/BODY/DIV[@id= \'header \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
351
- '/HTML/BODY/DIV[@id= \'primary \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
353
+ '/HTML/BODY/DIV[@id= \'primary \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
352
354
'/HTML/BODY/DIV[@id= \'primary \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
353
- '/HTML/BODY/DIV[@id= \'secondary \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
355
+ '/HTML/BODY/DIV[@id= \'secondary \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
354
356
'/HTML/BODY/DIV[@id= \'secondary \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
355
- '/HTML/BODY/DIV[@id= \'colophon \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
357
+ '/HTML/BODY/DIV[@id= \'colophon \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
356
358
'/HTML/BODY/DIV[@id= \'colophon \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
357
359
),
358
360
),
@@ -392,25 +394,27 @@ public function data_provider_sample_documents(): array {
392
394
</body>
393
395
</html>
394
396
' ,
395
- 'open_tags ' => array ( 'HTML ' , 'HEAD ' , 'BODY ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' ),
397
+ 'open_tags ' => array ( 'HTML ' , 'HEAD ' , 'BODY ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , 'DIV ' , 'IMG ' , ' DIV ' , ' IMG ' ),
396
398
'xpath_breadcrumbs ' => array (
397
- '/HTML ' => array ( 'HTML ' ),
398
- '/HTML/HEAD ' => array ( 'HTML ' , 'HEAD ' ),
399
- '/HTML/BODY ' => array ( 'HTML ' , 'BODY ' ),
400
- '/HTML/BODY/DIV[@role= \'banner \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
399
+ '/HTML ' => array ( 'HTML ' ),
400
+ '/HTML/HEAD ' => array ( 'HTML ' , 'HEAD ' ),
401
+ '/HTML/BODY ' => array ( 'HTML ' , 'BODY ' ),
402
+ '/HTML/BODY/DIV[@id= \'wpadminbar \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
403
+ '/HTML/BODY/DIV[@id= \'wpadminbar \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
404
+ '/HTML/BODY/DIV[@role= \'banner \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
401
405
'/HTML/BODY/DIV[@role= \'banner \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
402
406
'/HTML/BODY/DIV[@class= \'content-area main \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
403
407
'/HTML/BODY/DIV[@class= \'content-area main \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
404
408
'/HTML/BODY/DIV[@class= \'widget-area \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
405
409
'/HTML/BODY/DIV[@class= \'widget-area \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
406
410
'/HTML/BODY/DIV[@class= \'site-footer \'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
407
411
'/HTML/BODY/DIV[@class= \'site-footer \']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
408
- '/HTML/BODY/DIV[@class= \'\'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
412
+ '/HTML/BODY/DIV[@class= \'\'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
409
413
'/HTML/BODY/DIV[@class= \'\']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
410
- '/HTML/BODY/DIV[@role= \'\'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
414
+ '/HTML/BODY/DIV[@role= \'\'] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
411
415
'/HTML/BODY/DIV[@role= \'\']/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
412
- '/HTML/BODY/DIV ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
413
- '/HTML/BODY/DIV/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
416
+ '/HTML/BODY/DIV ' => array ( 'HTML ' , 'BODY ' , 'DIV ' ),
417
+ '/HTML/BODY/DIV/*[1][self::IMG] ' => array ( 'HTML ' , 'BODY ' , 'DIV ' , 'IMG ' ),
414
418
),
415
419
),
416
420
);
@@ -442,7 +446,7 @@ public function test_next_tag_and_get_xpath( string $document, array $open_tags,
442
446
$ this ->assertSame ( '' , $ p ->get_stored_xpath (), 'Expected empty XPath since iteration has not started. ' );
443
447
$ actual_open_tags = array ();
444
448
$ actual_xpath_breadcrumbs_mapping = array ();
445
- while ( $ p ->next_open_tag ( ) ) {
449
+ while ( $ p ->next_tag ( array ( ' tag_closers ' => ' skip ' ) ) ) {
446
450
$ actual_open_tags [] = $ p ->get_tag ();
447
451
448
452
$ xpath = $ p ->get_stored_xpath ();
@@ -466,14 +470,60 @@ public function test_next_tag_and_get_xpath( string $document, array $open_tags,
466
470
}
467
471
468
472
/**
469
- * Test next_tag() passing query which is invalid .
473
+ * Test next_tag() passing query.
470
474
*
471
475
* @covers ::next_tag
476
+ * @covers ::get_xpath
477
+ * @covers ::get_current_depth
472
478
*/
473
479
public function test_next_tag_with_query (): void {
474
- $ this ->expectException ( InvalidArgumentException::class );
475
- $ p = new OD_HTML_Tag_Processor ( '<html></html> ' );
476
- $ p ->next_tag ( array ( 'tag_name ' => 'HTML ' ) );
480
+ $ html = '
481
+ <!DOCTYPE html>
482
+ <html>
483
+ <head>
484
+ <title></title>
485
+ </head>
486
+ <body>
487
+ <main>
488
+ <p>Hello world</p>
489
+ <figure>
490
+ <img src="https://example.com/img1.jpg">
491
+ </figure>
492
+ <figure>
493
+ <img src="https://example.com/img2.jpg">
494
+ </figure>
495
+ <div class="foo">
496
+ Foo!
497
+ </div>
498
+ </main>
499
+ </body>
500
+ </html>
501
+ ' ;
502
+
503
+ $ p = new OD_HTML_Tag_Processor ( $ html );
504
+ $ this ->assertTrue ( $ p ->next_tag ( array ( 'tag_name ' => 'HTML ' ) ) );
505
+ $ this ->assertTrue ( $ p ->set_bookmark ( 'document_root ' ) );
506
+ $ this ->assertSame ( 1 , $ p ->get_current_depth () );
507
+
508
+ $ this ->assertTrue ( $ p ->next_tag ( array ( 'tag_name ' => 'IMG ' ) ) );
509
+ $ this ->assertEquals ( '/HTML/BODY/MAIN/*[2][self::FIGURE]/*[1][self::IMG] ' , $ p ->get_xpath () );
510
+ $ this ->assertSame ( 5 , $ p ->get_current_depth () );
511
+
512
+ $ this ->assertTrue ( $ p ->next_tag ( array ( 'class_name ' => 'foo ' ) ) );
513
+ $ this ->assertEquals ( '/HTML/BODY/MAIN/*[4][self::DIV] ' , $ p ->get_xpath () );
514
+ $ this ->assertSame ( 4 , $ p ->get_current_depth () );
515
+
516
+ $ this ->assertTrue ( $ p ->seek ( 'document_root ' ) );
517
+ $ this ->assertTrue (
518
+ $ p ->next_tag (
519
+ array (
520
+ 'tag_name ' => 'IMG ' ,
521
+ 'match_offset ' => 2 ,
522
+ )
523
+ )
524
+ );
525
+ $ this ->assertEquals ( '/HTML/BODY/MAIN/*[3][self::FIGURE]/*[1][self::IMG] ' , $ p ->get_xpath () );
526
+ $ this ->assertSame ( 5 , $ p ->get_current_depth () );
477
527
}
478
528
479
529
/**
@@ -484,15 +534,15 @@ public function test_next_tag_with_query(): void {
484
534
public function test_expects_closer (): void {
485
535
$ p = new OD_HTML_Tag_Processor ( '<html><body><hr></body></html> ' );
486
536
$ this ->assertFalse ( $ p ->expects_closer () );
487
- while ( $ p ->next_tag () ) {
537
+ while ( $ p ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) ) {
488
538
if ( 'BODY ' === $ p ->get_tag () ) {
489
539
break ;
490
540
}
491
541
}
492
542
$ this ->assertSame ( 'BODY ' , $ p ->get_tag () );
493
543
$ this ->assertFalse ( $ p ->expects_closer ( 'IMG ' ) );
494
544
$ this ->assertTrue ( $ p ->expects_closer () );
495
- $ p ->next_tag ();
545
+ $ p ->next_tag ( array ( ' tag_closers ' => ' visit ' ) );
496
546
$ this ->assertSame ( 'HR ' , $ p ->get_tag () );
497
547
$ this ->assertFalse ( $ p ->expects_closer () );
498
548
$ this ->assertTrue ( $ p ->expects_closer ( 'DIV ' ) );
@@ -594,7 +644,7 @@ public function test_get_updated_html_when_out_of_bookmarks(): void {
594
644
</html>
595
645
' ;
596
646
$ processor = new OD_HTML_Tag_Processor ( $ html );
597
- $ this ->assertTrue ( $ processor ->next_tag () );
647
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
598
648
$ this ->assertEquals ( 'HTML ' , $ processor ->get_tag () );
599
649
$ max_bookmarks = max ( WP_HTML_Processor::MAX_BOOKMARKS , WP_HTML_Tag_Processor::MAX_BOOKMARKS );
600
650
for ( $ i = 0 ; $ i < $ max_bookmarks + 1 ; $ i ++ ) {
@@ -710,7 +760,7 @@ public function test_bookmarking_and_seeking(): void {
710
760
if ( $ processor ->get_current_depth () < $ embed_block_depth ) {
711
761
break ;
712
762
}
713
- } while ( $ processor ->next_tag () );
763
+ } while ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
714
764
}
715
765
}
716
766
@@ -790,31 +840,31 @@ public function test_get_cursor_move_count(): void {
790
840
)
791
841
);
792
842
$ this ->assertSame ( 0 , $ processor ->get_cursor_move_count () );
793
- $ this ->assertTrue ( $ processor ->next_tag () );
843
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
794
844
$ this ->assertSame ( 'HTML ' , $ processor ->get_tag () );
795
845
$ this ->assertTrue ( $ processor ->set_bookmark ( 'document_root ' ) );
796
846
$ this ->assertSame ( 1 , $ processor ->get_cursor_move_count () );
797
- $ this ->assertTrue ( $ processor ->next_tag () );
847
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
798
848
$ this ->assertSame ( 'HEAD ' , $ processor ->get_tag () );
799
849
$ this ->assertSame ( 3 , $ processor ->get_cursor_move_count () ); // Note that next_token() call #2 was for the whitespace between <html> and <head>.
800
- $ this ->assertTrue ( $ processor ->next_tag () );
850
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
801
851
$ this ->assertSame ( 'HEAD ' , $ processor ->get_tag () );
802
852
$ this ->assertTrue ( $ processor ->is_tag_closer () );
803
853
$ this ->assertSame ( 4 , $ processor ->get_cursor_move_count () );
804
- $ this ->assertTrue ( $ processor ->next_tag () );
854
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
805
855
$ this ->assertSame ( 'BODY ' , $ processor ->get_tag () );
806
856
$ this ->assertSame ( 6 , $ processor ->get_cursor_move_count () ); // Note that next_token() call #5 was for the whitespace between </head> and <body>.
807
- $ this ->assertTrue ( $ processor ->next_tag () );
857
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
808
858
$ this ->assertSame ( 'BODY ' , $ processor ->get_tag () );
809
859
$ this ->assertTrue ( $ processor ->is_tag_closer () );
810
860
$ this ->assertSame ( 7 , $ processor ->get_cursor_move_count () );
811
- $ this ->assertTrue ( $ processor ->next_tag () );
861
+ $ this ->assertTrue ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
812
862
$ this ->assertSame ( 'HTML ' , $ processor ->get_tag () );
813
863
$ this ->assertTrue ( $ processor ->is_tag_closer () );
814
864
$ this ->assertSame ( 9 , $ processor ->get_cursor_move_count () ); // Note that next_token() call #8 was for the whitespace between </body> and </html>.
815
- $ this ->assertFalse ( $ processor ->next_tag () );
865
+ $ this ->assertFalse ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
816
866
$ this ->assertSame ( 10 , $ processor ->get_cursor_move_count () );
817
- $ this ->assertFalse ( $ processor ->next_tag () );
867
+ $ this ->assertFalse ( $ processor ->next_tag ( array ( ' tag_closers ' => ' visit ' ) ) );
818
868
$ this ->assertSame ( 11 , $ processor ->get_cursor_move_count () );
819
869
$ this ->assertTrue ( $ processor ->seek ( 'document_root ' ) );
820
870
$ this ->assertSame ( 12 , $ processor ->get_cursor_move_count () );
0 commit comments