Skip to content

Commit 2a37cf0

Browse files
committed
HTML API: Fix extensibility of WP_HTML_Processor::next_token().
Break out logic from the next_token() method into a private method which may call itself recursively. This allows for subclasses to override the next_token() method and be assured that each call to next_token() corresponds with the consumption of one single token. This also parallels how WP_HTML_Tag_Processor::next_token() wraps a private base_class_next_token() method. Reviewed by jonsurrell. Merges [59285], [59364], and [59747] to 6.7 branch. Props westonruter, jonsurrell, dmsnell, jorbin. git-svn-id: https://develop.svn.wordpress.org/branches/6.7@59757 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 1c02e68 commit 2a37cf0

File tree

3 files changed

+197
-6
lines changed

3 files changed

+197
-6
lines changed

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 27 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -607,6 +607,22 @@ public function next_tag( $query = null ): bool {
607607
return false;
608608
}
609609

610+
/**
 * Advances the processor to the next token in the HTML document.
 *
 * This public method is deliberately a thin wrapper. The traversal logic
 * lives in a private method which may call itself recursively, so that a
 * subclass overriding this method sees one call per consumed token.
 *
 * For access to the raw tokens consider using WP_HTML_Tag_Processor instead.
 *
 * @since 6.5.0 Added for internal support; do not use.
 * @since 6.7.2 Refactored so subclasses may extend.
 *
 * @return bool Whether a token was parsed.
 */
public function next_token(): bool {
	$was_token_parsed = $this->next_visitable_token();

	return $was_token_parsed;
}
625+
610626
/**
611627
* Ensures internal accounting is maintained for HTML semantic rules while
612628
* the underlying Tag Processor class is seeking to a bookmark.
@@ -615,13 +631,18 @@ public function next_tag( $query = null ): bool {
615631
* semantic rules for text nodes. For access to the raw tokens consider using
616632
* WP_HTML_Tag_Processor instead.
617633
*
618-
* @since 6.5.0 Added for internal support; do not use.
634+
* Note that this method may call itself recursively. This is why it is not
635+
* implemented as {@see WP_HTML_Processor::next_token()}, which instead calls
636+
* this method similarly to how {@see WP_HTML_Tag_Processor::next_token()}
637+
* calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method.
638+
*
639+
* @since 6.7.2 Added for internal support.
619640
*
620641
* @access private
621642
*
622643
* @return bool
623644
*/
624-
public function next_token(): bool {
645+
private function next_visitable_token(): bool {
625646
$this->current_element = null;
626647

627648
if ( isset( $this->last_error ) ) {
@@ -639,7 +660,7 @@ public function next_token(): bool {
639660
* tokens works in the meantime and isn't obviously wrong.
640661
*/
641662
if ( empty( $this->element_queue ) && $this->step() ) {
642-
return $this->next_token();
663+
return $this->next_visitable_token();
643664
}
644665

645666
// Process the next event on the queue.
@@ -650,7 +671,7 @@ public function next_token(): bool {
650671
continue;
651672
}
652673

653-
return empty( $this->element_queue ) ? false : $this->next_token();
674+
return empty( $this->element_queue ) ? false : $this->next_visitable_token();
654675
}
655676

656677
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
@@ -661,7 +682,7 @@ public function next_token(): bool {
661682
* the breadcrumbs.
662683
*/
663684
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
664-
return $this->next_token();
685+
return $this->next_visitable_token();
665686
}
666687

667688
// Adjust the breadcrumbs for this event.
@@ -673,7 +694,7 @@ public function next_token(): bool {
673694

674695
// Avoid sending close events for elements which don't expect a closing.
675696
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
676-
return $this->next_token();
697+
return $this->next_visitable_token();
677698
}
678699

679700
return true;
tests/phpunit/data/html-api/token-counting-html-processor.php

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
<?php
2+
3+
class Token_Counting_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Number of times each token has been seen, keyed by token name.
	 *
	 * Tag tokens are keyed by their tag name prefixed with "+" for openers
	 * and "-" for closers; every other token is keyed by its token name.
	 *
	 * @var array<string, int>
	 */
	public $token_seen_count = array();

	/**
	 * Gets next token and records it in the seen-token tally.
	 *
	 * The tally is updated on every call, regardless of the value the
	 * parent method returns.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$result = parent::next_token();

		if ( '#tag' === $this->get_token_type() ) {
			$token_name = ( $this->is_tag_closer() ? '-' : '+' ) . $this->get_tag();
		} else {
			/*
			 * Coerce a missing (null) token name to an empty string so the
			 * array key is always a string. A null offset would be recorded
			 * under the same "" key, but using null as an array offset is
			 * deprecated as of PHP 8.5.
			 *
			 * NOTE(review): presumably the name is null only when no token
			 * is matched (e.g. at the end of the document) — confirm.
			 */
			$token_name = $this->get_token_name() ?? '';
		}

		$this->token_seen_count[ $token_name ] = ( $this->token_seen_count[ $token_name ] ?? 0 ) + 1;

		return $result;
	}
}

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 135 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -908,6 +908,141 @@ public function test_ensure_form_tag_closer_token_is_reachable() {
908908
$this->assertTrue( $processor->is_tag_closer() );
909909
}
910910

911+
/**
912+
* Data provider.
*
* Each dataset supplies an 'html' document together with the
* 'expected_token_counts' that the test compares against the tally
* collected by Token_Counting_HTML_Processor. Tag tokens are keyed by
* tag name with a "+" prefix for openers and a "-" prefix for closers;
* other tokens are keyed by their token name (e.g. "#text", "#comment").
*
* NOTE(review): the '' key presumably corresponds to the final call that
* matches no token — confirm against Token_Counting_HTML_Processor.
913+
*
914+
* @return array Named datasets, each with 'html' and 'expected_token_counts' entries.
915+
*/
916+
public function data_html_processor_with_extended_next_token() {
917+
return array(
918+
'single_instance_per_tag' => array(
919+
'html' => '
920+
<html>
921+
<head>
922+
<meta charset="utf-8">
923+
<title>Hello World</title>
924+
</head>
925+
<body>
926+
<h1>Hello World!</h1>
927+
<img src="example.png">
928+
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
929+
<footer>The end.</footer>
930+
</body>
931+
</html>
932+
',
933+
'expected_token_counts' => array(
934+
'+HTML' => 1,
935+
'+HEAD' => 1,
936+
'#text' => 14,
937+
'+META' => 1,
938+
'+TITLE' => 1,
939+
'-HEAD' => 1,
940+
'+BODY' => 1,
941+
'+H1' => 1,
942+
'-H1' => 1,
943+
'+IMG' => 1,
944+
'+P' => 1,
945+
'#comment' => 1,
946+
'-P' => 1,
947+
'+FOOTER' => 1,
948+
'-FOOTER' => 1,
949+
'-BODY' => 1,
950+
'-HTML' => 1,
951+
'' => 1,
952+
),
953+
),
954+
955+
'multiple_tag_instances' => array(
956+
'html' => '
957+
<html>
958+
<body>
959+
<h1>Hello World!</h1>
960+
<p>First
961+
<p>Second
962+
<p>Third
963+
<ul>
964+
<li>1
965+
<li>2
966+
<li>3
967+
</ul>
968+
</body>
969+
</html>
970+
',
971+
'expected_token_counts' => array(
972+
'+HTML' => 1,
973+
'+HEAD' => 1,
974+
'-HEAD' => 1,
975+
'+BODY' => 1,
976+
'#text' => 13,
977+
'+H1' => 1,
978+
'-H1' => 1,
979+
'+P' => 3,
980+
'-P' => 3,
981+
'+UL' => 1,
982+
'+LI' => 3,
983+
'-LI' => 3,
984+
'-UL' => 1,
985+
'-BODY' => 1,
986+
'-HTML' => 1,
987+
'' => 1,
988+
),
989+
),
990+
991+
'extreme_nested_formatting' => array(
992+
'html' => '
993+
<html>
994+
<body>
995+
<p>
996+
<strong><em><strike><i><b><u>FORMAT</u></b></i></strike></em></strong>
997+
</p>
998+
</body>
999+
</html>
1000+
',
1001+
'expected_token_counts' => array(
1002+
'+HTML' => 1,
1003+
'+HEAD' => 1,
1004+
'-HEAD' => 1,
1005+
'+BODY' => 1,
1006+
'#text' => 7,
1007+
'+P' => 1,
1008+
'+STRONG' => 1,
1009+
'+EM' => 1,
1010+
'+STRIKE' => 1,
1011+
'+I' => 1,
1012+
'+B' => 1,
1013+
'+U' => 1,
1014+
'-U' => 1,
1015+
'-B' => 1,
1016+
'-I' => 1,
1017+
'-STRIKE' => 1,
1018+
'-EM' => 1,
1019+
'-STRONG' => 1,
1020+
'-P' => 1,
1021+
'-BODY' => 1,
1022+
'-HTML' => 1,
1023+
'' => 1,
1024+
),
1025+
),
1026+
);
1027+
}
1028+
1029+
/**
 * Verifies that a WP_HTML_Processor subclass can keep its own bookkeeping by overriding next_token().
 *
 * @ticket 62269
 * @dataProvider data_html_processor_with_extended_next_token
 *
 * @param string $html                  HTML document to process.
 * @param array  $expected_token_counts Expected tally of seen tokens, keyed by token name.
 */
public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts ) {
	require_once DIR_TESTDATA . '/html-api/token-counting-html-processor.php';

	$counter = Token_Counting_HTML_Processor::create_full_parser( $html );

	// Walk the whole document; only the tally gathered along the way matters.
	do {
		$found_tag = $counter->next_tag();
	} while ( $found_tag );

	$actual_counts = $counter->token_seen_count;
	$snapshot      = 'Snapshot: ' . var_export( $actual_counts, true );

	$this->assertEquals( $expected_token_counts, $actual_counts, $snapshot );
}
1045+
9111046
/**
9121047
* Ensure that lowercased tag_name query matches tags case-insensitively.
9131048
*

0 commit comments

Comments
 (0)