@@ -349,13 +349,19 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
349349
$this->state->stack_of_open_elements->set_push_handler(
	function ( WP_HTML_Token $token ) {
		/*
		 * A pushed node is treated as "real" only when a current token exists,
		 * that token is not a tag closer, and it carries the same node name as
		 * the node being pushed. Every other push is recorded as "virtual".
		 *
		 * NOTE(review): the provenance literals are reproduced exactly as they
		 * appear in this view, trailing space included; confirm against whatever
		 * reads the event's provenance.
		 */
		$has_current_token = isset( $this->state->current_token );
		$pushed_by_current = (
			$has_current_token &&
			! $this->is_tag_closer() &&
			$token->node_name === $this->state->current_token->node_name
		);

		$this->element_queue[] = new WP_HTML_Stack_Event(
			$token,
			WP_HTML_Stack_Event::PUSH,
			$pushed_by_current ? 'real ' : 'virtual '
		);
	}
);
355358
$this->state->stack_of_open_elements->set_pop_handler(
	function ( WP_HTML_Token $token ) {
		/*
		 * A popped node is treated as "real" only when a current token exists,
		 * that token is a tag closer, and it carries the same node name as the
		 * node being popped. Every other pop is recorded as "virtual".
		 *
		 * NOTE(review): the provenance literals are reproduced exactly as they
		 * appear in this view, trailing space included; confirm against whatever
		 * reads the event's provenance.
		 */
		$has_current_token = isset( $this->state->current_token );
		$popped_by_current = (
			$has_current_token &&
			$this->is_tag_closer() &&
			$token->node_name === $this->state->current_token->node_name
		);

		$this->element_queue[] = new WP_HTML_Stack_Event(
			$token,
			WP_HTML_Stack_Event::POP,
			$popped_by_current ? 'real ' : 'virtual '
		);
	}
);
361367
@@ -569,11 +575,26 @@ public function next_token() {
569575 * @return bool Whether the current tag is a tag closer.
570576 */
public function is_tag_closer() {
	// Tokens that are not virtual are answered by the parent implementation.
	if ( ! $this->is_virtual() ) {
		return parent::is_tag_closer();
	}

	/*
	 * A virtual token is a closer only when it was produced by a POP stack
	 * event and it is a tag; other virtual tokens are never reported as tag
	 * closers.
	 *
	 * The token type is compared without any surrounding whitespace: a padded
	 * literal can never equal the reported token type, which would make this
	 * branch always false.
	 */
	return (
		WP_HTML_Stack_Event::POP === $this->current_element->operation &&
		'#tag' === $this->get_token_type()
	);
}
576582
/**
 * Reports whether the currently-matched token is virtual.
 *
 * A virtual token is one created by a stack operation while processing
 * the HTML, as opposed to a token found in the HTML text itself. The
 * answer is read from the provenance recorded on the current stack event.
 *
 * @since 6.6.0
 *
 * @return bool Whether the current token is virtual.
 */
private function is_virtual() {
	// With no current element, or no provenance recorded on it, the token is not virtual.
	if ( ! isset( $this->current_element->provenance ) ) {
		return false;
	}

	/*
	 * NOTE(review): the literal is reproduced exactly as it appears in this
	 * view, trailing space included. It has to stay identical to the value
	 * stored on the stack event by the push and pop handlers.
	 */
	return 'virtual ' === $this->current_element->provenance;
}
597+
577598 /**
578599 * Indicates if the currently-matched tag matches the given breadcrumbs.
579600 *
@@ -1440,7 +1461,7 @@ public function get_tag() {
14401461 return null ;
14411462 }
14421463
1443- if ( isset ( $ this ->current_element ) ) {
1464+ if ( $ this ->is_virtual ( ) ) {
14441465 return $ this ->current_element ->token ->node_name ;
14451466 }
14461467
@@ -1459,6 +1480,27 @@ public function get_tag() {
14591480 }
14601481 }
14611482
/**
 * Reports whether the currently matched tag is written with the self-closing flag.
 *
 * This only reports the presence of the flag in the syntax; it does not decide
 * whether the tag actually self-closes. HTML elements should not carry the flag
 * and it is ignored on them: harmless for void elements, which close on their
 * own, but a source of problems when a non-void element is written self-closing
 * in place of an element with an empty body. For HTML foreign elements and
 * custom elements the flag determines whether they self-close.
 *
 * A virtual token never has the flag.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @return bool Whether the currently matched tag contains the self-closing flag.
 */
public function has_self_closing_flag() {
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::has_self_closing_flag();
}
1503+
14621504 /**
14631505 * Returns the node name represented by the token.
14641506 *
@@ -1480,11 +1522,9 @@ public function get_tag() {
14801522 * @return string|null Name of the matched token.
14811523 */
public function get_token_name() {
	// A virtual token takes its name from the node recorded on the stack event.
	if ( $this->is_virtual() ) {
		return $this->current_element->token->node_name;
	}

	// Any other token is named by the parent implementation.
	return parent::get_token_name();
}
14891529
14901530 /**
@@ -1510,9 +1550,16 @@ public function get_token_name() {
15101550 * @return string|null What kind of token is matched, or null.
15111551 */
15121552 public function get_token_type () {
1513- if ( isset ( $ this ->current_element ) ) {
1514- $ node_name = $ this ->current_element ->token ->node_name ;
1515- if ( ctype_upper ( $ node_name [0 ] ) ) {
1553+ if ( $ this ->is_virtual () ) {
1554+ /*
1555+ * This logic comes from the Tag Processor.
1556+ *
1557+ * @todo It would be ideal not to repeat this here, but it's not clearly
1558+ * better to allow passing a token name to `get_token_type()`.
1559+ */
1560+ $ node_name = $ this ->current_element ->token ->node_name ;
1561+ $ starting_char = $ node_name [0 ];
1562+ if ( 'A ' <= $ starting_char && 'Z ' >= $ starting_char ) {
15161563 return '#tag ' ;
15171564 }
15181565
@@ -1546,25 +1593,38 @@ public function get_token_type() {
15461593 * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
15471594 */
public function get_attribute( $name ) {
	// Virtual tokens expose no attributes.
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::get_attribute( $name );
}
/**
 * Sets an attribute on the currently matched tag, creating it if it does not exist.
 *
 * Boolean attributes are handled specially:
 *  - Passing `true` adds only the attribute name to the tag.
 *  - Passing `false` removes the attribute if it was present.
 *
 * String values are escaped using the `esc_attr` function.
 *
 * Nothing is changed, and `false` is returned, when the current token is virtual.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @param string      $name  The attribute name to target.
 * @param string|bool $value The new attribute value.
 * @return bool Whether an attribute value was set.
 */
public function set_attribute( $name, $value ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::set_attribute( $name, $value );
}
/**
 * Removes an attribute from the currently-matched tag.
 *
 * Nothing is removed, and `false` is returned, when the current token is virtual.
 *
 * @since 6.6.0 Subclassed for HTML Processor.
 *
 * @param string $name The attribute name to remove.
 * @return bool Whether an attribute was removed.
 */
public function remove_attribute( $name ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::remove_attribute( $name );
}
15691629
15701630 /**
@@ -1594,18 +1654,63 @@ public function get_attribute( $name ) {
15941654 * @return array|null List of attribute names, or `null` when no tag opener is matched.
15951655 */
public function get_attribute_names_with_prefix( $prefix ) {
	// Virtual tokens expose no attributes, so there are no names to list.
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::get_attribute_names_with_prefix( $prefix );
}
/**
 * Adds a class name to the currently matched tag.
 *
 * Nothing is added, and `false` is returned, when the current token is virtual.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @param string $class_name The class name to add.
 * @return bool Whether the class was set to be added.
 */
public function add_class( $class_name ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::add_class( $class_name );
}
1671+
/**
 * Removes a class name from the currently matched tag.
 *
 * Nothing is removed, and `false` is returned, when the current token is virtual.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @param string $class_name The class name to remove.
 * @return bool Whether the class was set to be removed.
 */
public function remove_class( $class_name ) {
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::remove_class( $class_name );
}
1683+
/**
 * Checks whether the matched tag contains the given class name.
 *
 * The class name is matched ASCII case-insensitively. A virtual token is
 * reported as `null`, the same value used when nothing is matched.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
 * @return bool|null Whether the matched tag contains the given class name, or null if not matched.
 */
public function has_class( $wanted_class ) {
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::has_class( $wanted_class );
}
/**
 * Generator for a foreach loop to step through each class name for the matched tag.
 *
 * This function is designed to be used inside a "foreach" loop.
 *
 * Example:
 *
 *     $p = WP_HTML_Processor::create_fragment( "<div class='free &lt;egg&gt;\tlang-en'>" );
 *     $p->next_tag();
 *     foreach ( $p->class_list() as $class_name ) {
 *         echo "{$class_name} ";
 *     }
 *     // Outputs: "free <egg> lang-en "
 *
 * A virtual token has no class names. An empty iterator is returned for it
 * so that the result can always be handed directly to `foreach`; returning
 * `null` here would make `foreach` raise a PHP warning.
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @return Traversable Class names of the matched tag; empty for a virtual token.
 */
public function class_list() {
	if ( $this->is_virtual() ) {
		// Nothing to iterate, but the caller still needs something iterable.
		return new EmptyIterator();
	}

	return parent::class_list();
}
16101715
16111716 /**
@@ -1629,17 +1734,30 @@ public function get_attribute_names_with_prefix( $prefix ) {
16291734 * @return string
16301735 */
public function get_modifiable_text() {
	// A virtual token has no modifiable text of its own.
	if ( $this->is_virtual() ) {
		return '';
	}

	return parent::get_modifiable_text();
}
/**
 * Reports which kind of syntax produced the matched comment node.
 *
 * Several different kinds of HTML syntax produce comments: besides the
 * regular HTML comment as it is commonly known, a number of unrelated
 * syntax errors also end up as comments. The Tag Processor tracks which
 * one was encountered and exposes it as the comment's type.
 *
 * A virtual token has no comment type and yields `null`.
 *
 * @see self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT
 * @see self::COMMENT_AS_CDATA_LOOKALIKE
 * @see self::COMMENT_AS_INVALID_HTML
 * @see self::COMMENT_AS_HTML_COMMENT
 * @see self::COMMENT_AS_PI_NODE_LOOKALIKE
 *
 * @since 6.6.0 Subclassed for the HTML Processor.
 *
 * @return string|null
 */
public function get_comment_type() {
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::get_comment_type();
}
16441762
16451763 /**
0 commit comments