diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed6ac0299b3c3..86caf5a43e08e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -355,6 +355,115 @@ public static function create_full_parser( $html, $known_definite_encoding = 'UT return $processor; } + public function set_inner_html( ?string $html ) { + if ( $this->is_virtual() ) { + return false; + } + + if ( $this->get_token_type() !== '#tag' ) { + return false; + } + + if ( $this->is_tag_closer() ) { + return false; + } + + if ( ! $this->expects_closer() ) { + return false; + } + + if ( + 'html' !== $this->state->current_token->namespace && + $this->state->current_token->has_self_closing_flag + ) { + return false; + } + + if ( null === $html ) { + $html = ''; + } + if ( '' !== $html ) { + $fragment_parser = $this->spawn_fragment_parser( $html ); + if ( + null === $fragment_parser + ) { + return false; + } + + try { + $html = $fragment_parser->serialize(); + } catch ( Exception $e ) { + return false; + } + } + + // @todo apply modifications if there are any??? + + if ( ! parent::set_bookmark( 'SET_INNER_HTML: opener' ) ) { + return false; + } + + if ( ! $this->seek_to_matching_closer() ) { + parent::seek( 'SET_INNER_HTML: opener' ); + return false; + } + + if ( ! parent::set_bookmark( 'SET_INNER_HTML: closer' ) ) { + return false; + } + + $inner_html_start = $this->bookmarks['SET_INNER_HTML: opener']->start + $this->bookmarks['SET_INNER_HTML: opener']->length; + $inner_html_length = $this->bookmarks['SET_INNER_HTML: closer']->start - $inner_html_start; + + $this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement( + $inner_html_start, + $inner_html_length, + $html + ); + + parent::seek( 'SET_INNER_HTML: opener' ); + parent::release_bookmark( 'SET_INNER_HTML: opener' ); + parent::release_bookmark( 'SET_INNER_HTML: closer' ); + + // @todo check for whether that html will make a mess! + // Will it break out of tags? + + return true; + } + + public function seek_to_matching_closer(): bool { + $tag_name = $this->get_tag(); + + if ( null === $tag_name ) { + return false; + } + + if ( $this->is_tag_closer() ) { + return false; + } + + if ( ! $this->expects_closer() ) { + return false; + } + + $breadcrumbs = $this->breadcrumbs; + array_pop( $breadcrumbs ); + + // @todo Can't use these queries together + while ( $this->next_tag( + array( + 'tag_name' => $this->get_tag(), + 'tag_closers' => 'visit', + ) + ) ) { + if ( $this->get_breadcrumbs() === $breadcrumbs ) { + return true; + } + } + return false; + } + + /** * Constructor. * @@ -424,6 +533,61 @@ function ( WP_HTML_Token $token ): void { }; } + /** + * Creates a fragment processor with the current node as its context element. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm + * + * @param string $html Input HTML fragment to process. + * @return static|null The created processor if successful, otherwise null. + */ + public function spawn_fragment_parser( string $html ): ?self { + if ( $this->get_token_type() !== '#tag' ) { + return null; + } + + $namespace = $this->get_namespace(); + + /* + * Prevent creating fragments at "self-contained" nodes. + * + * @see https://github.com/WordPress/wordpress-develop/pull/7141 + * @see https://github.com/WordPress/wordpress-develop/pull/7198 + */ + if ( + 'html' === $namespace && + in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ) { + return null; + } + + $fragment_processor = self::create_fragment( $html ); + $fragment_processor->compat_mode = $this->compat_mode; + + $fragment_processor->context_node = clone $this->state->current_token; + $fragment_processor->context_node->bookmark_name = 'context-node'; + $fragment_processor->context_node->on_destroy = null; + + $context_element = array( $fragment_processor->context_node->node_name, array() ); + foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { + $context_element[1][ $name ] = $value; + } + + $fragment_processor->breadcrumbs = array(); + + if ( 'TEMPLATE' === $context_element[0] ) { + $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $fragment_processor->reset_insertion_mode_appropriately(); + + // @todo Set the parser's form element pointer. + + $fragment_processor->state->encoding_confidence = 'irrelevant'; + + return $fragment_processor; + } + /** * Stops the parser and terminates its execution when encountering unsupported markup. * @@ -522,6 +686,7 @@ public function get_unsupported_exception() { * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this whole class name to match. + * @type string $tag_name Tag name to match. * @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. * } @@ -545,7 +710,7 @@ public function next_tag( $query = null ): bool { } if ( is_string( $query ) ) { - $query = array( 'breadcrumbs' => array( $query ) ); + $query = array( 'tag_name' => $query ); } if ( ! is_array( $query ) ) { diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index ba3407ff84cbf..e04f8cfd7d46d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -863,4 +863,101 @@ public function test_adjusts_for_mathml_integration_points() { 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' ); } + + /** + * @ticket TBD + * + * @dataProvider data_set_inner_html + */ + public function test_set_inner_html( string $html, ?string $replacement, string $expected ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + while ( $processor->next_tag() ) { + if ( $processor->get_attribute( 'target' ) ) { + break; + } + } + + $this->assertTrue( $processor->set_inner_html( $replacement ) ); + $this->assertSame( $expected, $processor->get_updated_html() ); + } + + public static function data_set_inner_html() { + return array( + array( + '
replace me
', + 'with me!', + '
with me!
', + ), + array( + '

replace me

', + 'with me!', + '
with me!
', + ), + array( + '
replace me
', + 'with me!', + '
with me!
', + ), + ); + } + + /** + * @ticket TBD + * + * @dataProvider data_set_inner_html_not_allowed + */ + public function test_set_inner_html_not_allowed( string $html, string $replacement ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + while ( $processor->next_tag() ) { + if ( $processor->get_attribute( 'target' ) ) { + break; + } + } + $this->assertFalse( $processor->set_inner_html( $replacement ), "Should have failed but produced: {$processor->get_updated_html()}" ); + $this->assertSame( $html, $processor->get_updated_html() ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_set_inner_html_not_allowed(): array { + return array( + 'not allowed in void tags' => array( + '
', + 'anything', + ), + 'not allowed in self-closing tags' => array( + '', + 'anything', + ), + 'must have closing tag' => array( + '
', + 'anything', + ), + + 'a in a' => array( + '', + '', + ), + 'a nested in a' => array( + '', + 'A cannot nest inside a', + ), + + 'text in table' => array( + '
hello
', + 'text triggers forstering - not allowed', + ), + 'text in thead' => array( + '', + 'text triggers forstering - not allowed', + ), + 'text in tr' => array( + '
hello
hello', + 'text triggers forstering - not allowed', + ), + ); + } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 808fa39d17f26..041132ed50c20 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -138,10 +138,6 @@ public function data_external_html5lib_tests() { * @return bool True if the test case should be skipped. False otherwise. */ private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { - if ( null !== $test_context_element && 'body' !== $test_context_element ) { - return true; - } - if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { return true; } @@ -157,11 +153,77 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. */ private static function build_tree_representation( ?string $fragment_context, string $html ) { - $processor = $fragment_context - ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) - : WP_HTML_Processor::create_full_parser( $html ); - if ( null === $processor ) { - throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + $processor = null; + if ( $fragment_context ) { + if ( 'body' === $fragment_context ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + } else { + + /* + * If the string of characters starts with "svg ", the context + * element is in the SVG namespace and the substring after + * "svg " is the local name. If the string of characters starts + * with "math ", the context element is in the MathML namespace + * and the substring after "math " is the local name. + * Otherwise, the context element is in the HTML namespace and + * the string is the local name. + */ + if ( str_starts_with( $fragment_context, 'svg ' ) ) { + $tag_name = substr( $fragment_context, 4 ); + if ( 'svg' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { + $tag_name = substr( $fragment_context, 5 ); + if ( 'math' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } else { + if ( in_array( + $fragment_context, + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ), + true + ) ) { + $parent_processor = WP_HTML_Processor::create_full_parser( "
<{$fragment_context}>" ); + $parent_processor->next_tag(); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + } + $parent_processor->next_tag( $fragment_context ); + } + if ( null !== $parent_processor->get_unsupported_exception() ) { + throw $parent_processor->get_unsupported_exception(); + } + if ( null !== $parent_processor->get_last_error() ) { + throw new Exception( $parent_processor->get_last_error() ); + } + $processor = $parent_processor->spawn_fragment_parser( $html ); + } + + if ( null === $processor ) { + throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + } + } else { + $processor = WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { + throw new Exception( 'Could not create a full parser.' ); + } } /*