Skip to content

Commit 253b971

Browse files
committed
Merge branch 'html-api/auto-escape-javascript-json' into scripts/use-html-api-for-script-tags
2 parents aaacd6f + 501d201 commit 253b971

File tree

2 files changed

+262
-25
lines changed

2 files changed

+262
-25
lines changed

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3868,16 +3868,14 @@ static function ( $matches ) {
38683868
/**
38693869
* JSON can be safely escaped.
38703870
*
3871-
* The following replacement may appear insuficcient, "<" is replaced
3871+
* The following replacement may appear insufficient, "<" is replaced
38723872
* with its JSON escape sequence "\u003C" without considering whether
3873-
* the "<" is preceded by an escaping slash. JSON does not support
3874-
* arbitrary character escaping (like JavaScript strings) so "\<"
3875-
* is invalid JSON and would have to be preceded by
3876-
* an escaped backslash: "\\<".
3873+
* the "<" is preceded by an escaping backslash. JSON does not support
3874+
* arbitrary character escaping in strings (unlike JavaScript) so "\<"
3875+
* is invalid JSON and does not need to be considered.
38773876
*
38783877
* @see https://www.json.org/json-en.html
38793878
*/
3880-
38813879
$plaintext_content = strtr(
38823880
$plaintext_content,
38833881
array( '<' => '\\u003C' )
@@ -3969,16 +3967,12 @@ public function is_javascript_script_tag(): bool {
39693967
*/
39703968
$type_attr = $this->get_attribute( 'type' );
39713969
$language_attr = $this->get_attribute( 'language' );
3972-
39733970
if ( true === $type_attr || '' === $type_attr ) {
39743971
return true;
39753972
}
39763973
if (
3977-
null === $type_attr && (
3978-
true === $language_attr ||
3979-
'' === $language_attr ||
3980-
null === $language_attr
3981-
)
3974+
null === $type_attr
3975+
&& ( null === $language_attr || true === $language_attr || '' === $language_attr )
39823976
) {
39833977
return true;
39843978
}
@@ -3989,7 +3983,7 @@ public function is_javascript_script_tag(): bool {
39893983
* > Otherwise, el has a non-empty language attribute; let the script block's type string
39903984
* > be the concatenation of "text/" and the value of el's language attribute.
39913985
*/
3992-
$type_string = $type_attr ? trim( $type_attr, " \t\f\r\n" ) : "text/{$language_attr}";
3986+
$type_string = null !== $type_attr ? trim( $type_attr, " \t\f\r\n" ) : "text/{$language_attr}";
39933987

39943988
/*
39953989
* > If the script block's type string is a JavaScript MIME type essence match, then
@@ -4050,10 +4044,10 @@ public function is_javascript_script_tag(): bool {
40504044
}
40514045

40524046
/*
4053-
* > - Otherwise, if the script block's type string is an ASCII case-insensitive match for
4054-
* > the string "importmap", then set el's type to "importmap".
4047+
* > Otherwise, if the script block's type string is an ASCII case-insensitive match for the string "importmap", then set el's type to "importmap".
4048+
* > Otherwise, if the script block's type string is an ASCII case-insensitive match for the string "speculationrules", then set el's type to "speculationrules".
40554049
*
4056-
* An importmap is JSON and not evaluated as JavaScript. This case is not handled here.
4050+
* These conditions indicate JSON content.
40574051
*/
40584052

40594053
/*
@@ -4074,13 +4068,11 @@ public function is_json_script_tag(): bool {
40744068
return false;
40754069
}
40764070

4077-
$type_attr = $this->get_attribute( 'type' );
4078-
4079-
if ( empty( $type_attr ) || true === $type_attr ) {
4071+
$type = $this->get_attribute( 'type' );
4072+
if ( null === $type || true === $type || '' === $type ) {
40804073
return false;
40814074
}
4082-
4083-
$type_string = strtolower( trim( $type_attr, " \t\f\r\n" ) );
4075+
$type = strtolower( trim( $type, " \t\f\r\n" ) );
40844076

40854077
/*
40864078
* > …
@@ -4097,10 +4089,10 @@ public function is_json_script_tag(): bool {
40974089
* @see https://mimesniff.spec.whatwg.org/#json-mime-type
40984090
*/
40994091
if (
4100-
'application/json' === $type_string
4101-
|| 'importmap' === $type_string
4102-
|| 'speculationrules' === $type_string
4103-
|| 'text/json' === $type_string
4092+
'application/json' === $type
4093+
|| 'importmap' === $type
4094+
|| 'speculationrules' === $type
4095+
|| 'text/json' === $type
41044096
) {
41054097
return true;
41064098
}
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
<?php
2+
/**
3+
* Unit tests covering WP_HTML_Tag_Processor script tag functionality.
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
*
8+
* @group html-api
9+
*
10+
* @coversDefaultClass WP_HTML_Tag_Processor
11+
*/
12+
class Tests_HtmlApi_WpHtmlTagProcessorScriptTag extends WP_UnitTestCase {
13+
/**
 * Ensures is_javascript_script_tag() classifies the first tag of each data set as expected.
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_javascript_script_tag
 *
 * @dataProvider data_is_javascript_script_tag
 *
 * @param string $html            HTML containing a script tag.
 * @param bool   $expected_result Whether the script tag should be identified as JavaScript.
 */
public function test_is_javascript_script_tag( string $html, bool $expected_result ) {
	$processor = new WP_HTML_Tag_Processor( $html );

	/*
	 * Every data set contains a tag. Without this guard, a `false` expectation
	 * would also be satisfied when the processor failed to stop on any tag,
	 * letting the test pass without exercising the method under test.
	 */
	$this->assertTrue(
		$processor->next_tag(),
		'Failed to find the tag under test: check test setup.'
	);

	$this->assertSame(
		$expected_result,
		$processor->is_javascript_script_tag(),
		'Failed to correctly identify JavaScript script tag'
	);
}
32+
33+
/**
 * Data provider for test_is_javascript_script_tag.
 *
 * Each group below maps a data set name to its HTML; the expected result is
 * attached per group, and the groups are merged in their declared order.
 *
 * @return array[] Data sets of the form array( string $html, bool $expected_result ).
 */
public static function data_is_javascript_script_tag(): array {
	$javascript = static function ( string $html ): array {
		return array( $html, true );
	};

	$not_javascript = static function ( string $html ): array {
		return array( $html, false );
	};

	return array_merge(
		array_map(
			$javascript,
			array(
				// Neither a type nor a language attribute is present.
				'Script tag without attributes'          => '<script></script>',
				'Script tag with other attributes'       => '<script id="test"></script>',

				// The type attribute is present but empty.
				'Script tag with empty type attribute'   => '<script type=""></script>',
				'Script tag with boolean type attribute' => '<script type></script>',
			)
		),
		array_map(
			$not_javascript,
			array(
				// The type attribute is falsy in PHP but not empty.
				'Script tag with type="0"' => '<script type="0"></script>',
			)
		),
		array_map(
			$javascript,
			array(
				// No type attribute; the language attribute is present but empty.
				'Script tag with empty language attribute'   => '<script language=""></script>',
				'Script tag with boolean language attribute' => '<script language></script>',
			)
		),
		array_map(
			$not_javascript,
			array(
				// The language attribute is falsy in PHP but not empty.
				'Script tag with language="0"' => '<script language="0"></script>',
			)
		),
		array_map(
			$javascript,
			array(
				// JavaScript MIME type essences.
				'Script tag with application/ecmascript'   => '<script type="application/ecmascript"></script>',
				'Script tag with application/javascript'   => '<script type="application/javascript"></script>',
				'Script tag with application/x-ecmascript' => '<script type="application/x-ecmascript"></script>',
				'Script tag with application/x-javascript' => '<script type="application/x-javascript"></script>',
				'Script tag with text/ecmascript'          => '<script type="text/ecmascript"></script>',
				'Script tag with text/javascript'          => '<script type="text/javascript"></script>',
				'Script tag with text/javascript1.0'       => '<script type="text/javascript1.0"></script>',
				'Script tag with text/javascript1.1'       => '<script type="text/javascript1.1"></script>',
				'Script tag with text/javascript1.2'       => '<script type="text/javascript1.2"></script>',
				'Script tag with text/javascript1.3'       => '<script type="text/javascript1.3"></script>',
				'Script tag with text/javascript1.4'       => '<script type="text/javascript1.4"></script>',
				'Script tag with text/javascript1.5'       => '<script type="text/javascript1.5"></script>',
				'Script tag with text/jscript'             => '<script type="text/jscript"></script>',
				'Script tag with text/livescript'          => '<script type="text/livescript"></script>',
				'Script tag with text/x-ecmascript'        => '<script type="text/x-ecmascript"></script>',
				'Script tag with text/x-javascript'        => '<script type="text/x-javascript"></script>',

				// MIME type essences match regardless of letter case.
				'Script tag with UPPERCASE type'           => '<script type="TEXT/JAVASCRIPT"></script>',
				'Script tag with MixedCase type'           => '<script type="Text/JavaScript"></script>',
				'Script tag with APPLICATION/JAVASCRIPT'   => '<script type="APPLICATION/JAVASCRIPT"></script>',

				// Module scripts.
				'Script tag with module type'              => '<script type="module"></script>',
				'Script tag with MODULE type uppercase'    => '<script type="MODULE"></script>',
				'Script tag with MoDuLe type mixed case'   => '<script type="MoDuLe"></script>',

				// Whitespace surrounding the type value is stripped.
				'Script tag with leading whitespace'       => '<script type=" text/javascript"></script>',
				'Script tag with trailing whitespace'      => '<script type="text/javascript "></script>',
				'Script tag with surrounding whitespace'   => '<script type=" text/javascript "></script>',
				'Script tag with tab whitespace'           => "<script type=\"\ttext/javascript\t\"></script>",
				'Script tag with newline whitespace'       => "<script type=\"\ntext/javascript\n\"></script>",
				'Script tag with mixed whitespace'         => "<script type=\" \t\ntext/javascript \t\n\"></script>",

				// A non-empty language attribute is evaluated as text/{language}.
				'Script tag with language="javascript"'    => '<script language="javascript"></script>',
				'Script tag with language="JavaScript"'    => '<script language="JavaScript"></script>',
				'Script tag with language="ecmascript"'    => '<script language="ecmascript"></script>',
				'Script tag with language="jscript"'       => '<script language="jscript"></script>',
				'Script tag with language="livescript"'    => '<script language="livescript"></script>',
			)
		),
		array_map(
			$not_javascript,
			array(
				// Script tags whose type is not JavaScript.
				'Script tag with importmap type'        => '<script type="importmap"></script>',
				'Script tag with speculationrules type' => '<script type="speculationrules"></script>',
				'Script tag with application/json type' => '<script type="application/json"></script>',
				'Script tag with text/json type'        => '<script type="text/json"></script>',
				'Script tag with unknown MIME type'     => '<script type="text/plain"></script>',
				'Script tag with application/xml type'  => '<script type="application/xml"></script>',
				'Script tag with random type'           => '<script type="random/type"></script>',

				// Tags other than SCRIPT.
				'DIV tag'                               => '<div></div>',
				'SPAN tag'                              => '<span></span>',
				'P tag'                                 => '<p></p>',
			)
		)
	);
}
116+
117+
/**
 * Ensures is_javascript_script_tag() reports false when the processor has
 * advanced one token through input that contains no tags.
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_javascript_script_tag
 */
public function test_is_javascript_script_tag_returns_false_before_finding_tags() {
	$text_only = new WP_HTML_Tag_Processor( 'Just some text' );
	$text_only->next_token();

	$is_javascript = $text_only->is_javascript_script_tag();

	$this->assertFalse( $is_javascript, 'Should return false when not stopped on script tag' );
}
131+
132+
/**
 * Ensures is_javascript_script_tag() reports false for a SCRIPT tag that is
 * parsed while the processor's parsing namespace is set to "svg".
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_javascript_script_tag
 */
public function test_is_javascript_script_tag_returns_false_for_non_html_namespace() {
	$processor = new WP_HTML_Tag_Processor( '<script></script>' );
	$processor->change_parsing_namespace( 'svg' );

	/*
	 * Advance onto the SCRIPT tag after switching namespaces. Without this
	 * step the processor is not stopped on any tag, and the assertion below
	 * would pass without ever reaching the namespace check.
	 */
	$processor->next_tag();

	$this->assertFalse(
		$processor->is_javascript_script_tag(),
		'Should return false for script tags in non-HTML namespace'
	);
}
145+
146+
/**
 * Ensures is_json_script_tag() classifies the first tag of each data set as expected.
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_json_script_tag
 *
 * @dataProvider data_is_json_script_tag
 *
 * @param string $html            HTML containing a script tag.
 * @param bool   $expected_result Whether the script tag should be identified as JSON.
 */
public function test_is_json_script_tag( string $html, bool $expected_result ) {
	$processor = new WP_HTML_Tag_Processor( $html );

	/*
	 * Every data set contains a tag. Without this guard, a `false` expectation
	 * would also be satisfied when the processor failed to stop on any tag,
	 * letting the test pass without exercising the method under test.
	 */
	$this->assertTrue(
		$processor->next_tag(),
		'Failed to find the tag under test: check test setup.'
	);

	$this->assertSame(
		$expected_result,
		$processor->is_json_script_tag(),
		'Failed to correctly identify JSON script tag'
	);
}
165+
166+
/**
 * Data provider for test_is_json_script_tag.
 *
 * Each group below maps a data set name to its HTML; the expected result is
 * attached per group, and the groups are merged in their declared order.
 *
 * @return array[] Data sets of the form array( string $html, bool $expected_result ).
 */
public static function data_is_json_script_tag(): array {
	$json = static function ( string $html ): array {
		return array( $html, true );
	};

	$not_json = static function ( string $html ): array {
		return array( $html, false );
	};

	return array_merge(
		array_map(
			$json,
			array(
				// JSON MIME types.
				'Script tag with application/json type'      => '<script type="application/json"></script>',
				'Script tag with text/json type'             => '<script type="text/json"></script>',

				// Import maps and speculation rules carry JSON content.
				'Script tag with importmap type'             => '<script type="importmap"></script>',
				'Script tag with speculationrules type'      => '<script type="speculationrules"></script>',

				// JSON types match regardless of letter case.
				'Script tag with APPLICATION/JSON uppercase' => '<script type="APPLICATION/JSON"></script>',
				'Script tag with Text/Json mixed case'       => '<script type="Text/Json"></script>',
				'Script tag with IMPORTMAP uppercase'        => '<script type="IMPORTMAP"></script>',
				'Script tag with ImportMap mixed case'       => '<script type="ImportMap"></script>',
				'Script tag with SPECULATIONRULES uppercase' => '<script type="SPECULATIONRULES"></script>',
				'Script tag with SpeculationRules mixed'     => '<script type="SpeculationRules"></script>',

				// Whitespace surrounding the type value is stripped.
				'Script tag with leading whitespace'         => '<script type=" application/json"></script>',
				'Script tag with trailing whitespace'        => '<script type="application/json "></script>',
				'Script tag with surrounding whitespace'     => '<script type=" application/json "></script>',
				'Script tag with tab whitespace'             => "<script type=\"\tapplication/json\t\"></script>",
				'Script tag with newline whitespace'         => "<script type=\"\napplication/json\n\"></script>",
				'Script tag with mixed whitespace'           => "<script type=\" \t\napplication/json \t\n\"></script>",
			)
		),
		array_map(
			$not_json,
			array(
				// A missing or empty type attribute is not JSON.
				'Script tag without type attribute'      => '<script></script>',
				'Script tag with empty type attribute'   => '<script type=""></script>',
				'Script tag with boolean type attribute' => '<script type></script>',

				// The type attribute is falsy in PHP but not empty.
				'Script tag with type="0"'               => '<script type="0"></script>',

				// Script tags whose type is not a JSON type.
				'Script tag with text/javascript type'   => '<script type="text/javascript"></script>',
				'Script tag with module type'            => '<script type="module"></script>',
				'Script tag with unknown MIME type'      => '<script type="text/plain"></script>',
				'Script tag with application/xml type'   => '<script type="application/xml"></script>',

				// Tags other than SCRIPT.
				'DIV tag'                                => '<div></div>',
				'SPAN tag'                               => '<span></span>',
				'P tag'                                  => '<p></p>',
			)
		)
	);
}
216+
217+
/**
 * Ensures is_json_script_tag() reports false when the processor has
 * advanced one token through input that contains no tags.
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_json_script_tag
 */
public function test_is_json_script_tag_returns_false_before_finding_tags() {
	$text_only = new WP_HTML_Tag_Processor( 'Just some text' );
	$text_only->next_token();

	$is_json = $text_only->is_json_script_tag();

	$this->assertFalse( $is_json, 'Should return false when not stopped on script tag' );
}
230+
231+
/**
 * Ensures is_json_script_tag() reports false for a SCRIPT tag that is
 * parsed while the processor's parsing namespace is set to "svg".
 *
 * @ticket 64419
 *
 * @covers WP_HTML_Tag_Processor::is_json_script_tag
 */
public function test_is_json_script_tag_returns_false_for_non_html_namespace() {
	$svg_processor = new WP_HTML_Tag_Processor( '<script></script>' );

	// The namespace is switched first so that the tag is parsed under it.
	$svg_processor->change_parsing_namespace( 'svg' );
	$svg_processor->next_tag();

	$is_json = $svg_processor->is_json_script_tag();

	$this->assertFalse( $is_json, 'Should return false for script tags in non-HTML namespace' );
}
245+
}

0 commit comments

Comments
 (0)