Skip to content

Commit 13d5244

Browse files
committed
Improve legibility of JSON-encoded Interactivity API store data.
The Interactivity API has been rendering client data in a SCRIPT element with the type `application/json` so that it's not executed as a script, but is available to one. The data runs through `wp_json_encode()` and is encoded with some flags to ensure that potentially-dangerous characters are escaped. However, this can lead to some challenges. Eagerly escaping when not necessary can make the data difficult to comprehend when reading the output HTML. For example, all non-ASCII Unicode characters are escaped with their code point equivalent. This results in `\ud83c\udd70` instead of `🅰`. In this patch, the flags for JSON encoding are refined to ensure what's necessary while relaxing other rules (leaving in those Unicode characters if the blog charset is UTF-8). This makes for Interactivity API data that's quicker for a human reader to decipher and diagnose. In summary: - This data is JSON encoded and printed in a `<script type="application/json">` tag. - If we ensure that `<` is never printed inside the data, it should be impossible to break out of the script tag and the browser treats everything as the element's `textContent`. - All other escaping becomes unnecessary at that point, including unicode escaping if the page uses the UTF-8 charset (the same encoding as JSON). See WordPress#6433 (review) Developed in WordPress#6520 Discussed in https://core.trac.wordpress.org/ticket/61170 Fixes: #61170 Follow-up to: [57563]. Props: bjorsch, dmsnell, jonsurrell, sabernhardt, westonruter. git-svn-id: https://develop.svn.wordpress.org/trunk@58159 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 3d0a228 commit 13d5244

File tree

2 files changed

+96
-7
lines changed

2 files changed

+96
-7
lines changed

src/wp-includes/interactivity-api/class-wp-interactivity-api.php

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,10 +167,41 @@ public function print_client_interactivity_data() {
167167
}
168168

169169
if ( ! empty( $interactivity_data ) ) {
170+
/*
171+
* This data will be printed as JSON inside a script tag like this:
172+
* <script type="application/json"></script>
173+
*
174+
* A script tag must be closed by a sequence beginning with `</`. It's impossible to
175+
* close a script tag without using `<`. We ensure that `<` is escaped and `/` can
176+
* remain unescaped, so `</script>` will be printed as `\u003C/script\u003E`.
177+
*
178+
* - JSON_HEX_TAG: All < and > are converted to \u003C and \u003E.
179+
* - JSON_UNESCAPED_SLASHES: Don't escape /.
180+
*
181+
* If the page will use UTF-8 encoding, it's safe to print unescaped unicode:
182+
*
183+
* - JSON_UNESCAPED_UNICODE: Encode multibyte Unicode characters literally (instead of as `\uXXXX`).
184+
* - JSON_UNESCAPED_LINE_TERMINATORS: The line terminators are kept unescaped when
185+
* JSON_UNESCAPED_UNICODE is supplied. It uses the same behaviour as it was
186+
* before PHP 7.1 without this constant. Available as of PHP 7.1.0.
187+
*
188+
* The JSON specification requires encoding in UTF-8, so if the generated HTML page
189+
* is not encoded in UTF-8 then it's not safe to include those literals. They must
190+
* be escaped to avoid encoding issues.
191+
*
192+
* @see https://www.rfc-editor.org/rfc/rfc8259.html for details on encoding requirements.
193+
* @see https://www.php.net/manual/en/json.constants.php for details on these constants.
194+
* @see https://html.spec.whatwg.org/#script-data-state for details on script tag parsing.
195+
*/
196+
$json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS;
197+
if ( ! is_utf8_charset() ) {
198+
$json_encode_flags = JSON_HEX_TAG | JSON_UNESCAPED_SLASHES;
199+
}
200+
170201
wp_print_inline_script_tag(
171202
wp_json_encode(
172203
$interactivity_data,
173-
JSON_HEX_TAG | JSON_HEX_AMP
204+
$json_encode_flags
174205
),
175206
array(
176207
'type' => 'application/json',

tests/phpunit/tests/interactivity-api/wpInteractivityAPI.php

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ public function set_up() {
2727
$this->interactivity = new WP_Interactivity_API();
2828
}
2929

30+
public function charset_iso_8859_1() {
31+
return 'iso-8859-1';
32+
}
33+
3034
/**
3135
* Tests that the state and config methods return an empty array at the
3236
* beginning.
@@ -349,22 +353,76 @@ public function test_config_printed_correctly_with_nested_empty_array() {
349353
* properly escaped.
350354
*
351355
* @ticket 60356
356+
* @ticket 61170
352357
*
353358
* @covers ::state
354359
* @covers ::config
355360
* @covers ::print_client_interactivity_data
356361
*/
357362
public function test_state_and_config_escape_special_characters() {
358-
$this->interactivity->state( 'myPlugin', array( 'amps' => 'http://site.test/?foo=1&baz=2' ) );
359-
$this->interactivity->config( 'myPlugin', array( 'tags' => 'Tags: <!-- <script>' ) );
363+
$this->interactivity->state(
364+
'myPlugin',
365+
array(
366+
'ampersand' => '&',
367+
'less-than sign' => '<',
368+
'greater-than sign' => '>',
369+
'solidus' => '/',
370+
'line separator' => "\u{2028}",
371+
'paragraph separator' => "\u{2029}",
372+
'flag of england' => "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}",
373+
'malicious script closer' => '</script>',
374+
'entity-encoded malicious script closer' => '&lt;/script&gt;',
375+
)
376+
);
377+
$this->interactivity->config( 'myPlugin', array( 'chars' => '&<>/' ) );
360378

361379
$interactivity_data_markup = get_echo( array( $this->interactivity, 'print_client_interactivity_data' ) );
362-
preg_match( '/<script type="application\/json" id="wp-interactivity-data">.*?(\{.*\}).*?<\/script>/s', $interactivity_data_markup, $interactivity_data_string );
380+
preg_match( '~<script type="application/json" id="wp-interactivity-data">\s*(\{.*\})\s*</script>~s', $interactivity_data_markup, $interactivity_data_string );
363381

364-
$this->assertEquals(
365-
'{"config":{"myPlugin":{"tags":"Tags: \u003C!-- \u003Cscript\u003E"}},"state":{"myPlugin":{"amps":"http:\/\/site.test\/?foo=1\u0026baz=2"}}}',
366-
$interactivity_data_string[1]
382+
$expected = <<<"JSON"
383+
{"config":{"myPlugin":{"chars":"&\\u003C\\u003E/"}},"state":{"myPlugin":{"ampersand":"&","less-than sign":"\\u003C","greater-than sign":"\\u003E","solidus":"/","line separator":"\u{2028}","paragraph separator":"\u{2029}","flag of england":"\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}","malicious script closer":"\\u003C/script\\u003E","entity-encoded malicious script closer":"&lt;/script&gt;"}}}
384+
JSON;
385+
$this->assertEquals( $expected, $interactivity_data_string[1] );
386+
}
387+
388+
/**
389+
* Tests that special characters in the initial state and configuration are
390+
* properly escaped when the blog_charset is not UTF-8 (unicode compatible).
391+
*
392+
* In this test, unicode and line terminators should be escaped to their
393+
* JSON unicode sequences.
394+
*
395+
* @ticket 61170
396+
*
397+
* @covers ::state
398+
* @covers ::config
399+
* @covers ::print_client_interactivity_data
400+
*/
401+
public function test_state_and_config_escape_special_characters_non_utf8() {
402+
add_filter( 'pre_option_blog_charset', array( $this, 'charset_iso_8859_1' ) );
403+
$this->interactivity->state(
404+
'myPlugin',
405+
array(
406+
'ampersand' => '&',
407+
'less-than sign' => '<',
408+
'greater-than sign' => '>',
409+
'solidus' => '/',
410+
'line separator' => "\u{2028}",
411+
'paragraph separator' => "\u{2029}",
412+
'flag of england' => "\u{1F3F4}\u{E0067}\u{E0062}\u{E0065}\u{E006E}\u{E0067}\u{E007F}",
413+
'malicious script closer' => '</script>',
414+
'entity-encoded malicious script closer' => '&lt;/script&gt;',
415+
)
367416
);
417+
$this->interactivity->config( 'myPlugin', array( 'chars' => '&<>/' ) );
418+
419+
$interactivity_data_markup = get_echo( array( $this->interactivity, 'print_client_interactivity_data' ) );
420+
preg_match( '~<script type="application/json" id="wp-interactivity-data">\s*(\{.*\})\s*</script>~s', $interactivity_data_markup, $interactivity_data_string );
421+
422+
$expected = <<<"JSON"
423+
{"config":{"myPlugin":{"chars":"&\\u003C\\u003E/"}},"state":{"myPlugin":{"ampersand":"&","less-than sign":"\\u003C","greater-than sign":"\\u003E","solidus":"/","line separator":"\\u2028","paragraph separator":"\\u2029","flag of england":"\\ud83c\\udff4\\udb40\\udc67\\udb40\\udc62\\udb40\\udc65\\udb40\\udc6e\\udb40\\udc67\\udb40\\udc7f","malicious script closer":"\\u003C/script\\u003E","entity-encoded malicious script closer":"&lt;/script&gt;"}}}
424+
JSON;
425+
$this->assertEquals( $expected, $interactivity_data_string[1] );
368426
}
369427

370428
/**

0 commit comments

Comments
 (0)