Skip to content

Commit 54d5c8e

Browse files
committed
HTML API: Reduce length checks in skip_script_data.
Apply an optimization to remove several repeated string length checks in `WP_HTML_Tag_Processor::skip_script_data()`.

Developed in WordPress/wordpress-develop#9230.

Props jonsurrell, dmsnell.
See #63738.

Built from https://develop.svn.wordpress.org/trunk@60617

git-svn-id: https://core.svn.wordpress.org/trunk@59953 1a063a9b-81f0-0310-95a4-ce76da25c4cd
1 parent 6c2bd04 commit 54d5c8e

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1496,13 +1496,48 @@ private function skip_script_data(): bool {
14961496
while ( false !== $at && $at < $doc_length ) {
14971497
$at += strcspn( $html, '-<', $at );
14981498

1499+
/*
1500+
* Optimization: Terminating a complete script element requires at least eight
1501+
* additional bytes in the document. Some checks below may cause local escaped
1502+
* state transitions when processing shorter strings, but those transitions are
1503+
* irrelevant if the script tag is incomplete and the function must return false.
1504+
*
1505+
* This may need updating if those transitions become significant or exported from
1506+
* this function in some way, such as when building safe methods to embed JavaScript
1507+
* or data inside a SCRIPT element.
1508+
*
1509+
* $at may be here.
1510+
* ↓
1511+
* ...</script>
1512+
* ╰──┬───╯
1513+
* $at + 8 additional bytes are required for a non-false return value.
1514+
*
1515+
* This single check eliminates the need to check lengths for the shorter spans:
1516+
*
1517+
* $at may be here.
1518+
* ↓
1519+
* <script><!-- --></script>
1520+
* ├╯
1521+
* Reading the 2 additional characters after $at does not require a length check.
1522+
*
1523+
* The transition from "escaped" to "unescaped" is not relevant if the document ends:
1524+
*
1525+
* $at may be here.
1526+
* ↓
1527+
* <script><!-- -->[[END-OF-DOCUMENT]]
1528+
* ╰──┬───╯
1529+
* The requirement of $at + 8 additional bytes is not satisfied; return false.
1530+
*/
1531+
if ( $at + 8 >= $doc_length ) {
1532+
return false;
1533+
}
1534+
14991535
/*
15001536
* For all script states a "-->" transitions
15011537
* back into the normal unescaped script mode,
15021538
* even if that's the current state.
15031539
*/
15041540
if (
1505-
$at + 2 < $doc_length &&
15061541
'-' === $html[ $at ] &&
15071542
'-' === $html[ $at + 1 ] &&
15081543
'>' === $html[ $at + 2 ]
@@ -1512,10 +1547,6 @@ private function skip_script_data(): bool {
15121547
continue;
15131548
}
15141549

1515-
if ( $at + 1 >= $doc_length ) {
1516-
return false;
1517-
}
1518-
15191550
/*
15201551
* Everything of interest past here starts with "<".
15211552
* Check this character and advance position regardless.
@@ -1537,7 +1568,6 @@ private function skip_script_data(): bool {
15371568
* parsing after updating the state.
15381569
*/
15391570
if (
1540-
$at + 2 < $doc_length &&
15411571
'!' === $html[ $at ] &&
15421572
'-' === $html[ $at + 1 ] &&
15431573
'-' === $html[ $at + 2 ]
@@ -1561,7 +1591,6 @@ private function skip_script_data(): bool {
15611591
* proceed scanning to the next potential token in the text.
15621592
*/
15631593
if ( ! (
1564-
$at + 6 < $doc_length &&
15651594
( 's' === $html[ $at ] || 'S' === $html[ $at ] ) &&
15661595
( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) &&
15671596
( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) &&
@@ -1579,9 +1608,6 @@ private function skip_script_data(): bool {
15791608
* "<script123" should not end a script region even though
15801609
* "<script" is found within the text.
15811610
*/
1582-
if ( $at + 6 >= $doc_length ) {
1583-
continue;
1584-
}
15851611
$at += 6;
15861612
$c = $html[ $at ];
15871613
if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) {
@@ -1611,8 +1637,6 @@ private function skip_script_data(): bool {
16111637
}
16121638

16131639
if ( $this->bytes_already_parsed >= $doc_length ) {
1614-
$this->parser_state = self::STATE_INCOMPLETE_INPUT;
1615-
16161640
return false;
16171641
}
16181642

wp-includes/version.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*
1717
* @global string $wp_version
1818
*/
19-
$wp_version = '6.9-alpha-60616';
19+
$wp_version = '6.9-alpha-60617';
2020

2121
/**
2222
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

0 commit comments

Comments
 (0)