-
Notifications
You must be signed in to change notification settings - Fork 3k
HTML API: Improve script tag escape state processing #9397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from 15 commits
6ad9951
b3b3177
ca16e0e
0456be7
ea6f7d3
4be62b9
df2affa
d0cbb00
69f3bce
c509f9d
de91e09
f041a9c
2b6833c
bba0547
728d13f
1b4478f
d22ef9a
360d896
9fd074f
840f6aa
f7bcfb4
e9dd022
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1496,13 +1496,39 @@ private function skip_script_data(): bool { | |
while ( false !== $at && $at < $doc_length ) { | ||
$at += strcspn( $html, '-<', $at ); | ||
|
||
/* | ||
* *IMPORTANT:* Any changes to this loop *must* ensure the conditions described in this | ||
* comment remain valid. | ||
* | ||
* The rest of this loop matches different byte sequences. If a script close tag is not | ||
* found, the function will return false. The script close tag is the longest byte | ||
* sequence to match. Therefore, a single length check for at least 8 additional | |
* bytes allows for an early `false` return OR subsequent matches without length checks. | ||
* | ||
* $at may be here. | ||
* ↓ | ||
* </script> | ||
* ╰──┬───╯ | ||
* $at + 8 additional bytes are required for a non-false return value. | ||
* | ||
* The length of shorter matches is already satisfied: | ||
* | ||
* $at may be here. | ||
* ↓ | ||
* --> | ||
* ├╯ | ||
* $at + 2 additional bytes do not require an additional length check. | |
*/ | ||
if ( $at + 8 >= $doc_length ) { | ||
return false; | ||
} | ||
|
||
/* | ||
* For all script states a "-->" transitions | ||
* back into the normal unescaped script mode, | ||
* even if that's the current state. | ||
*/ | ||
if ( | ||
$at + 2 < $doc_length && | ||
'-' === $html[ $at ] && | ||
'-' === $html[ $at + 1 ] && | ||
'>' === $html[ $at + 2 ] | ||
|
@@ -1512,10 +1538,6 @@ private function skip_script_data(): bool { | |
continue; | ||
} | ||
|
||
if ( $at + 1 >= $doc_length ) { | ||
return false; | ||
} | ||
|
||
/* | ||
* Everything of interest past here starts with "<". | ||
* Check this character and advance position regardless. | ||
|
@@ -1537,13 +1559,29 @@ private function skip_script_data(): bool { | |
* parsing after updating the state. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do these changes interact with the existing comment? Did we get this comment wrong the first time? Did you review the comment above for updates? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The comment isn't wrong, although I don't understand exactly what the last paragraph is trying to say. I've pushed a change to clarify and simplify it. |
||
*/ | ||
if ( | ||
$at + 2 < $doc_length && | ||
'unescaped' === $state && | ||
'!' === $html[ $at ] && | ||
'-' === $html[ $at + 1 ] && | ||
'-' === $html[ $at + 2 ] | ||
) { | ||
$at += 3; | ||
$state = 'unescaped' === $state ? 'escaped' : $state; | ||
$at += 3; | ||
|
||
/* | ||
* The parser is ready to enter the `escaped` state but may remain in the | ||
* `unescaped` state if it is immediately followed by a sequence of zero or | |
* more "-" characters followed by ">". This is similar to abruptly closed HTML | ||
* comments like "<!-->" or "<!--->". | ||
* | ||
* Note that this check may have advanced the position significantly and requires | ||
* a length check to prevent bad offsets on inputs like `<script><!---------`. | ||
*/ | ||
$at += strspn( $html, '-', $at ); | ||
if ( $at < $doc_length && '>' === $html[ $at ] ) { | ||
++$at; | ||
continue; | ||
} | ||
|
||
$state = 'escaped'; | ||
continue; | ||
} | ||
|
||
|
@@ -1561,7 +1599,6 @@ private function skip_script_data(): bool { | |
* proceed scanning to the next potential token in the text. | ||
*/ | ||
if ( ! ( | ||
$at + 6 < $doc_length && | ||
( 's' === $html[ $at ] || 'S' === $html[ $at ] ) && | ||
( 'c' === $html[ $at + 1 ] || 'C' === $html[ $at + 1 ] ) && | ||
( 'r' === $html[ $at + 2 ] || 'R' === $html[ $at + 2 ] ) && | ||
|
@@ -1579,13 +1616,9 @@ private function skip_script_data(): bool { | |
* "<script123" should not end a script region even though | ||
* "<script" is found within the text. | ||
*/ | ||
if ( $at + 6 >= $doc_length ) { | ||
continue; | ||
} | ||
$at += 6; | ||
$c = $html[ $at ]; | ||
if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { | ||
++$at; | ||
continue; | ||
} | ||
|
||
|
@@ -1611,8 +1644,6 @@ private function skip_script_data(): bool { | |
} | ||
|
||
if ( $this->bytes_already_parsed >= $doc_length ) { | ||
$this->parser_state = self::STATE_INCOMPLETE_INPUT; | ||
|
||
return false; | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.