@@ -1556,24 +1556,33 @@ private function skip_script_data(): bool {
1556
1556
}
1557
1557
1558
1558
/*
1559
- * Unlike with "-->", the "<!--" only transitions
1560
- * into the escaped mode if not already there.
1561
- *
1562
- * Inside the escaped modes it will be ignored; and
1563
- * should never break out of the double-escaped
1564
- * mode and back into the escaped mode.
1565
- *
1566
- * While this requires a mode change, it does not
1567
- * impact the parsing otherwise, so continue
1568
- * parsing after updating the state.
1559
+ * "<!--" only transitions from _unescaped_ to _escaped_. This byte sequence is only
1560
+ * significant in the _unescaped_ state and is ignored in any other state.
1569
1561
*/
1570
1562
if (
1563
+ 'unescaped ' === $ state &&
1571
1564
'! ' === $ html [ $ at ] &&
1572
1565
'- ' === $ html [ $ at + 1 ] &&
1573
1566
'- ' === $ html [ $ at + 2 ]
1574
1567
) {
1575
- $ at += 3 ;
1576
- $ state = 'unescaped ' === $ state ? 'escaped ' : $ state ;
1568
+ $ at += 3 ;
1569
+
1570
+ /*
1571
+ * The parser is ready to enter the _escaped_ state, but may remain in the
1572
+ * _unescaped_ state. This occurs when "<!--" is immediately followed by a
1573
+ * sequence of 0 or more "-" followed by ">". This is similar to abruptly closed
1574
+ * HTML comments like "<!-->" or "<!--->".
1575
+ *
1576
+ * Note that this check may advance the position significantly and requires a
1577
+ * length check to prevent bad offsets on inputs like `<script><!---------`.
1578
+ */
1579
+ $ at += strspn ( $ html , '- ' , $ at );
1580
+ if ( $ at < $ doc_length && '> ' === $ html [ $ at ] ) {
1581
+ ++$ at ;
1582
+ continue ;
1583
+ }
1584
+
1585
+ $ state = 'escaped ' ;
1577
1586
continue ;
1578
1587
}
1579
1588
@@ -1610,8 +1619,30 @@ private function skip_script_data(): bool {
1610
1619
*/
1611
1620
$ at += 6 ;
1612
1621
$ c = $ html [ $ at ];
1613
- if ( ' ' !== $ c && "\t" !== $ c && "\r" !== $ c && "\n" !== $ c && '/ ' !== $ c && '> ' !== $ c ) {
1614
- ++$ at ;
1622
+ if (
1623
+ /**
1624
+ * These characters trigger state transitions of interest:
1625
+ *
1626
+ * - @see {https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state}
1627
+ * - @see {https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state}
1628
+ * - @see {https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state}
1629
+ * - @see {https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state}
1630
+ *
1631
+ * The "\r" character is not present in the above references. However, "\r" must be
1632
+ * treated the same as "\n". This is because the HTML Standard requires newline
1633
+ * normalization during preprocessing which applies this replacement.
1634
+ *
1635
+ * - @see https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
1636
+ * - @see https://infra.spec.whatwg.org/#normalize-newlines
1637
+ */
1638
+ '> ' !== $ c &&
1639
+ ' ' !== $ c &&
1640
+ "\n" !== $ c &&
1641
+ '/ ' !== $ c &&
1642
+ "\t" !== $ c &&
1643
+ "\f" !== $ c &&
1644
+ "\r" !== $ c
1645
+ ) {
1615
1646
continue ;
1616
1647
}
1617
1648
0 commit comments