@@ -1496,13 +1496,48 @@ private function skip_script_data(): bool {
1496
1496
while ( false !== $ at && $ at < $ doc_length ) {
1497
1497
$ at += strcspn ( $ html , '-< ' , $ at );
1498
1498
1499
+ /*
1500
+ * Optimization: Terminating a complete script element requires at least eight
1501
+ * additional bytes in the document. Some checks below may cause local escaped
1502
+ * state transitions when processing shorter strings, but those transitions are
1503
+ * irrelevant if the script tag is incomplete and the function must return false.
1504
+ *
1505
+ * This may need updating if those transitions become significant or exported from
1506
+ * this function in some way, such as when building safe methods to embed JavaScript
1507
+ * or data inside a SCRIPT element.
1508
+ *
1509
+ * $at may be here.
1510
+ * ↓
1511
+ * ...</script>
1512
+ * ╰──┬───╯
1513
+ * $at + 8 additional bytes are required for a non-false return value.
1514
+ *
1515
+ * This single check eliminates the need to check lengths for the shorter spans:
1516
+ *
1517
+ * $at may be here.
1518
+ * ↓
1519
+ * <script><!-- --></script>
1520
+ * ├╯
1521
+ * $at + 2 additional characters does not require a length check.
1522
+ *
1523
+ * The transition from "escaped" to "unescaped" is not relevant if the document ends:
1524
+ *
1525
+ * $at may be here.
1526
+ * ↓
1527
+ * <script><!-- -->[[END-OF-DOCUMENT]]
1528
+ * ╰──┬───╯
1529
+ * $at + 8 additional bytes is not satisfied, return false.
1530
+ */
1531
+ if ( $ at + 8 >= $ doc_length ) {
1532
+ return false ;
1533
+ }
1534
+
1499
1535
/*
1500
1536
* For all script states a "-->" transitions
1501
1537
* back into the normal unescaped script mode,
1502
1538
* even if that's the current state.
1503
1539
*/
1504
1540
if (
1505
- $ at + 2 < $ doc_length &&
1506
1541
'- ' === $ html [ $ at ] &&
1507
1542
'- ' === $ html [ $ at + 1 ] &&
1508
1543
'> ' === $ html [ $ at + 2 ]
@@ -1512,10 +1547,6 @@ private function skip_script_data(): bool {
1512
1547
continue ;
1513
1548
}
1514
1549
1515
- if ( $ at + 1 >= $ doc_length ) {
1516
- return false ;
1517
- }
1518
-
1519
1550
/*
1520
1551
* Everything of interest past here starts with "<".
1521
1552
* Check this character and advance position regardless.
@@ -1537,7 +1568,6 @@ private function skip_script_data(): bool {
1537
1568
* parsing after updating the state.
1538
1569
*/
1539
1570
if (
1540
- $ at + 2 < $ doc_length &&
1541
1571
'! ' === $ html [ $ at ] &&
1542
1572
'- ' === $ html [ $ at + 1 ] &&
1543
1573
'- ' === $ html [ $ at + 2 ]
@@ -1561,7 +1591,6 @@ private function skip_script_data(): bool {
1561
1591
* proceed scanning to the next potential token in the text.
1562
1592
*/
1563
1593
if ( ! (
1564
- $ at + 6 < $ doc_length &&
1565
1594
( 's ' === $ html [ $ at ] || 'S ' === $ html [ $ at ] ) &&
1566
1595
( 'c ' === $ html [ $ at + 1 ] || 'C ' === $ html [ $ at + 1 ] ) &&
1567
1596
( 'r ' === $ html [ $ at + 2 ] || 'R ' === $ html [ $ at + 2 ] ) &&
@@ -1579,9 +1608,6 @@ private function skip_script_data(): bool {
1579
1608
* "<script123" should not end a script region even though
1580
1609
* "<script" is found within the text.
1581
1610
*/
1582
- if ( $ at + 6 >= $ doc_length ) {
1583
- continue ;
1584
- }
1585
1611
$ at += 6 ;
1586
1612
$ c = $ html [ $ at ];
1587
1613
if ( ' ' !== $ c && "\t" !== $ c && "\r" !== $ c && "\n" !== $ c && '/ ' !== $ c && '> ' !== $ c ) {
@@ -1611,8 +1637,6 @@ private function skip_script_data(): bool {
1611
1637
}
1612
1638
1613
1639
if ( $ this ->bytes_already_parsed >= $ doc_length ) {
1614
- $ this ->parser_state = self ::STATE_INCOMPLETE_INPUT ;
1615
-
1616
1640
return false ;
1617
1641
}
1618
1642
0 commit comments