Skip to content

Commit 22799c3

Browse files
fix(codegen): escape </script (oxc-project#11782)
Fixes oxc-project#10334. Replace oxc-project#10340 to cover the escape for both template literals and comments. We don’t need to handle regex, which is already covered by existing codegen. Please let me know anything needed to merge this PR. Thanks. --------- Co-authored-by: overlookmotel <[email protected]>
1 parent 7a54493 commit 22799c3

File tree

5 files changed

+107
-36
lines changed

5 files changed

+107
-36
lines changed

crates/oxc_codegen/src/comment.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ impl Codegen<'_> {
128128
let comment_source = comment.span.source_text(source_text);
129129
match comment.kind {
130130
CommentKind::Line => {
131-
self.print_str(comment_source);
131+
self.print_str_escaping_script_close_tag(comment_source);
132132
}
133133
CommentKind::Block => {
134134
// Print block comments with our own indentation.
135135
for line in comment_source.split(is_line_terminator) {
136136
if !line.starts_with("/*") {
137137
self.print_indent();
138138
}
139-
self.print_str(line.trim_start());
139+
self.print_str_escaping_script_close_tag(line.trim_start());
140140
if !line.ends_with("*/") {
141141
self.print_hard_newline();
142142
}

crates/oxc_codegen/src/gen.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2083,7 +2083,7 @@ impl Gen for TemplateLiteral<'_> {
20832083

20842084
for quasi in &self.quasis {
20852085
p.add_source_mapping(quasi.span);
2086-
p.print_str(quasi.value.raw.as_str());
2086+
p.print_str_escaping_script_close_tag(quasi.value.raw.as_str());
20872087
p.add_source_mapping_end(quasi.span);
20882088

20892089
if let Some(expr) = expressions.next() {

crates/oxc_codegen/src/lib.rs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ use oxc_syntax::{
2626
};
2727

2828
use crate::{
29-
binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
30-
sourcemap_builder::SourcemapBuilder, str::Quote,
29+
binary_expr_visitor::BinaryExpressionVisitor,
30+
comment::CommentsMap,
31+
operator::Operator,
32+
sourcemap_builder::SourcemapBuilder,
33+
str::{Quote, is_script_close_tag},
3134
};
3235
pub use crate::{
3336
context::Context,
@@ -230,6 +233,40 @@ impl<'a> Codegen<'a> {
230233
self.code.print_str(s);
231234
}
232235

236+
/// Push str into the buffer, escaping `</script` to `<\/script`.
237+
#[inline]
238+
pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
239+
let slice = s.as_bytes();
240+
let mut consumed = 0;
241+
let mut i = 0;
242+
243+
// Only check when remaining string has length larger than 8.
244+
while i + 8 <= slice.len() {
245+
if is_script_close_tag(&slice[i..i + 8]) {
246+
// Push str up to and including `<`. Skip `/`. Write `\/` instead.
247+
// SAFETY:
248+
// The slice guarantees to be a valid UTF-8 string.
249+
// The consumed index is always pointed to a UTF-8 char boundary.
250+
// Current byte is `<`, a UTF-8 char boundary.
251+
unsafe {
252+
self.code.print_bytes_unchecked(&slice[consumed..=i]);
253+
}
254+
self.code.print_str("\\/");
255+
consumed = i + 2;
256+
i += 8;
257+
} else {
258+
i += 1;
259+
}
260+
}
261+
262+
// SAFETY:
263+
// The slice guarantees to be a valid UTF-8 string.
264+
// The consumed index is always pointed to a UTF-8 char boundary.
265+
unsafe {
266+
self.code.print_bytes_unchecked(&slice[consumed..]);
267+
}
268+
}
269+
233270
/// Print a single [`Expression`], adding it to the code generator's
234271
/// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
235272
#[inline]

crates/oxc_codegen/src/str.rs

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,10 @@ enum Escape {
340340
DQ = 11, // " - Double quote
341341
BQ = 12, // ` - Backtick quote
342342
DO = 13, // $ - Dollar sign
343-
LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
344-
NB = 15, // NBSP - Non-breaking space (first byte)
345-
LO = 16, // � - U+FFFD lossy replacement character (first byte)
343+
LT = 14, // < - Less-than sign
344+
LS = 15, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
345+
NB = 16, // NBSP - Non-breaking space (first byte)
346+
LO = 17, // � - U+FFFD lossy replacement character (first byte)
346347
}
347348

348349
/// Struct which ensures content is aligned on 128.
@@ -362,7 +363,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = {
362363
NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0
363364
__, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1
364365
__, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2
365-
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
366+
__, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3
366367
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
367368
__, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
368369
BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
@@ -385,9 +386,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState);
385386
/// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`).
386387
/// Must be in same order as discriminants in `Escape`.
387388
///
388-
/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total.
389-
/// Aligned on 128, so occupies a pair of L1 cache lines.
390-
static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
389+
/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 136 bytes in total.
390+
/// Aligned on 128, so first 16 occupy a pair of L1 cache lines.
391+
/// The last will be in separate cache line, but it should be vanishingly rare that it's accessed.
392+
static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([
391393
print_null,
392394
print_bell,
393395
print_backspace,
@@ -401,6 +403,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
401403
print_double_quote,
402404
print_backtick,
403405
print_dollar,
406+
print_less_than,
404407
print_ls_or_ps,
405408
print_non_breaking_space,
406409
print_lossy_replacement,
@@ -579,6 +582,29 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) {
579582
}
580583
}
581584

585+
// <
586+
unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
587+
debug_assert_eq!(state.peek(), Some(b'<'));
588+
589+
// Get slice of remaining bytes, including leading `<`
590+
let slice = state.bytes.as_slice();
591+
592+
// SAFETY: Next byte is `<`, which is ASCII
593+
unsafe { state.consume_byte_unchecked() };
594+
595+
if slice.len() >= 8 && is_script_close_tag(&slice[0..8]) {
596+
// Flush up to and including `<`. Skip `/`. Write `\/` instead. Then skip over `script`.
597+
// Next chunk starts with `script`.
598+
// SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII.
599+
unsafe { state.flush_and_consume_byte(codegen) };
600+
codegen.print_str("\\/");
601+
// SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
602+
// `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
603+
// positioned on UTF-8 char boundary.
604+
unsafe { state.consume_bytes_unchecked::<6>() };
605+
}
606+
}
607+
582608
// 0xE2 - first byte of <LS> or <PS>
583609
unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
584610
debug_assert_eq!(state.peek(), Some(0xE2));
@@ -696,3 +722,20 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
696722
pub fn cold_branch<F: FnOnce() -> T, T>(f: F) -> T {
697723
f()
698724
}
725+
726+
/// Check if the slice is `</script` regardless of case.
727+
pub fn is_script_close_tag(slice: &[u8]) -> bool {
728+
if slice.len() == 8 {
729+
// Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare.
730+
// https://godbolt.org/z/oGG16fK6v
731+
let mut slice: [u8; 8] = slice.try_into().unwrap();
732+
for b in slice.iter_mut().skip(2) {
733+
// `| 32` converts ASCII upper case letters to lower case.
734+
*b |= 32;
735+
}
736+
737+
slice == *b"</script"
738+
} else {
739+
false
740+
}
741+
}

crates/oxc_codegen/tests/integration/esbuild.rs

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,45 +1014,36 @@ fn test_jsx_single_line() {
10141014
}
10151015

10161016
#[test]
1017-
#[ignore]
10181017
fn test_avoid_slash_script() {
10191018
// Positive cases
10201019
test("x = '</script'", "x = \"<\\/script\";\n");
1020+
test("x = '</SCRIPT'", "x = \"<\\/SCRIPT\";\n");
1021+
test("x = '</ScRiPt'", "x = \"<\\/ScRiPt\";\n");
1022+
test("x = 'abc </script def'", "x = \"abc <\\/script def\";\n");
1023+
test("x = 'abc </ScRiPt def'", "x = \"abc <\\/ScRiPt def\";\n");
10211024
test("x = `</script`", "x = `<\\/script`;\n");
10221025
test("x = `</SCRIPT`", "x = `<\\/SCRIPT`;\n");
10231026
test("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n");
10241027
test("x = `</script${y}`", "x = `<\\/script${y}`;\n");
10251028
test("x = `${y}</script`", "x = `${y}<\\/script`;\n");
1029+
test("x = `<</script`", "x = `<<\\/script`;\n");
1030+
test("x = `</</script`", "x = `</<\\/script`;\n");
1031+
test("x = `</script</script`", "x = `<\\/script<\\/script`;\n");
10261032
test_minify("x = 1 < /script/.exec(y).length", "x=1< /script/.exec(y).length;");
10271033
test_minify("x = 1 < /SCRIPT/.exec(y).length", "x=1< /SCRIPT/.exec(y).length;");
10281034
test_minify("x = 1 < /ScRiPt/.exec(y).length", "x=1< /ScRiPt/.exec(y).length;");
10291035
test_minify("x = 1 << /script/.exec(y).length", "x=1<< /script/.exec(y).length;");
10301036
test("//! </script\n//! >/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n");
10311037
test("//! </SCRIPT\n//! >/SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n");
10321038
test("//! </ScRiPt\n//! >/ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n");
1033-
test("/*! </script \n </script */", "/*! <\\/script \n <\\/script */\n");
1034-
test("/*! </SCRIPT \n </SCRIPT */", "/*! <\\/SCRIPT \n <\\/SCRIPT */\n");
1035-
test("/*! </ScRiPt \n </ScRiPt */", "/*! <\\/ScRiPt \n <\\/ScRiPt */\n");
1036-
test(
1037-
"String.raw`</script`",
1038-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n",
1039-
);
1040-
test(
1041-
"String.raw`</script${a}`",
1042-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n",
1043-
);
1044-
test(
1045-
"String.raw`${a}</script`",
1046-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n",
1047-
);
1048-
test(
1049-
"String.raw`</SCRIPT`",
1050-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n",
1051-
);
1052-
test(
1053-
"String.raw`</ScRiPt`",
1054-
"import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n",
1055-
);
1039+
test("/*! </script \n</script */", "/*! <\\/script \n<\\/script */");
1040+
test("/*! </SCRIPT \n</SCRIPT */", "/*! <\\/SCRIPT \n<\\/SCRIPT */");
1041+
test("/*! </ScRiPt \n</ScRiPt */", "/*! <\\/ScRiPt \n<\\/ScRiPt */");
1042+
test("String.raw`</script`", "String.raw`<\\/script`;\n");
1043+
test("String.raw`</script${a}`", "String.raw`<\\/script${a}`;\n");
1044+
test("String.raw`${a}</script`", "String.raw`${a}<\\/script`;\n");
1045+
test("String.raw`</SCRIPT`", "String.raw`<\\/SCRIPT`;\n");
1046+
test("String.raw`</ScRiPt`", "String.raw`<\\/ScRiPt`;\n");
10561047

10571048
// Negative cases
10581049
test("x = '</'", "x = \"</\";\n");

0 commit comments

Comments
 (0)