fix(codegen): escape </script (oxc-project#11782)

xu-cheng · overlookmotel · web-flow · commit 22799c3f4173 · 2025-07-02T18:10:54.000+01:00
Fixes oxc-project#10334. Replaces oxc-project#10340 to cover the escape for both template literals and comments. We don’t need to handle regex, which is already covered by the existing codegen. Please let me know if anything else is needed to merge this PR. Thanks. --------- Co-authored-by: overlookmotel <theoverlookmotel@gmail.com>
diff --git a/crates/oxc_codegen/src/comment.rs b/crates/oxc_codegen/src/comment.rs
@@ -128,15 +128,15 @@ impl Codegen<'_> {
         let comment_source = comment.span.source_text(source_text);
         match comment.kind {
             CommentKind::Line => {
-                self.print_str(comment_source);
+                self.print_str_escaping_script_close_tag(comment_source);
             }
             CommentKind::Block => {
                 // Print block comments with our own indentation.
                 for line in comment_source.split(is_line_terminator) {
                     if !line.starts_with("/*") {
                         self.print_indent();
                     }
-                    self.print_str(line.trim_start());
+                    self.print_str_escaping_script_close_tag(line.trim_start());
                     if !line.ends_with("*/") {
                         self.print_hard_newline();
                     }
diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs
@@ -2083,7 +2083,7 @@ impl Gen for TemplateLiteral<'_> {
 
         for quasi in &self.quasis {
             p.add_source_mapping(quasi.span);
-            p.print_str(quasi.value.raw.as_str());
+            p.print_str_escaping_script_close_tag(quasi.value.raw.as_str());
             p.add_source_mapping_end(quasi.span);
 
             if let Some(expr) = expressions.next() {
diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs
@@ -26,8 +26,11 @@ use oxc_syntax::{
 };
 
 use crate::{
-    binary_expr_visitor::BinaryExpressionVisitor, comment::CommentsMap, operator::Operator,
-    sourcemap_builder::SourcemapBuilder, str::Quote,
+    binary_expr_visitor::BinaryExpressionVisitor,
+    comment::CommentsMap,
+    operator::Operator,
+    sourcemap_builder::SourcemapBuilder,
+    str::{Quote, is_script_close_tag},
 };
 pub use crate::{
     context::Context,
@@ -230,6 +233,40 @@ impl<'a> Codegen<'a> {
         self.code.print_str(s);
     }
 
+    /// Push `s` into the buffer, rewriting each case-insensitive `</script` as `<\/script`.
+    #[inline]
+    pub fn print_str_escaping_script_close_tag(&mut self, s: &str) {
+        let slice = s.as_bytes();
+        let mut consumed = 0;
+        let mut i = 0;
+
+        // A match needs 8 bytes (`</script`), so stop once fewer than 8 bytes remain.
+        while i + 8 <= slice.len() {
+            if is_script_close_tag(&slice[i..i + 8]) {
+                // Push pending bytes up to and including `<`. Skip `/`. Write `\/` instead.
+                // SAFETY:
+                // `slice` is the bytes of `s`, which is a valid UTF-8 string.
+                // `consumed` is 0 or just past an ASCII `/`, i.e. on a UTF-8 char boundary.
+                // Byte at `i` is `<` (ASCII), so the range also ends on a UTF-8 char boundary.
+                unsafe {
+                    self.code.print_bytes_unchecked(&slice[consumed..=i]);
+                }
+                self.code.print_str("\\/");
+                consumed = i + 2;
+                i += 8;
+            } else {
+                i += 1;
+            }
+        }
+
+        // SAFETY:
+        // `slice` is the bytes of `s`, which is a valid UTF-8 string.
+        // `consumed` is 0 or just past an ASCII `/`, i.e. on a UTF-8 char boundary.
+        unsafe {
+            self.code.print_bytes_unchecked(&slice[consumed..]);
+        }
+    }
+
     /// Print a single [`Expression`], adding it to the code generator's
     /// internal buffer. Unlike [`Codegen::build`], this does not consume `self`.
     #[inline]
diff --git a/crates/oxc_codegen/src/str.rs b/crates/oxc_codegen/src/str.rs
@@ -340,9 +340,10 @@ enum Escape {
     DQ = 11, // "     - Double quote
     BQ = 12, // `     - Backtick quote
     DO = 13, // $     - Dollar sign
-    LS = 14, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
-    NB = 15, // NBSP  - Non-breaking space (first byte)
-    LO = 16, // �     - U+FFFD lossy replacement character (first byte)
+    LT = 14, // <     - Less-than sign
+    LS = 15, // LS/PS - U+2028 LINE SEPARATOR or U+2029 PARAGRAPH SEPARATOR (first byte)
+    NB = 16, // NBSP  - Non-breaking space (first byte)
+    LO = 17, // �     - U+FFFD lossy replacement character (first byte)
 }
 
 /// Struct which ensures content is aligned on 128.
@@ -362,7 +363,7 @@ static ESCAPES: Aligned128<[Escape; 256]> = {
         NU, __, __, __, __, __, __, BE, BK, __, NL, VT, FF, CR, __, __, // 0
         __, __, __, __, __, __, __, __, __, __, __, ES, __, __, __, __, // 1
         __, __, DQ, __, DO, __, __, SQ, __, __, __, __, __, __, __, __, // 2
-        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
+        __, __, __, __, __, __, __, __, __, __, __, __, LT, __, __, __, // 3
         __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
         __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
         BQ, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
@@ -385,9 +386,10 @@ type ByteHandler = unsafe fn(&mut Codegen, &mut PrintStringState);
 /// Indexed by `escape as usize - 1` (where `escape` is not `Escape::__`).
 /// Must be in same order as discriminants in `Escape`.
 ///
-/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 128 bytes in total.
-/// Aligned on 128, so occupies a pair of L1 cache lines.
-static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
+/// Function pointers are 8 bytes each, so `BYTE_HANDLERS` is 136 bytes in total.
+/// Aligned on 128, so first 16 occupy a pair of L1 cache lines.
+/// The last will be in a separate cache line, but it should be vanishingly rare that it's accessed.
+static BYTE_HANDLERS: Aligned128<[ByteHandler; 17]> = Aligned128([
     print_null,
     print_bell,
     print_backspace,
@@ -401,6 +403,7 @@ static BYTE_HANDLERS: Aligned128<[ByteHandler; 16]> = Aligned128([
     print_double_quote,
     print_backtick,
     print_dollar,
+    print_less_than,
     print_ls_or_ps,
     print_non_breaking_space,
     print_lossy_replacement,
@@ -579,6 +582,29 @@ unsafe fn print_dollar(codegen: &mut Codegen, state: &mut PrintStringState) {
     }
 }
 
+// < - possible start of `</script` (any case), which is printed as `<\/script`
+unsafe fn print_less_than(codegen: &mut Codegen, state: &mut PrintStringState) {
+    debug_assert_eq!(state.peek(), Some(b'<'));
+
+    // Get slice of remaining bytes, including leading `<` (taken before `<` is consumed)
+    let slice = state.bytes.as_slice();
+
+    // SAFETY: Next byte is `<`, which is ASCII
+    unsafe { state.consume_byte_unchecked() };
+
+    if slice.len() >= 8 && is_script_close_tag(&slice[0..8]) {
+        // Flush up to and including `<`. Skip `/`. Write `\/` instead. Then skip over `script`.
+        // Next chunk starts with `script`, so its original letter case is kept.
+        // SAFETY: We already consumed `<`. Next byte is `/`, which is ASCII.
+        unsafe { state.flush_and_consume_byte(codegen) };
+        codegen.print_str("\\/");
+        // SAFETY: The check above ensures there are 6 bytes left, after consuming 2 already.
+        // `script` / `SCRIPT` is all ASCII bytes, so skipping them leaves `bytes` iterator
+        // positioned on UTF-8 char boundary.
+        unsafe { state.consume_bytes_unchecked::<6>() };
+    }
+}
+
 // 0xE2 - first byte of <LS> or <PS>
 unsafe fn print_ls_or_ps(codegen: &mut Codegen, state: &mut PrintStringState) {
     debug_assert_eq!(state.peek(), Some(0xE2));
@@ -696,3 +722,20 @@ unsafe fn print_lossy_replacement(codegen: &mut Codegen, state: &mut PrintString
 pub fn cold_branch<F: FnOnce() -> T, T>(f: F) -> T {
     f()
 }
+
+/// Check if `slice` is exactly the 8 bytes `</script`, matching `script` in any ASCII case.
+pub fn is_script_close_tag(slice: &[u8]) -> bool {
+    if slice.len() == 8 {
+        // Compiler condenses these operations to an 8-byte read, u64 AND, and u64 compare.
+        // https://godbolt.org/z/oGG16fK6v
+        let mut slice: [u8; 8] = slice.try_into().unwrap();
+        for b in slice.iter_mut().skip(2) {
+            // `| 32` lower-cases ASCII upper case letters; lower case letters are unchanged.
+            *b |= 32;
+        }
+
+        slice == *b"</script"
+    } else {
+        false
+    }
+}
diff --git a/crates/oxc_codegen/tests/integration/esbuild.rs b/crates/oxc_codegen/tests/integration/esbuild.rs
@@ -1014,45 +1014,36 @@ fn test_jsx_single_line() {
 }
 
 #[test]
-#[ignore]
 fn test_avoid_slash_script() {
     // Positive cases
     test("x = '</script'", "x = \"<\\/script\";\n");
+    test("x = '</SCRIPT'", "x = \"<\\/SCRIPT\";\n");
+    test("x = '</ScRiPt'", "x = \"<\\/ScRiPt\";\n");
+    test("x = 'abc </script def'", "x = \"abc <\\/script def\";\n");
+    test("x = 'abc </ScRiPt def'", "x = \"abc <\\/ScRiPt def\";\n");
     test("x = `</script`", "x = `<\\/script`;\n");
     test("x = `</SCRIPT`", "x = `<\\/SCRIPT`;\n");
     test("x = `</ScRiPt`", "x = `<\\/ScRiPt`;\n");
     test("x = `</script${y}`", "x = `<\\/script${y}`;\n");
     test("x = `${y}</script`", "x = `${y}<\\/script`;\n");
+    test("x = `<</script`", "x = `<<\\/script`;\n");
+    test("x = `</</script`", "x = `</<\\/script`;\n");
+    test("x = `</script</script`", "x = `<\\/script<\\/script`;\n");
     test_minify("x = 1 < /script/.exec(y).length", "x=1< /script/.exec(y).length;");
     test_minify("x = 1 < /SCRIPT/.exec(y).length", "x=1< /SCRIPT/.exec(y).length;");
     test_minify("x = 1 < /ScRiPt/.exec(y).length", "x=1< /ScRiPt/.exec(y).length;");
     test_minify("x = 1 << /script/.exec(y).length", "x=1<< /script/.exec(y).length;");
     test("//! </script\n//! >/script\n//! /script", "//! <\\/script\n//! >/script\n//! /script\n");
     test("//! </SCRIPT\n//! >/SCRIPT\n//! /SCRIPT", "//! <\\/SCRIPT\n//! >/SCRIPT\n//! /SCRIPT\n");
     test("//! </ScRiPt\n//! >/ScRiPt\n//! /ScRiPt", "//! <\\/ScRiPt\n//! >/ScRiPt\n//! /ScRiPt\n");
-    test("/*! </script \n </script */", "/*! <\\/script \n <\\/script */\n");
-    test("/*! </SCRIPT \n </SCRIPT */", "/*! <\\/SCRIPT \n <\\/SCRIPT */\n");
-    test("/*! </ScRiPt \n </ScRiPt */", "/*! <\\/ScRiPt \n <\\/ScRiPt */\n");
-    test(
-        "String.raw`</script`",
-        "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\"])));\n",
-    );
-    test(
-        "String.raw`</script${a}`",
-        "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/script\", \"\"])), a);\n",
-    );
-    test(
-        "String.raw`${a}</script`",
-        "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"\", \"<\\/script\"])), a);\n",
-    );
-    test(
-        "String.raw`</SCRIPT`",
-        "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/SCRIPT\"])));\n",
-    );
-    test(
-        "String.raw`</ScRiPt`",
-        "import { __template } from \"<runtime>\";\nvar _a;\nString.raw(_a || (_a = __template([\"<\\/ScRiPt\"])));\n",
-    );
+    test("/*! </script \n</script */", "/*! <\\/script \n<\\/script */");
+    test("/*! </SCRIPT \n</SCRIPT */", "/*! <\\/SCRIPT \n<\\/SCRIPT */");
+    test("/*! </ScRiPt \n</ScRiPt */", "/*! <\\/ScRiPt \n<\\/ScRiPt */");
+    test("String.raw`</script`", "String.raw`<\\/script`;\n");
+    test("String.raw`</script${a}`", "String.raw`<\\/script${a}`;\n");
+    test("String.raw`${a}</script`", "String.raw`${a}<\\/script`;\n");
+    test("String.raw`</SCRIPT`", "String.raw`<\\/SCRIPT`;\n");
+    test("String.raw`</ScRiPt`", "String.raw`<\\/ScRiPt`;\n");
 
     // Negative cases
     test("x = '</'", "x = \"</\";\n");

Original file line number	Diff line number	Diff line change
`@@ -128,15 +128,15 @@ impl Codegen<'_> {`
`128`	`128`	`let comment_source = comment.span.source_text(source_text);`
`129`	`129`	`match comment.kind {`
`130`	`130`	`CommentKind::Line => {`
`131`		`- self.print_str(comment_source);`
	`131`	`+ self.print_str_escaping_script_close_tag(comment_source);`
`132`	`132`	`}`
`133`	`133`	`CommentKind::Block => {`
`134`	`134`	`// Print block comments with our own indentation.`
`135`	`135`	`for line in comment_source.split(is_line_terminator) {`
`136`	`136`	`if !line.starts_with("/*") {`
`137`	`137`	`self.print_indent();`
`138`	`138`	`}`
`139`		`- self.print_str(line.trim_start());`
	`139`	`+ self.print_str_escaping_script_close_tag(line.trim_start());`
`140`	`140`	`if !line.ends_with("*/") {`
`141`	`141`	`self.print_hard_newline();`
`142`	`142`	`}`