rescript-lang · mediremi · Aug 28, 2025 · Aug 22, 2025 · Aug 27, 2025 · Aug 27, 2025
diff --git a/compiler/syntax/src/res_scanner.ml b/compiler/syntax/src/res_scanner.ml
@@ -580,9 +580,53 @@ let scan_regex scanner =
       bring_buf_up_to_date ~start_offset:last_char_offset;
       Buffer.contents buf)
   in
-  let rec scan () =
+  (* Look ahead from the '[' at absolute offset [from_offset] and report
+     whether a character-class closer exists on the same line.
+     Beginning-of-class (BOS) rules: a leading '^' is not content, and the
+     first ']' right after '[' or '[^' is literal content, not a closer.
+     A backslash skips itself plus the next character and leaves BOS as is.
+     Returns true only when a later unescaped ']' is reached before '\n',
+     '\r', a trailing lone backslash, or the end of [scanner.src].
+     Reads [scanner.src] only; the scanner itself is not advanced. *)
+  let has_valid_class_closer_ahead ~from_offset =
+    let src = scanner.src in
+    let len = String.length src in
+    let i = ref (from_offset + 1) in
+    let bos = ref true in
+    let rec loop () =
+      if !i >= len then false
+      else
+        (* Bounds-checked read: the guard above tests the upper bound only. *)
+        match String.get src !i with
+        | '\n' | '\r' -> false
+        | '\\' ->
+          if !i + 1 < len then (
+            i := !i + 2;
+            loop ())
+          else false
+        | '^' when !bos ->
+          incr i;
+          loop ()
+        | ']' when !bos ->
+          (* Leading ']' is literal content; after that, we're no longer at BOS. *)
+          bos := false;
+          incr i;
+          loop ()
+        | ']' -> true
+        | _ ->
+          bos := false;
+          incr i;
+          loop ()
+    in
+    loop ()
+  in
+
+  (* Scan until closing '/' that is not inside a character class. Only enter
+     character-class mode when a valid ']' is present ahead (same line).
+     Track beginning-of-class to allow a leading ']' (or leading '^' then ']'). *)
+  let rec scan ~in_class ~class_at_bos =
     match scanner.ch with
-    | '/' ->
+    | '/' when not in_class ->
       let last_char_offset = scanner.offset in
       next scanner;
       let pattern = result ~first_char_offset ~last_char_offset in
@@ -606,12 +650,34 @@ let scan_regex scanner =
     | '\\' ->
       next scanner;
       next scanner;
-      scan ()
+      (* Escapes count as content when inside a class; clear BOS. *)
+      scan ~in_class ~class_at_bos:(if in_class then false else class_at_bos)
+    | '[' when not in_class ->
+      (* Only enter a character class if a closing ']' exists ahead on the
+         same line. Otherwise treat '[' as a normal char. *)
+      if has_valid_class_closer_ahead ~from_offset:scanner.offset then (
+        next scanner;
+        scan ~in_class:true ~class_at_bos:true)
+      else (
+        next scanner;
+        scan ~in_class ~class_at_bos)
+    | '^' when in_class && class_at_bos ->
+      (* Leading caret does not count as content. *)
+      next scanner;
+      scan ~in_class ~class_at_bos:true
+    | ']' when in_class && class_at_bos ->
+      (* First ']' after '[' or '[^' is literal, not a closer. *)
+      next scanner;
+      scan ~in_class ~class_at_bos:false
+    | ']' when in_class ->
+      (* Leave character class. *)
+      next scanner;
+      scan ~in_class:false ~class_at_bos:false
     | _ ->
       next scanner;
-      scan ()
+      scan ~in_class ~class_at_bos:(if in_class then false else class_at_bos)
   in
-  let pattern, flags = scan () in
+  let pattern, flags = scan ~in_class:false ~class_at_bos:false in
   let end_pos = position scanner in
   (start_pos, end_pos, Token.Regex (pattern, flags))
 

diff --git a/tests/syntax_tests/data/parsing/grammar/expressions/expected/regex.res.txt b/tests/syntax_tests/data/parsing/grammar/expressions/expected/regex.res.txt
@@ -499,4 +499,12 @@ let re = [%re {js|/^a*?$/|js}]
 let re = [%re {js|/^((a)c)?(ab)$/|js}]
 let re = [%re {js|/^([ab]*?)(?=(b)?)c/|js}]
 let re = [%re {js|/^([ab]*?)(?!(b))c/|js}]
-let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
+let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
+let re = [%re {js|/\.[^/.]+$/|js}]
+let re = [%re {js|/[]/]/|js}]
+let re = [%re {js|/[^]]/|js}]
+let re = [%re {js|/[/]/|js}]
+let re = [%re {js|/[]]/|js}]
+let re = [%re {js|/[\]]/|js}]
+let re = [%re {js|/[[]]/|js}]
+let re = [%re {js|/[^]/]/|js}]
diff --git a/tests/syntax_tests/data/parsing/grammar/expressions/regex.res b/tests/syntax_tests/data/parsing/grammar/expressions/regex.res
@@ -607,3 +607,16 @@ let re = /^((a)c)?(ab)$/
 let re = /^([ab]*?)(?=(b)?)c/
 let re = /^([ab]*?)(?!(b))c/
 let re = /^([ab]*?)(?<!(a))c/
+
+let re = /\.[^/.]+$/
+
+// Leading ']' is literal; '/' inside class must not terminate
+let re = /[]/]/
+let re = /[^]]/
+let re = /[/]/
+
+// Additional leading ']' edge cases
+let re = /[]]/
+let re = /[\]]/
+let re = /[[]]/
+let re = /[^]/]/