From d8eb4285cb38a8c4c9c91deb2a3c2687c8112f7e Mon Sep 17 00:00:00 2001 From: Christoph Knittel Date: Sat, 6 Sep 2025 14:10:59 +0200 Subject: [PATCH 1/2] Fix code generation for emojis in polyvars and labels --- compiler/core/js_dump_string.ml | 94 ++++++++++++++----- .../data/printer/expr/exoticIdent.res | 6 ++ .../printer/expr/expected/exoticIdent.res.txt | 6 ++ tests/tests/src/exotic_labels_test.mjs | 5 + tests/tests/src/exotic_labels_test.res | 6 ++ tests/tests/src/poly_variant_test.mjs | 4 + tests/tests/src/poly_variant_test.res | 7 ++ 7 files changed, 105 insertions(+), 23 deletions(-) diff --git a/compiler/core/js_dump_string.ml b/compiler/core/js_dump_string.ml index 25899961a0..4022c2d1d7 100644 --- a/compiler/core/js_dump_string.ml +++ b/compiler/core/js_dump_string.ml @@ -24,6 +24,8 @@ module P = Ext_pp +open Ext_utf8 + (** Avoid to allocate single char string too many times*) let array_str1 = Array.init 256 (fun i -> String.make 1 (Char.chr i)) @@ -56,40 +58,86 @@ let ( +> ) = Ext_buffer.add_string let escape_to_buffer f (* ?(utf=false)*) s = let pp_raw_string f (* ?(utf=false)*) s = let l = String.length s in - for i = 0 to l - 1 do - let c = String.unsafe_get s i in + let i = ref 0 in + while !i < l do + let c = String.unsafe_get s !i in match c with - | '\b' -> f +> "\\b" - | '\012' -> f +> "\\f" - | '\n' -> f +> "\\n" - | '\r' -> f +> "\\r" - | '\t' -> f +> "\\t" + | '\b' -> + f +> "\\b"; + incr i + | '\012' -> + f +> "\\f"; + incr i + | '\n' -> + f +> "\\n"; + incr i + | '\r' -> + f +> "\\r"; + incr i + | '\t' -> + f +> "\\t"; + incr i (* This escape sequence is not supported by IE < 9 | '\011' -> "\\v" - IE < 9 treats '\v' as 'v' instead of a vertical tab ('\x0B'). - If cross-browser compatibility is a concern, use \x0B instead of \v. + IE < 9 treats '\v' as 'v' instead of a vertical tab ('\x0B'). + If cross-browser compatibility is a concern, use \x0B instead of \v. - Another thing to note is that the \v and \0 escapes are not allowed in JSON strings. - *) + Another thing to note is that the \v and \0 escapes are not allowed in JSON strings. + *) | '\000' - when i = l - 1 + when !i = l - 1 || - let next = String.unsafe_get s (i + 1) in + let next = String.unsafe_get s (!i + 1) in next < '0' || next > '9' -> - f +> "\\0" - | '\\' (* when not utf*) -> f +> "\\\\" + f +> "\\0"; + incr i + | '\\' (* when not utf*) -> + f +> "\\\\"; + incr i | '\000' .. '\031' | '\127' -> let c = Char.code c in f +> "\\x"; f +> Array.unsafe_get array_conv (c lsr 4); - f +> Array.unsafe_get array_conv (c land 0xf) - | '\128' .. '\255' (* when not utf*) -> - let c = Char.code c in - f +> "\\x"; - f +> Array.unsafe_get array_conv (c lsr 4); - f +> Array.unsafe_get array_conv (c land 0xf) - | '\"' -> f +> "\\\"" (* quote*) - | _ -> f +> Array.unsafe_get array_str1 (Char.code c) + f +> Array.unsafe_get array_conv (c land 0xf); + incr i + | '\128' .. '\255' -> ( + (* Check if this is part of a valid UTF-8 sequence *) + let utf8_byte = classify c in + match utf8_byte with + | Single _ -> + (* Single byte >= 128, escape it *) + let c = Char.code c in + f +> "\\x"; + f +> Array.unsafe_get array_conv (c lsr 4); + f +> Array.unsafe_get array_conv (c land 0xf); + incr i + | Leading (n, _) -> + (* Start of UTF-8 sequence, output the whole sequence as-is *) + let rec output_utf8_sequence pos remaining = + if remaining > 0 && pos < l then ( + let byte = String.unsafe_get s pos in + f +> Array.unsafe_get array_str1 (Char.code byte); + output_utf8_sequence (pos + 1) (remaining - 1)) + in + output_utf8_sequence !i (n + 1); + (* Skip the continuation bytes *) + i := !i + n + 1 + | Cont _ -> + (* Continuation byte, should be handled as part of Leading case *) + incr i + | Invalid -> + (* Invalid UTF-8 byte, escape it *) + let c = Char.code c in + f +> "\\x"; + f +> Array.unsafe_get array_conv (c lsr 4); + f +> Array.unsafe_get array_conv (c land 0xf); + incr i) + | '\"' -> + f +> "\\\""; + incr i (* quote*) + | _ -> + f +> Array.unsafe_get array_str1 (Char.code c); + incr i done in f +> "\""; diff --git a/tests/syntax_tests/data/printer/expr/exoticIdent.res b/tests/syntax_tests/data/printer/expr/exoticIdent.res index f1fda7ccc5..66b14152e1 100644 --- a/tests/syntax_tests/data/printer/expr/exoticIdent.res +++ b/tests/syntax_tests/data/printer/expr/exoticIdent.res @@ -62,3 +62,9 @@ let dict = { \"key": 42, \"KEY": 42, } + +type t = {\"🎉": int} + +let x = { + \"🎉": 42, +} diff --git a/tests/syntax_tests/data/printer/expr/expected/exoticIdent.res.txt b/tests/syntax_tests/data/printer/expr/expected/exoticIdent.res.txt index 4db60c9739..9ba0e0c3d3 100644 --- a/tests/syntax_tests/data/printer/expr/expected/exoticIdent.res.txt +++ b/tests/syntax_tests/data/printer/expr/expected/exoticIdent.res.txt @@ -74,3 +74,9 @@ let dict = { key: 42, \"KEY": 42, } + +type t = {\"🎉": int} + +let x = { + \"🎉": 42, +} diff --git a/tests/tests/src/exotic_labels_test.mjs b/tests/tests/src/exotic_labels_test.mjs index 6aceb11261..254e7ad862 100644 --- a/tests/tests/src/exotic_labels_test.mjs +++ b/tests/tests/src/exotic_labels_test.mjs @@ -10,8 +10,13 @@ let dict = { KEY: 1 }; +let x = { + "🎉": 42 +}; + export { fn1, dict, + x, } /* No side effect */ diff --git a/tests/tests/src/exotic_labels_test.res b/tests/tests/src/exotic_labels_test.res index c805daf98d..39624e9ba7 100644 --- a/tests/tests/src/exotic_labels_test.res +++ b/tests/tests/src/exotic_labels_test.res @@ -16,3 +16,9 @@ let dict = { key: 1, \"KEY": 1, } + +type t = {\"🎉": int} + +let x = { + \"🎉": 42, +} diff --git a/tests/tests/src/poly_variant_test.mjs b/tests/tests/src/poly_variant_test.mjs index 93b031b3ea..8321b6bb4b 100644 --- a/tests/tests/src/poly_variant_test.mjs +++ b/tests/tests/src/poly_variant_test.mjs @@ -96,6 +96,10 @@ eq("File \"poly_variant_test.res\", line 161, characters 5-12", 3, p_is_int_test VAL: 2 })); +eq("File \"poly_variant_test.res\", line 183, characters 5-12", "🚀", "🚀"); + +eq("File \"poly_variant_test.res\", line 184, characters 5-12", "🔥", "🔥"); + Mt.from_pair_suites("Poly_variant_test", suites.contents); /* Not a pure module */ diff --git a/tests/tests/src/poly_variant_test.res b/tests/tests/src/poly_variant_test.res index 7aa2887a7b..a043ec5af0 100644 --- a/tests/tests/src/poly_variant_test.res +++ b/tests/tests/src/poly_variant_test.res @@ -177,4 +177,11 @@ let hey = x => Js.log(v) } +type t = [#"🚀" | #"🔥"] + +let () = { + eq(__LOC__, "🚀", (#"🚀": t :> string)) + eq(__LOC__, "🔥", (#"🔥": t :> string)) +} + let () = Mt.from_pair_suites(__MODULE__, suites.contents) From ca31863cdbe88d40d39f2c3afc6551a0c65b0105 Mon Sep 17 00:00:00 2001 From: Christoph Knittel Date: Sun, 7 Sep 2025 19:23:42 +0200 Subject: [PATCH 2/2] CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e289bbefc..edd491c6e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ #### :bug: Bug fix +- Fix code generation for emojis in polyvars and labels. https://github.com/rescript-lang/rescript/pull/7853 + #### :memo: Documentation #### :nail_care: Polish