diff --git a/Jenkinsfile b/Jenkinsfile index 73781ee890..29f6e37de7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -50,9 +50,9 @@ def runPerformanceTests(String testsPath, String stancFlags = ""){ cd cmdstan; make clean-all; """ - if (stancFlags?.trim()) { - sh "cd performance-tests-cmdstan/cmdstan && echo 'STANCFLAGS= $stancFlags' >> make/local" - } + // if (stancFlags?.trim()) { + sh "cd performance-tests-cmdstan/cmdstan && echo 'STANCFLAGS= --allow-unicode $stancFlags' >> make/local" + // } sh """ cd performance-tests-cmdstan/cmdstan @@ -115,7 +115,7 @@ pipeline { } environment { CXX = 'clang++-6.0' - MACOS_SWITCH = 'stanc3-4.14' + MACOS_SWITCH = 'stanc3-4.14-unicode' PARALLEL = 4 GIT_AUTHOR_NAME = 'Stan Jenkins' GIT_AUTHOR_EMAIL = 'mc.stanislaw@gmail.com' @@ -803,7 +803,7 @@ pipeline { dir '.' label 'linux && emulation' args "${qemuArchFlag(ARCHITECTURE)} --group-add=987 --group-add=980 --group-add=988 --entrypoint='' -v /var/run/docker.sock:/var/run/docker.sock" - additionalBuildArgs "${qemuArchFlag(ARCHITECTURE)} --build-arg PUID=\$(id -u) --build-arg PGID=\$(id -g)" + additionalBuildArgs "${qemuArchFlag(ARCHITECTURE)} --ulimit stack=67108864 --build-arg PUID=\$(id -u) --build-arg PGID=\$(id -g)" } } steps { diff --git a/dune-project b/dune-project index 5d152b8afb..5165a94b96 100644 --- a/dune-project +++ b/dune-project @@ -26,6 +26,10 @@ (= 2.1.0)) (cmdliner (= 1.3.0)) + (uucp + (= 16.0.0)) + (uunf + (= 16.0.0)) (ocamlformat (and :with-test diff --git a/scripts/install_build_deps.sh b/scripts/install_build_deps.sh index f7beebbe29..18564b8c4b 100755 --- a/scripts/install_build_deps.sh +++ b/scripts/install_build_deps.sh @@ -7,6 +7,7 @@ eval $(opam env) opam pin -y core v0.16.0 --no-action -opam install -y dune core.v0.16.0 menhir.20230608 ppx_deriving.5.2.1 fmt.0.9.0 yojson.2.1.0 cmdliner.1.3.0 +opam install -y dune core.v0.16.0 menhir.20230608 ppx_deriving.5.2.1 fmt.0.9.0 yojson.2.1.0\ + cmdliner.1.3.0 uucp.16.0.0 uunf.16.0.0 eval $(opam env) diff --git a/scripts/install_build_deps_windows.sh b/scripts/install_build_deps_windows.sh index 05413d4fa2..e4c5e98585 100755 --- a/scripts/install_build_deps_windows.sh +++ b/scripts/install_build_deps_windows.sh @@ -9,14 +9,12 @@ eval $(opam env) # Add windows repository opam repository add windows http://github.com/ocaml-cross/opam-cross-windows.git -# Request the compiler to be built with flambda optimizers -opam install -y conf-flambda-windows - # Install the compiler opam install -y "ocaml-windows64=4.14.1" # Install dependencies opam install -y core.v0.16.1 core-windows.v0.16.1 menhir.20230608 menhir-windows.20230608 ppx_deriving.5.2.1 ppx_deriving-windows.5.2.1\ - fmt.0.9.0 fmt-windows.0.9.0 yojson.2.1.0 yojson-windows.2.1.0 cmdliner.1.3.0 cmdliner-windows.1.3.0 + fmt.0.9.0 fmt-windows.0.9.0 yojson.2.1.0 yojson-windows.2.1.0 cmdliner.1.3.0 cmdliner-windows.1.3.0 uucp.16.0.0 uucp-windows.16.0.0\ + uunf.16.0.0 uunf-windows.16.0.0 eval $(opam env) diff --git a/src/common/Unicode.ml b/src/common/Unicode.ml new file mode 100644 index 0000000000..9e5dad9678 --- /dev/null +++ b/src/common/Unicode.ml @@ -0,0 +1,107 @@ +let pp_uchar ppf u = + let u_int = Uchar.to_int u in + if u_int < 128 then Fmt.string ppf (Char.chr u_int |> Char.escaped) + else Fmt.pf ppf "U+%04X" u_int + +let is_ascii s = + let rec loop max b i = + if i > max then true + else if Bytes.get_uint8 b i < 128 then loop max b (i + 1) + else false in + let b = Bytes.of_string s in + loop (Bytes.length b - 1) b 0 + +let normalize = Uunf_string.normalize_utf_8 `NFKC + +let foldi_uchars ~f acc str = + let len = String.length str in + let rec loop pos acc = + if pos == len then acc + else + let decode = String.get_utf_8_uchar str pos in + let char_length = Uchar.utf_decode_length decode in + let uchar = Uchar.utf_decode_uchar decode in + let acc = f acc pos uchar in + loop (pos + char_length) acc in + loop 0 acc + +let iteri_uchars ~f str = + let f' buf pos c = + f pos c; + Buffer.add_utf_8_uchar buf c; + buf in + let s_after = + Buffer.contents + @@ foldi_uchars ~f:f' (Buffer.create (String.length str)) str in + (* another sanity check *) + if not (String.equal str s_after) then + Core.( + ICE.internal_compiler_error + [%message + "Failed to round-trip unicode string!" + (str : string) + (s_after : string)]) + +(* WIP: + + While not strictly necessary, there are some additional restrictions which + are good to implement for validation and preventing strings that are visually + identical from being distinct identifiers. + A good summary can be found here: https://perl11.org/blog/unicode-identifiers.html + + Most of these are only a problem if you assume maliciousness of the user, + so they may not be important for an initial version in Stan. +*) + +(* Defined in https://www.unicode.org/reports/tr39/#Confusable_Detection *) +let confusable x y = + let skeleton x = + let x = Uunf_string.normalize_utf_8 `NFD x in + let f acc _ c = + if Uucp.Gen.is_default_ignorable c then () + else + (* TODO!! replace with prototype - need data? *) + Buffer.add_utf_8_uchar acc c; + acc in + let buf = foldi_uchars ~f (Buffer.create (String.length x)) x in + let x = Buffer.contents buf in + let x = Uunf_string.normalize_utf_8 `NFD x in + x in + String.compare (skeleton x) (skeleton y) + +module ScriptSet = Set.Make (Uucp.Script) + +(** copied from UUCP's definition of [Uucp.Script.t] *) +let all = + ScriptSet.of_list + [ `Adlm; `Aghb; `Ahom; `Arab; `Armi; `Armn; `Avst; `Bali; `Bamu; `Bass; `Batk + ; `Beng; `Bhks; `Bopo; `Brah; `Brai; `Bugi; `Buhd; `Cakm; `Cans; `Cari + ; `Cham; `Cher; `Chrs; `Copt; `Cpmn; `Cprt; `Cyrl; `Deva; `Diak; `Dogr + ; `Dsrt; `Dupl; `Egyp; `Elba; `Elym; `Ethi; `Geor; `Glag; `Gong; `Gonm + ; `Goth; `Gran; `Grek; `Gujr; `Guru; `Hang; `Hani; `Hano; `Hatr; `Hebr + ; `Hira; `Hluw; `Hmng; `Hmnp; `Hrkt; `Hung; `Ital; `Java; `Kali; `Kana + ; `Kawi; `Khar; `Khmr; `Khoj; `Knda; `Kthi; `Kits; `Lana; `Laoo; `Latn + ; `Lepc; `Limb; `Lina; `Linb; `Lisu; `Lyci; `Lydi; `Mahj; `Maka; `Mand + ; `Mani; `Marc; `Medf; `Mend; `Merc; `Mero; `Mlym; `Modi; `Mong; `Mroo + ; `Mtei; `Mult; `Mymr; `Nagm; `Nand; `Narb; `Nbat; `Newa; `Nkoo; `Nshu + ; `Ogam; `Olck; `Orkh; `Orya; `Osge; `Osma; `Ougr; `Palm; `Pauc; `Perm + ; `Phag; `Phli; `Phlp; `Phnx; `Plrd; `Prti; `Qaai; `Rjng; `Rohg; `Runr + ; `Samr; `Sarb; `Saur; `Sgnw; `Shaw; `Shrd; `Sidd; `Sind; `Sinh; `Sogd + ; `Sogo; `Sora; `Soyo; `Sund; `Sylo; `Syrc; `Tagb; `Takr; `Tale; `Talu + ; `Taml; `Tang; `Tavt; `Telu; `Tfng; `Tglg; `Thaa; `Thai; `Tibt; `Tirh + ; `Tnsa; `Toto; `Ugar; `Vaii; `Vith; `Wara; `Wcho; `Xpeo; `Xsux; `Yezi + ; `Yiii; `Zanb; `Zinh; `Zyyy; `Zzzz ] + +let extended s = + if ScriptSet.mem `Zyyy s || ScriptSet.mem `Zinh s then all else s + +(* Defined in https://www.unicode.org/reports/tr39/#Restriction_Level_Detection *) +let restriction_level x = + let f acc _ c = + let scripts = + Uucp.Script.script_extensions c |> ScriptSet.of_list |> extended in + scripts :: acc in + let soss = foldi_uchars ~f [] x in + let resolved = List.fold_right ScriptSet.inter soss all in + if not @@ ScriptSet.is_empty resolved then `Single + else `Unrestricted (* TODO implement levels 3-5 *) diff --git a/src/common/dune b/src/common/dune index c0acc0cfd4..0c8c2956b2 100644 --- a/src/common/dune +++ b/src/common/dune @@ -1,7 +1,7 @@ (library (name common) (public_name stanc.common) - (libraries core fmt) + (libraries core fmt uunf uucp) (instrumentation (backend bisect_ppx)) (inline_tests) diff --git a/src/driver/Entry.ml b/src/driver/Entry.ml index b837fd5ac0..87ac73d7f5 100644 --- a/src/driver/Entry.ml +++ b/src/driver/Entry.ml @@ -31,6 +31,7 @@ let set_model_name model_name = let reset_mutable_states model_name (flags : Flags.t) = Common.Gensym.reset_danger_use_cautiously (); + Identifiers.allow_unicode := flags.allow_unicode; Include_files.include_provider := flags.include_source; set_model_name model_name; Typechecker.check_that_all_functions_have_definition := diff --git a/src/driver/Flags.ml b/src/driver/Flags.ml index be567713ad..a2f9349454 100644 --- a/src/driver/Flags.ml +++ b/src/driver/Flags.ml @@ -7,6 +7,7 @@ type t = ; standalone_functions: bool ; use_opencl: bool ; include_source: Frontend.Include_files.t + ; allow_unicode: bool ; info: bool ; version: bool ; auto_format: bool @@ -49,6 +50,7 @@ let default = ; standalone_functions= false ; use_opencl= false ; include_source= Frontend.Include_files.FileSystemPaths [] + ; allow_unicode= false ; info= false ; version= false ; auto_format= false diff --git a/src/driver/Flags.mli b/src/driver/Flags.mli index 64611836b4..dcd91a3d7a 100644 --- a/src/driver/Flags.mli +++ b/src/driver/Flags.mli @@ -9,6 +9,7 @@ type t = ; standalone_functions: bool ; use_opencl: bool ; include_source: Frontend.Include_files.t + ; allow_unicode: bool (* ------------------------- *) (* flags which switch compiler "modes" *) ; info: bool diff --git a/src/frontend/Errors.ml b/src/frontend/Errors.ml index bd60684661..fa6846cfc0 100644 --- a/src/frontend/Errors.ml +++ b/src/frontend/Errors.ml @@ -4,7 +4,7 @@ open Core (** Our type of syntax error information *) type syntax_error = - | Lexing of Middle.Location.t + | Lexing of string * Middle.Location.t | UnexpectedEOF of Middle.Location.t | Include of string * Middle.Location.t | Parsing of string * Middle.Location_span.t @@ -57,12 +57,12 @@ let pp_syntax_error ?printed_filename ?code ppf = function (Middle.Location_span.to_string ?printed_filename loc_span) (pp_context_with_message ?code) (message, loc_span.begin_loc) - | Lexing loc -> + | Lexing (message, loc) -> Fmt.pf ppf "Syntax error in %s, lexing error:@,%a@." (Middle.Location.to_string ?printed_filename {loc with col_num= loc.col_num - 1}) (pp_context_with_message ?code) - ("Invalid character found.", loc) + (message, loc) | UnexpectedEOF loc -> Fmt.pf ppf "Syntax error in %s, lexing error:@,%a@." (Middle.Location.to_string ?printed_filename diff --git a/src/frontend/Errors.mli b/src/frontend/Errors.mli index 3dacb831c8..fab3377adc 100644 --- a/src/frontend/Errors.mli +++ b/src/frontend/Errors.mli @@ -2,7 +2,7 @@ (** Our type of syntax error information *) type syntax_error = - | Lexing of Middle.Location.t + | Lexing of string * Middle.Location.t | UnexpectedEOF of Middle.Location.t | Include of string * Middle.Location.t | Parsing of string * Middle.Location_span.t diff --git a/src/frontend/Identifiers.ml b/src/frontend/Identifiers.ml new file mode 100644 index 0000000000..8e16ee7425 --- /dev/null +++ b/src/frontend/Identifiers.ml @@ -0,0 +1,45 @@ +open Common.Unicode + +let allow_unicode = ref false + +let error ~loc msg = + raise + (Errors.SyntaxError + (Errors.Lexing (msg, Preprocessor.location_of_position loc))) + +let validate_ascii_id ~loc id = + Debugging.lexer_logger ("ascii id: " ^ id); + let first = String.get_uint8 id 0 in + if + (first >= Char.code 'A' && first <= Char.code 'Z') + || (first >= Char.code 'a' && first <= Char.code 'z') + then id + else error ~loc "Invalid character found." + +(* Validation based on the + Unicode Standard Annex #31: Unicode Identifiers and Syntax + https://www.unicode.org/reports/tr31 *) + +let validate_utf8_id ~loc id = + if not !allow_unicode then + error ~loc + "Unicode identifiers are not supported without the (experimental) \ + allow-unicode flag"; + if not (String.is_valid_utf_8 id) then + error ~loc "Identifier is not valid UTF-8 string"; + Debugging.lexer_logger ("unicode id: " ^ id); + (* normalize to NFKC as recommended *) + let id = normalize id in + let f pos uchar = + if pos == 0 then ( + if not (Uucp.Id.is_xid_start uchar) then + error ~loc (Fmt.str "Invalid character: '%a'" pp_uchar uchar)) + else if not (Uucp.Id.is_xid_continue uchar) then + error ~loc + (Fmt.str "Invalid character in identifier at offset %d: '%a'" pos + pp_uchar uchar) in + iteri_uchars ~f id; + id + +let validate loc id = + if is_ascii id then validate_ascii_id ~loc id else validate_utf8_id ~loc id diff --git a/src/frontend/Identifiers.mli b/src/frontend/Identifiers.mli new file mode 100644 index 0000000000..d6ef740c5a --- /dev/null +++ b/src/frontend/Identifiers.mli @@ -0,0 +1,2 @@ +val allow_unicode : bool ref +val validate : Lexing.position -> string -> string diff --git a/src/frontend/dune b/src/frontend/dune index 32b6436923..4acc2bb098 100644 --- a/src/frontend/dune +++ b/src/frontend/dune @@ -1,14 +1,20 @@ (library (name frontend) (public_name stanc.frontend) - (libraries core menhirLib yojson fmt middle stan_math_signatures) + (libraries core menhirLib uucp yojson fmt middle stan_math_signatures) (instrumentation (backend bisect_ppx)) (inline_tests) (preprocess (pps ppx_jane ppx_deriving.fold ppx_deriving.map))) -(ocamllex lexer) +(rule + (target lexer.ml) + (deps lexer.mll) + (action + (chdir + %{workspace_root} + (run %{bin:ocamllex} -ml -o %{target} %{deps})))) (rule (targets parsing_errors.ml) diff --git a/src/frontend/lexer.mll b/src/frontend/lexer.mll index 19024bdbd6..a371d56e4a 100644 --- a/src/frontend/lexer.mll +++ b/src/frontend/lexer.mll @@ -37,9 +37,40 @@ , location_span_of_positions (lexbuf.lex_start_p, lexbuf.lex_curr_p) ) } +(* + OCamllex does not know about unicode, it just operates over bytes. + So, we can define all 'valid' byte sequences for UTF-8 like so +*) +(* 110xxxxx *) +let utf8_head_byte2 = ['\192'-'\223'] +(* 1110xxxx *) +let utf8_head_byte3 = ['\224'-'\239'] +(* 11110xxx *) +let utf8_head_byte4 = ['\240'-'\247'] +(* 10xxxxxx *) +let utf8_tail_byte = ['\128'-'\191'] + +(* utf8_1 is ascii *) +let ascii_allowed = ['a'-'z' 'A'-'Z' '0'-'9' '_'] +(* 11 bits of payload *) +let utf8_2 = utf8_head_byte2 utf8_tail_byte +(* 16 bits of payload *) +let utf8_3 = utf8_head_byte3 utf8_tail_byte utf8_tail_byte +(* 21 bits of payload *) +let utf8_4 = utf8_head_byte4 utf8_tail_byte utf8_tail_byte utf8_tail_byte + +(* Any UTF-8-encoded code point, outside the ASCII range. + This set includes more than it should for simplicity. +*) +let utf8_nonascii = utf8_2 | utf8_3 | utf8_4 + +(* identifiers here are overly permissive, and are checked + in the semantic action of the rule that matches here. +*) +let identifier = (ascii_allowed | utf8_nonascii)+ + (* Some auxiliary definition for variables and constants *) let string_literal = '"' [^ '"' '\r' '\n']* '"' -let identifier = ['a'-'z' 'A'-'Z'] ['a'-'z' 'A'-'Z' '0'-'9' '_']* (* TODO: We should probably expand the alphabet *) let integer_constant = ['0'-'9']+ ('_' ['0'-'9']+)* @@ -198,8 +229,10 @@ rule token = parse | string_literal as s { lexer_logger ("string_literal " ^ s) ; Parser.STRINGLITERAL (lexeme lexbuf) } | identifier as id { lexer_logger ("identifier " ^ id) ; - lexer_pos_logger (lexeme_start_p lexbuf); - Parser.IDENTIFIER (lexeme lexbuf) } + let loc = (lexeme_start_p lexbuf) in + lexer_pos_logger loc; + let canonical_id = Identifiers.validate loc id in + Parser.IDENTIFIER (canonical_id) } (* End of file *) | eof { lexer_logger "eof" ; if Preprocessor.size () = 1 @@ -210,7 +243,8 @@ rule token = parse | _ { raise (Errors.SyntaxError (Errors.Lexing - (location_of_position + ("Invalid character found.", + location_of_position (lexeme_start_p (current_buffer ()))))) } diff --git a/src/stan_math_backend/Cpp.ml b/src/stan_math_backend/Cpp.ml index 25453b5679..8c52a391a8 100644 --- a/src/stan_math_backend/Cpp.ml +++ b/src/stan_math_backend/Cpp.ml @@ -389,7 +389,17 @@ module Printing = struct open Fmt let trailing_space (t : 'a Fmt.t) : 'a Fmt.t = fun ppf -> pf ppf "%a@ " t - let pp_identifier ppf = string ppf + + let pp_identifier ppf s = + if Common.Unicode.is_ascii s then string ppf s + else + (* so called "Universal character names" - not required on newer compilers + but hopefully more backward-compatible *) + let f _ c = + let uchar_int = Uchar.to_scalar c in + if uchar_int < 128 then Fmt.char ppf (Char.of_int_exn uchar_int) + else Fmt.pf ppf "\\u%04X" (Uchar.to_scalar c) in + Common.Unicode.iteri_uchars ~f s let rec pp_type_ ppf t = match t with @@ -473,8 +483,8 @@ module Printing = struct pf ppf "<@,%a>" (list ~sep:comma pp_type_) types in match e with | Literal s -> pf ppf "%s" s - | Var id -> string ppf id - | VarRef id -> pf ppf "&%s" id + | Var id -> pp_identifier ppf id + | VarRef id -> pf ppf "&%a" pp_identifier id | Parens e -> pf ppf "(%a)" pp_expr e | Cast (t, e) -> pf ppf "@[(%a)@ %a@]" pp_type_ t pp_expr e | Constructor (t, es) -> @@ -491,7 +501,7 @@ module Printing = struct | StreamInsertion (e, es) -> pf ppf "%a <<@[@ %a@]" pp_expr e (list ~sep:comma pp_expr) es | FunCall (fn, tys, es) -> - pf ppf "@[%s%a(@,%a@])" fn maybe_templates tys + pf ppf "@[%a%a(@,%a@])" pp_identifier fn maybe_templates tys (list ~sep:comma pp_expr) es | MethodCall (e, fn, tys, es) -> pf ppf "@[%a.%s%a(%a)@]" pp_expr e fn maybe_templates tys @@ -519,8 +529,8 @@ module Printing = struct pf ppf "{@[%a@]}" (list ~sep:comma pp_expr) es in let static = if static then "static " else "" in let constexpr = if constexpr then "constexpr " else "" in - pf ppf "@[%s%s%a@ %s%a@]" static constexpr pp_type_ type_ name - pp_init init + pf ppf "@[%s%s%a@ %a%a@]" static constexpr pp_type_ type_ + pp_identifier name pp_init init let rec pp_stmt ppf s = match s with diff --git a/src/stan_math_backend/Cpp_Json.ml b/src/stan_math_backend/Cpp_Json.ml index 652a6daa9b..cf1e694e14 100644 --- a/src/stan_math_backend/Cpp_Json.ml +++ b/src/stan_math_backend/Cpp_Json.ml @@ -65,10 +65,10 @@ let%expect_test "outvar to json pretty" = unslash the ones near a plus*) let replace_cpp_expr s = s - |> String.substr_replace_all ~pattern:{|"|} ~with_:{|\"|} + |> String.substr_replace_all ~pattern:"\\n" ~with_:"" + |> Cpp_str.escaped |> String.substr_replace_all ~pattern:{|\"+|} ~with_:{|" +|} |> String.substr_replace_all ~pattern:{|+\"|} ~with_:{|+ "|} - |> String.substr_replace_all ~pattern:"\\n" ~with_:"" let wrap_in_quotes s = "\"" ^ s ^ "\"" @@ -79,9 +79,12 @@ let out_var_interpolated_json_str vars = let%expect_test "outvar to json" = let var x = {Expr.Fixed.pattern= Var x; meta= Expr.Typed.Meta.empty} in [ ( "var_one" - , SizedType.SArray (SVector (AoS, var "N"), var "K") + , SizedType.SArray + ( SVector + (AoS, Expr.Helpers.binop (var "N") Operator.Minus Expr.Helpers.one) + , var "K" ) , Program.Parameters ) ] |> out_var_interpolated_json_str |> print_endline; [%expect {| - "[{\"name\":\"var_one\",\"type\":{\"name\":\"array\",\"length\":" + std::to_string(K) + ",\"element_type\":{\"name\":\"vector\",\"length\":" + std::to_string(N) + "}},\"block\":\"parameters\"}]" |}] + "[{\"name\":\"var_one\",\"type\":{\"name\":\"array\",\"length\":" + std::to_string(K) + ",\"element_type\":{\"name\":\"vector\",\"length\":" + std::to_string((N -1)) + "}},\"block\":\"parameters\"}]" |}] diff --git a/src/stanc/CLI.ml b/src/stanc/CLI.ml index 7e5b379e93..87cbb5c949 100644 --- a/src/stanc/CLI.ml +++ b/src/stanc/CLI.ml @@ -39,6 +39,12 @@ module Options = struct means the definition will be provided later as a C++ function." in Arg.(value & flag & info ["allow-undefined"] ~doc) + let allow_unicode = + let doc = + "$(i,(Experimental)) Allow unicode characters in the names of functions \ + and variables" in + Arg.(value & flag & info ["allow-unicode"] ~doc) + let auto_format = let doc = "Output a formatted version of the Stan program. The output can be \ @@ -359,6 +365,7 @@ module Conversion = struct let open Options in let+ optimization_level = optimization_level and+ allow_undefined = allow_undefined + and+ allow_unicode = allow_unicode and+ standalone_functions = standalone_functions and+ use_opencl = use_opencl and+ include_source = include_paths @@ -374,6 +381,7 @@ module Conversion = struct Driver.Flags. { optimization_level ; allow_undefined + ; allow_unicode ; functions_only= false ; standalone_functions ; use_opencl diff --git a/src/stancjs/stancjs.ml b/src/stancjs/stancjs.ml index 63bc785249..5a567edaaf 100644 --- a/src/stancjs/stancjs.ml +++ b/src/stancjs/stancjs.ml @@ -140,6 +140,7 @@ let process_flags (flags : 'a Js.opt) includes : (Driver.Flags.t, string) result else if is_flag_set "Oexperimental" then Optimize.Oexperimental else Optimize.O0) ; allow_undefined= is_flag_set "allow-undefined" + ; allow_unicode= is_flag_set "allow-unicode" ; functions_only= is_flag_set "functions-only" ; standalone_functions= is_flag_set "standalone-functions" ; use_opencl= is_flag_set "use-opencl" diff --git a/stanc.opam b/stanc.opam index ca929f57da..bc51a6c96b 100644 --- a/stanc.opam +++ b/stanc.opam @@ -10,6 +10,8 @@ depends: [ "fmt" {= "0.9.0"} "yojson" {= "2.1.0"} "cmdliner" {= "1.3.0"} + "uucp" {= "16.0.0"} + "uunf" {= "16.0.0"} "ocamlformat" {with-test & = "0.26.1"} "bisect_ppx" {with-test} "merlin" {with-test} diff --git a/test/integration/bad/new/stanc.expected b/test/integration/bad/new/stanc.expected index aa3b20cd3b..4a7c673f71 100644 --- a/test/integration/bad/new/stanc.expected +++ b/test/integration/bad/new/stanc.expected @@ -1450,7 +1450,7 @@ Syntax error in 'lexing_error.stan', line 1, column 6, lexing error: ^ ------------------------------------------------- -Invalid character found. +Unicode identifiers are not supported without the (experimental) allow-unicode flag [exit 1] $ ../../../../../install/default/bin/stanc location-scale-bad1.stan Syntax error in 'location-scale-bad1.stan', line 1, column 27 to column 32, parsing error: diff --git a/test/integration/bad/numeric-literal/stanc.expected b/test/integration/bad/numeric-literal/stanc.expected index d016f72709..fc08f62fbd 100644 --- a/test/integration/bad/numeric-literal/stanc.expected +++ b/test/integration/bad/numeric-literal/stanc.expected @@ -1,20 +1,20 @@ $ ../../../../../install/default/bin/stanc int-bad1.stan -Syntax error in 'int-bad1.stan', line 2, column 17, lexing error: +Syntax error in 'int-bad1.stan', line 2, column 11, lexing error: ------------------------------------------------- 1: transformed data { 2: int n = 10_000_; - ^ + ^ 3: } ------------------------------------------------- Invalid character found. [exit 1] $ ../../../../../install/default/bin/stanc int-bad2.stan -Syntax error in 'int-bad2.stan', line 2, column 13, lexing error: +Syntax error in 'int-bad2.stan', line 2, column 11, lexing error: ------------------------------------------------- 1: transformed data { 2: int n = 10__000; - ^ + ^ 3: } ------------------------------------------------- @@ -65,11 +65,11 @@ Syntax error in 'real-bad1.stan', line 2, column 12, lexing error: Invalid character found. [exit 1] $ ../../../../../install/default/bin/stanc real-bad2.stan -Syntax error in 'real-bad2.stan', line 2, column 14, lexing error: +Syntax error in 'real-bad2.stan', line 2, column 12, lexing error: ------------------------------------------------- 1: transformed data { 2: real x = 12_.345; - ^ + ^ 3: } ------------------------------------------------- diff --git a/test/integration/bad/stanc.expected b/test/integration/bad/stanc.expected index 271afcd433..f1cd2c5258 100644 --- a/test/integration/bad/stanc.expected +++ b/test/integration/bad/stanc.expected @@ -2969,6 +2969,19 @@ Semantic error in 'typo.stan', line 2, column 17 to column 34: A returning function was expected but an undeclared identifier 'to_vetor' was supplied. A similar known identifier is 'to_vector' +[exit 1] + $ ../../../../install/default/bin/stanc unicode-without-flag.stan +Syntax error in 'unicode-without-flag.stan', line 4, column 24, lexing error: + ------------------------------------------------- + 2: int J; // number of schools + 3: array[J] real y; // estimated treatment effect (school j) + 4: array[J] real σ; // std err of effect estimate (school j) + ^ + 5: } + 6: parameters { + ------------------------------------------------- + +Unicode identifiers are not supported without the (experimental) allow-unicode flag [exit 1] $ ../../../../install/default/bin/stanc validate_add_expression_dimss_bad.stan Semantic error in 'validate_add_expression_dimss_bad.stan', line 4, column 6 to column 13: diff --git a/test/integration/bad/unicode-without-flag.stan b/test/integration/bad/unicode-without-flag.stan new file mode 100644 index 0000000000..819d17dfbb --- /dev/null +++ b/test/integration/bad/unicode-without-flag.stan @@ -0,0 +1,14 @@ +data { + int J; // number of schools + array[J] real y; // estimated treatment effect (school j) + array[J] real σ; // std err of effect estimate (school j) +} +parameters { + real μ; + array[J] real θ; + real Ï„; +} +model { + θ ~ normal(μ, Ï„); + y ~ normal(θ, σ); +} diff --git a/test/integration/bad/unicode/dune b/test/integration/bad/unicode/dune new file mode 100644 index 0000000000..83b71c6378 --- /dev/null +++ b/test/integration/bad/unicode/dune @@ -0,0 +1,15 @@ +(rule + (targets stanc.output) + (deps + (package stanc) + (:stanfiles + (glob_files *.stan))) + (action + (with-stdout-to + %{targets} + (run %{bin:run_bin_on_args} "%{bin:stanc} --allow-unicode" %{stanfiles})))) + +(rule + (alias runtest) + (action + (diff stanc.expected stanc.output))) diff --git a/test/integration/bad/unicode/invalid-character.stan b/test/integration/bad/unicode/invalid-character.stan new file mode 100644 index 0000000000..0409b1357d --- /dev/null +++ b/test/integration/bad/unicode/invalid-character.stan @@ -0,0 +1,4 @@ +data { + // not in XID data + real â„§; +} diff --git a/test/integration/bad/unicode/invalid-utf8-1.stan b/test/integration/bad/unicode/invalid-utf8-1.stan new file mode 100644 index 0000000000..43f6d8ae4c --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8-1.stan @@ -0,0 +1,3 @@ +data { + int Ã(; +} diff --git a/test/integration/bad/unicode/invalid-utf8-2.stan b/test/integration/bad/unicode/invalid-utf8-2.stan new file mode 100644 index 0000000000..744c7f770d --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8-2.stan @@ -0,0 +1,3 @@ +data { + int ð(Œ(; +} diff --git a/test/integration/bad/unicode/invalid-utf8-3.stan b/test/integration/bad/unicode/invalid-utf8-3.stan new file mode 100644 index 0000000000..8b420d0fac --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8-3.stan @@ -0,0 +1,3 @@ +data { + int ß¿; +} diff --git a/test/integration/bad/unicode/invalid-utf8-4.stan b/test/integration/bad/unicode/invalid-utf8-4.stan new file mode 100644 index 0000000000..3402a20037 --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8-4.stan @@ -0,0 +1,3 @@ +data { + int ñ߿; +} diff --git a/test/integration/bad/unicode/invalid-utf8-5.stan b/test/integration/bad/unicode/invalid-utf8-5.stan new file mode 100644 index 0000000000..222adb014e --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8-5.stan @@ -0,0 +1,3 @@ +data { + int _ñ; +} diff --git a/test/integration/bad/unicode/invalid-utf8.stan b/test/integration/bad/unicode/invalid-utf8.stan new file mode 100644 index 0000000000..e46cd033b3 --- /dev/null +++ b/test/integration/bad/unicode/invalid-utf8.stan @@ -0,0 +1,3 @@ +data { + int í¾¿; +} diff --git a/test/integration/bad/unicode/non-utf-8/dune b/test/integration/bad/unicode/non-utf-8/dune new file mode 100644 index 0000000000..856a7fccef --- /dev/null +++ b/test/integration/bad/unicode/non-utf-8/dune @@ -0,0 +1 @@ +(include ../dune) diff --git a/test/integration/bad/unicode/non-utf-8/stanc.expected b/test/integration/bad/unicode/non-utf-8/stanc.expected new file mode 100644 index 0000000000..05a1d88e5f Binary files /dev/null and b/test/integration/bad/unicode/non-utf-8/stanc.expected differ diff --git a/test/integration/bad/unicode/non-utf-8/utf-16be.stan b/test/integration/bad/unicode/non-utf-8/utf-16be.stan new file mode 100644 index 0000000000..1a0857d47d Binary files /dev/null and b/test/integration/bad/unicode/non-utf-8/utf-16be.stan differ diff --git a/test/integration/bad/unicode/non-utf-8/utf-16le.stan b/test/integration/bad/unicode/non-utf-8/utf-16le.stan new file mode 100644 index 0000000000..c217953814 Binary files /dev/null and b/test/integration/bad/unicode/non-utf-8/utf-16le.stan differ diff --git a/test/integration/bad/unicode/stanc.expected b/test/integration/bad/unicode/stanc.expected new file mode 100644 index 0000000000..91e67d37ac --- /dev/null +++ b/test/integration/bad/unicode/stanc.expected @@ -0,0 +1,90 @@ + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-character.stan +Syntax error in 'invalid-character.stan', line 3, column 6, lexing error: + ------------------------------------------------- + 1: data { + 2: // not in XID data + 3: real â„§; + ^ + 4: } + ------------------------------------------------- + +Invalid character: 'U+2127' +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8-1.stan +Syntax error in 'invalid-utf8-1.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int Ã(; + ^ + 3: } + ------------------------------------------------- + +Invalid character found. +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8-2.stan +Syntax error in 'invalid-utf8-2.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int ð(Œ(; + ^ + 3: } + ------------------------------------------------- + +Invalid character found. +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8-3.stan +Syntax error in 'invalid-utf8-3.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int ß¿; + ^ + 3: } + ------------------------------------------------- + +Invalid character: 'U+07FF' +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8-4.stan +Syntax error in 'invalid-utf8-4.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int ñ߿; + ^ + 3: } + ------------------------------------------------- + +Invalid character in identifier at offset 2: 'U+07FF' +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8-5.stan +Syntax error in 'invalid-utf8-5.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int _ñ; + ^ + 3: } + ------------------------------------------------- + +Invalid character: '_' +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode invalid-utf8.stan +Syntax error in 'invalid-utf8.stan', line 2, column 5, lexing error: + ------------------------------------------------- + 1: data { + 2: int í¾¿; + ^ + 3: } + ------------------------------------------------- + +Identifier is not valid UTF-8 string +[exit 1] + $ ../../../../../install/default/bin/stanc --allow-unicode unicode_normalization.stan +Semantic error in 'unicode_normalization.stan', line 4, column 7 to column 13: + ------------------------------------------------- + 2: real ñabc; + 3: // this is a different encoding than above, should be prevented still! + 4: real ñabc; + ^ + 5: } + ------------------------------------------------- + +Identifier 'ñabc' is already in use. +[exit 1] diff --git a/test/integration/bad/unicode/unicode_normalization.stan b/test/integration/bad/unicode/unicode_normalization.stan new file mode 100644 index 0000000000..a2ef6fe2c0 --- /dev/null +++ b/test/integration/bad/unicode/unicode_normalization.stan @@ -0,0 +1,5 @@ +data { + real ñabc; + // this is a different encoding than above, should be prevented still! + real ñabc; +} diff --git a/test/integration/cli-args/debug-flags.t/run.t b/test/integration/cli-args/debug-flags.t/run.t index 828503554f..47ec2e01f3 100644 --- a/test/integration/cli-args/debug-flags.t/run.t +++ b/test/integration/cli-args/debug-flags.t/run.t @@ -17,6 +17,7 @@ Flags not used elsewhere in the tests Lexer: space Lexer: identifier N {fname=basic.stan; line=2} + Lexer: ascii id: N Lexer: ; Lexer: newline {fname=basic.stan; line=2} @@ -26,6 +27,7 @@ Flags not used elsewhere in the tests Lexer: [ Lexer: identifier N {fname=basic.stan; line=3} + Lexer: ascii id: N Lexer: ] Lexer: space Lexer: int @@ -42,6 +44,7 @@ Flags not used elsewhere in the tests Lexer: space Lexer: identifier y {fname=basic.stan; line=3} + Lexer: ascii id: y Lexer: ; Lexer: newline {fname=basic.stan; line=3} @@ -69,6 +72,7 @@ Flags not used elsewhere in the tests Lexer: space Lexer: identifier theta {fname=basic.stan; line=6} + Lexer: ascii id: theta Lexer: ; Lexer: newline {fname=basic.stan; line=6} @@ -84,11 +88,13 @@ Flags not used elsewhere in the tests Lexer: space Lexer: identifier theta {fname=basic.stan; line=9} + Lexer: ascii id: theta Lexer: space Lexer: ~ Lexer: space Lexer: identifier beta {fname=basic.stan; line=9} + Lexer: ascii id: beta Lexer: ( Lexer: int_constant 1 Lexer: , @@ -103,14 +109,17 @@ Flags not used elsewhere in the tests Lexer: space Lexer: identifier y {fname=basic.stan; line=10} + Lexer: ascii id: y Lexer: space Lexer: ~ Lexer: space Lexer: identifier bernoulli {fname=basic.stan; line=10} + Lexer: ascii id: bernoulli Lexer: ( Lexer: identifier theta {fname=basic.stan; line=10} + Lexer: ascii id: theta Lexer: ) Lexer: ; Lexer: newline diff --git a/test/integration/cli-args/stanc.t b/test/integration/cli-args/stanc.t index ee766e7d4d..7c8029a8bb 100644 --- a/test/integration/cli-args/stanc.t +++ b/test/integration/cli-args/stanc.t @@ -35,6 +35,10 @@ Show help usually means the definition will be provided later as a C++ function. + --allow-unicode + (Experimental) Allow unicode characters in the names of functions + and variables + --auto-format Output a formatted version of the Stan program. The output can be tweaked using --max-line-length and --canonicalize. @@ -201,6 +205,7 @@ Show help + Qmark alias $ stanc -? plain | head NAME @@ -224,7 +229,6 @@ Error when no file passed Usage: %%NAME%% [OPTION]… [MODEL_FILE] Try '%%NAME%% --help' for more information. [124] - Error when multiple files passed $ stanc foo.stan foo2.stan %%NAME%%: too many arguments, don't know what to do with 'foo2.stan' @@ -267,6 +271,7 @@ Can read from stdin + Filename is set to stdin when reading from stdin $ echo 'parameters {real y}' | stanc - Syntax error in 'stdin', line 1, column 18 to column 19, parsing error: diff --git a/test/integration/good/code-gen/cpp.expected b/test/integration/good/code-gen/cpp.expected index fe6d330d45..d4988cabe3 100644 --- a/test/integration/good/code-gen/cpp.expected +++ b/test/integration/good/code-gen/cpp.expected @@ -1,4 +1,4 @@ - $ ../../../../../install/default/bin/stanc --print-cpp 8_schools_ncp.stan + $ ../../../../../install/default/bin/stanc --allow-unicode --print-cpp 8_schools_ncp.stan // Code generated by %%NAME%% %%VERSION%% #include namespace _8_schools_ncp_model_namespace { @@ -88,7 +88,7 @@ class _8_schools_ncp_model final : public model_base_crtp<_8_schools_ncp_model> } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace _8start_with_number_model_namespace { @@ -570,7 +570,7 @@ class _8start_with_number_model final : public model_base_crtp<_8start_with_numb } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace complex_tuples_model_namespace { @@ -1047,7 +1047,7 @@ class complex_tuples_model final : public model_base_crtp } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace container_promotion_model_namespace { @@ -1377,7 +1377,7 @@ class container_promotion_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace cpp_reserved_words_model_namespace { @@ -2454,7 +2454,7 @@ class cpp_reserved_words_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace data_only_functions_model_namespace { @@ -3538,7 +3538,7 @@ class data_only_functions_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace deprecated_jacobian_usage_model_namespace { @@ -4059,7 +4059,7 @@ class deprecated_jacobian_usage_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace double_reject_model_namespace { @@ -4442,7 +4442,7 @@ class double_reject_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace eight_schools_ncp_model_namespace { @@ -4830,7 +4830,7 @@ class eight_schools_ncp_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace funcall_type_promotion_model_namespace { @@ -5391,7 +5391,7 @@ class funcall_type_promotion_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace jacobian_pe_model_namespace { @@ -5852,7 +5852,7 @@ class jacobian_pe_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace mixed_type_arrays_model_namespace { @@ -6350,7 +6350,7 @@ class mixed_type_arrays_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace mother_model_namespace { @@ -12915,7 +12915,7 @@ class mother_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace motherHOF_model_namespace { @@ -17705,7 +17705,7 @@ class motherHOF_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace new_integrate_interface_model_namespace { @@ -20295,7 +20295,7 @@ class new_integrate_interface_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace old_integrate_interface_model_namespace { @@ -25810,7 +25810,7 @@ class old_integrate_interface_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace optimize_glm_model_namespace { @@ -26891,7 +26891,7 @@ class optimize_glm_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace overload_tilde_stanlib_model_namespace { @@ -28601,7 +28601,7 @@ class overload_tilde_stanlib_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace overload_tilde_udfs_model_namespace { @@ -29062,7 +29062,7 @@ class overload_tilde_udfs_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace overloading_templating_model_namespace { @@ -29838,7 +29838,7 @@ class overloading_templating_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace param_constraint_model_namespace { @@ -30317,7 +30317,7 @@ class param_constraint_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace print_unicode_model_namespace { @@ -30797,7 +30797,7 @@ class print_unicode_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace promotion_model_namespace { @@ -31174,7 +31174,7 @@ class promotion_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace recursive_slicing_model_namespace { @@ -31958,7 +31958,7 @@ class recursive_slicing_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace reduce_sum_m1_model_namespace { @@ -32614,7 +32614,7 @@ class reduce_sum_m1_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace reduce_sum_m2_model_namespace { @@ -34503,7 +34503,7 @@ class reduce_sum_m2_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace reduce_sum_m3_model_namespace { @@ -39166,7 +39166,7 @@ class reduce_sum_m3_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace reject_exit_model_namespace { @@ -40699,7 +40699,7 @@ class reject_exit_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace return_position_types_model_namespace { @@ -41185,7 +41185,7 @@ class return_position_types_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace self_assign_model_namespace { @@ -41561,7 +41561,7 @@ class self_assign_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace shadowing_model_namespace { @@ -42078,7 +42078,7 @@ class shadowing_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace single_argument_lpmf_model_namespace { @@ -43507,7 +43507,7 @@ class single_argument_lpmf_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace stochastic_matrices_model_namespace { @@ -43941,7 +43941,7 @@ class stochastic_matrices_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace sum_to_zero_model_namespace { @@ -44817,7 +44817,7 @@ class sum_to_zero_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace tilde_block_model_namespace { @@ -45449,7 +45449,7 @@ class tilde_block_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace transform_model_namespace { @@ -46240,7 +46240,7 @@ class transform_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace truncate_model_namespace { @@ -48989,7 +48989,7 @@ class truncate_model final : public model_base_crtp { } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace udf_tilde_stmt_conflict_model_namespace { @@ -49547,7 +49547,7 @@ class udf_tilde_stmt_conflict_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template +namespace unicode_identifiers_model_namespace { +using stan::model::model_base_crtp; +using namespace stan::math; +stan::math::profile_map profiles__; +static constexpr std::array locations_array__ = + {" (found before start of program)", + " (in 'unicode_identifiers.stan', line 7, column 2 to column 10)", + " (in 'unicode_identifiers.stan', line 8, column 2 to column 19)", + " (in 'unicode_identifiers.stan', line 9, column 2 to column 19)", + " (in 'unicode_identifiers.stan', line 19, column 2 to column 13)", + " (in 'unicode_identifiers.stan', line 20, column 2 to column 30)", + " (in 'unicode_identifiers.stan', line 21, column 2 to column 15)", + " (in 'unicode_identifiers.stan', line 12, column 2 to column 22)", + " (in 'unicode_identifiers.stan', line 13, column 2 to column 21)", + " (in 'unicode_identifiers.stan', line 2, column 2 to column 17)", + " (in 'unicode_identifiers.stan', line 3, column 8 to column 9)", + " (in 'unicode_identifiers.stan', line 3, column 2 to column 18)", + " (in 'unicode_identifiers.stan', line 4, column 8 to column 9)", + " (in 'unicode_identifiers.stan', line 4, column 2 to column 28)", + " (in 'unicode_identifiers.stan', line 8, column 8 to column 9)"}; +class unicode_identifiers_model final : public model_base_crtp { + private: + int J; + std::vector y; + std::vector \u03C3; + public: + ~unicode_identifiers_model() {} + unicode_identifiers_model(stan::io::var_context& context__, unsigned int + random_seed__ = 0, std::ostream* + pstream__ = nullptr) : model_base_crtp(0) { + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + using local_scalar_t__ = double; + auto base_rng__ = stan::services::util::create_rng(random_seed__, 0); + // suppress unused var warning + (void) base_rng__; + static constexpr const char* function__ = + "unicode_identifiers_model_namespace::unicode_identifiers_model"; + // suppress unused var warning + (void) function__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + try { + current_statement__ = 9; + context__.validate_dims("data initialization", "J", "int", + std::vector{}); + J = std::numeric_limits::min(); + current_statement__ = 9; + J = context__.vals_i("J")[(1 - 1)]; + current_statement__ = 9; + stan::math::check_greater_or_equal(function__, "J", J, 0); + current_statement__ = 10; + stan::math::validate_non_negative_index("y", "J", J); + current_statement__ = 11; + context__.validate_dims("data initialization", "y", "double", + std::vector{static_cast(J)}); + y = std::vector(J, std::numeric_limits::quiet_NaN()); + current_statement__ = 11; + y = context__.vals_r("y"); + current_statement__ = 12; + stan::math::validate_non_negative_index("\317\203", "J", J); + current_statement__ = 13; + context__.validate_dims("data initialization", "\317\203", "double", + std::vector{static_cast(J)}); + \u03C3 = std::vector(J, + std::numeric_limits::quiet_NaN()); + current_statement__ = 13; + \u03C3 = context__.vals_r("\317\203"); + current_statement__ = 13; + stan::math::check_greater_or_equal(function__, "\317\203", \u03C3, 0); + current_statement__ = 14; + stan::math::validate_non_negative_index("\316\270", "J", J); + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + num_params_r__ = 1 + J + 1; + } + inline std::string model_name() const final { + return "unicode_identifiers_model"; + } + inline std::vector model_compile_info() const noexcept { + return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", + "stancflags = --allow-unicode --print-cpp"}; + } + // Base log prob + template * = nullptr, + stan::require_vector_like_vt* = nullptr, + stan::require_not_st_var* = nullptr> + inline stan::scalar_type_t + log_prob_impl(VecR& params_r__, VecI& params_i__, std::ostream* + pstream__ = nullptr) const { + using T__ = stan::scalar_type_t; + using local_scalar_t__ = T__; + T__ lp__(0.0); + stan::math::accumulator lp_accum__; + stan::io::deserializer in__(params_r__, params_i__); + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + static constexpr const char* function__ = + "unicode_identifiers_model_namespace::log_prob"; + // suppress unused var warning + (void) function__; + try { + current_statement__ = 1; + auto \u03BC = in__.template read(); + current_statement__ = 2; + auto \u03B8 = in__.template read>(J); + current_statement__ = 3; + auto \u03C4 = + in__.template read_constrain_lb(0, + lp__); + { + current_statement__ = 7; + lp_accum__.add(stan::math::normal_lpdf(\u03B8, \u03BC, + \u03C4)); + current_statement__ = 8; + lp_accum__.add(stan::math::normal_lpdf(y, \u03B8, \u03C3)); + } + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + lp_accum__.add(lp__); + return lp_accum__.sum(); + } + // Reverse mode autodiff log prob + template * = nullptr, + stan::require_vector_like_vt* = nullptr, + stan::require_st_var* = nullptr> + inline stan::scalar_type_t + log_prob_impl(VecR& params_r__, VecI& params_i__, std::ostream* + pstream__ = nullptr) const { + using T__ = stan::scalar_type_t; + using local_scalar_t__ = T__; + T__ lp__(0.0); + stan::math::accumulator lp_accum__; + stan::io::deserializer in__(params_r__, params_i__); + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + static constexpr const char* function__ = + "unicode_identifiers_model_namespace::log_prob"; + // suppress unused var warning + (void) function__; + try { + current_statement__ = 1; + auto \u03BC = in__.template read(); + current_statement__ = 2; + auto \u03B8 = in__.template read>(J); + current_statement__ = 3; + auto \u03C4 = + in__.template read_constrain_lb(0, + lp__); + { + current_statement__ = 7; + lp_accum__.add(stan::math::normal_lpdf(\u03B8, \u03BC, + \u03C4)); + current_statement__ = 8; + lp_accum__.add(stan::math::normal_lpdf(y, \u03B8, \u03C3)); + } + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + lp_accum__.add(lp__); + return lp_accum__.sum(); + } + template * = nullptr, stan::require_vector_like_vt* = nullptr, stan::require_vector_vt* = nullptr> + inline void + write_array_impl(RNG& base_rng__, VecR& params_r__, VecI& params_i__, + VecVar& vars__, const bool + emit_transformed_parameters__ = true, const bool + emit_generated_quantities__ = true, std::ostream* + pstream__ = nullptr) const { + using local_scalar_t__ = double; + stan::io::deserializer in__(params_r__, params_i__); + stan::io::serializer out__(vars__); + static constexpr bool propto__ = true; + // suppress unused var warning + (void) propto__; + double lp__ = 0.0; + // suppress unused var warning + (void) lp__; + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + stan::math::accumulator lp_accum__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + constexpr bool jacobian__ = false; + // suppress unused var warning + (void) jacobian__; + static constexpr const char* function__ = + "unicode_identifiers_model_namespace::write_array"; + // suppress unused var warning + (void) function__; + try { + current_statement__ = 1; + auto \u03BC = in__.template read(); + current_statement__ = 2; + auto \u03B8 = in__.template read>(J); + current_statement__ = 3; + auto \u03C4 = + in__.template read_constrain_lb(0, + lp__); + out__.write(\u03BC); + out__.write(\u03B8); + out__.write(\u03C4); + if (stan::math::logical_negation( + (stan::math::primitive_value(emit_transformed_parameters__) || + stan::math::primitive_value(emit_generated_quantities__)))) { + return ; + } + if (stan::math::logical_negation(emit_generated_quantities__)) { + return ; + } + int \u00F1abc = std::numeric_limits::min(); + current_statement__ = 5; + \u00F1abc = (1 + + stan::math::to_int( + stan::model::rvalue(\u03B8, "\316\270", + stan::model::index_uni((J - 1))))); + current_statement__ = 6; + if (pstream__) { + stan::math::stan_print(pstream__, \u00F1abc); + *(pstream__) << std::endl; + } + out__.write(\u00F1abc); + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + } + template * = nullptr, + stan::require_vector_like_vt* = nullptr> + inline void + unconstrain_array_impl(const VecVar& params_r__, const VecI& params_i__, + VecVar& vars__, std::ostream* pstream__ = nullptr) const { + using local_scalar_t__ = double; + stan::io::deserializer in__(params_r__, params_i__); + stan::io::serializer out__(vars__); + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + try { + local_scalar_t__ \u03BC = DUMMY_VAR__; + current_statement__ = 1; + \u03BC = in__.read(); + out__.write(\u03BC); + std::vector \u03B8 = + std::vector(J, DUMMY_VAR__); + current_statement__ = 2; + stan::model::assign(\u03B8, + in__.read>(J), + "assigning variable \316\270"); + out__.write(\u03B8); + local_scalar_t__ \u03C4 = DUMMY_VAR__; + current_statement__ = 3; + \u03C4 = in__.read(); + out__.write_free_lb(0, \u03C4); + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + } + template * = nullptr> + inline void + transform_inits_impl(const stan::io::var_context& context__, VecVar& + vars__, std::ostream* pstream__ = nullptr) const { + using local_scalar_t__ = double; + stan::io::serializer out__(vars__); + int current_statement__ = 0; + // suppress unused var warning + (void) current_statement__; + local_scalar_t__ DUMMY_VAR__(std::numeric_limits::quiet_NaN()); + // suppress unused var warning + (void) DUMMY_VAR__; + try { + current_statement__ = 1; + context__.validate_dims("parameter initialization", "\316\274", + "double", std::vector{}); + current_statement__ = 2; + context__.validate_dims("parameter initialization", "\316\270", + "double", std::vector{static_cast(J)}); + current_statement__ = 3; + context__.validate_dims("parameter initialization", "\317\204", + "double", std::vector{}); + local_scalar_t__ \u03BC = DUMMY_VAR__; + current_statement__ = 1; + \u03BC = context__.vals_r("\316\274")[(1 - 1)]; + out__.write(\u03BC); + std::vector \u03B8 = + std::vector(J, DUMMY_VAR__); + current_statement__ = 2; + \u03B8 = context__.vals_r("\316\270"); + out__.write(\u03B8); + local_scalar_t__ \u03C4 = DUMMY_VAR__; + current_statement__ = 3; + \u03C4 = context__.vals_r("\317\204")[(1 - 1)]; + out__.write_free_lb(0, \u03C4); + } catch (const std::exception& e) { + stan::lang::rethrow_located(e, locations_array__[current_statement__]); + } + } + inline void + get_param_names(std::vector& names__, const bool + emit_transformed_parameters__ = true, const bool + emit_generated_quantities__ = true) const { + names__ = std::vector{"\316\274", "\316\270", "\317\204"}; + if (emit_transformed_parameters__) {} + if (emit_generated_quantities__) { + std::vector temp{"\303\261abc"}; + names__.reserve(names__.size() + temp.size()); + names__.insert(names__.end(), temp.begin(), temp.end()); + } + } + inline void + get_dims(std::vector>& dimss__, const bool + emit_transformed_parameters__ = true, const bool + emit_generated_quantities__ = true) const { + dimss__ = std::vector>{std::vector{}, + std::vector{static_cast(J)}, + std::vector{}}; + if (emit_transformed_parameters__) {} + if (emit_generated_quantities__) { + std::vector> temp{std::vector{}}; + dimss__.reserve(dimss__.size() + temp.size()); + dimss__.insert(dimss__.end(), temp.begin(), temp.end()); + } + } + inline void + constrained_param_names(std::vector& param_names__, bool + emit_transformed_parameters__ = true, bool + emit_generated_quantities__ = true) const final { + param_names__.emplace_back(std::string() + "\316\274"); + for (int sym1__ = 1; sym1__ <= J; ++sym1__) { + param_names__.emplace_back(std::string() + "\316\270" + '.' + + std::to_string(sym1__)); + } + param_names__.emplace_back(std::string() + "\317\204"); + if (emit_transformed_parameters__) {} + if (emit_generated_quantities__) { + param_names__.emplace_back(std::string() + "\303\261abc"); + } + } + inline void + unconstrained_param_names(std::vector& param_names__, bool + emit_transformed_parameters__ = true, bool + emit_generated_quantities__ = true) const final { + param_names__.emplace_back(std::string() + "\316\274"); + for (int sym1__ = 1; sym1__ <= J; ++sym1__) { + param_names__.emplace_back(std::string() + "\316\270" + '.' + + std::to_string(sym1__)); + } + param_names__.emplace_back(std::string() + "\317\204"); + if (emit_transformed_parameters__) {} + if (emit_generated_quantities__) { + param_names__.emplace_back(std::string() + "\303\261abc"); + } + } + inline std::string get_constrained_sizedtypes() const { + return std::string("[{\"name\":\"\316\274\",\"type\":{\"name\":\"real\"},\"block\":\"parameters\"},{\"name\":\"\316\270\",\"type\":{\"name\":\"array\",\"length\":" + std::to_string(J) + ",\"element_type\":{\"name\":\"real\"}},\"block\":\"parameters\"},{\"name\":\"\317\204\",\"type\":{\"name\":\"real\"},\"block\":\"parameters\"},{\"name\":\"\303\261abc\",\"type\":{\"name\":\"int\"},\"block\":\"generated_quantities\"}]"); + } + inline std::string get_unconstrained_sizedtypes() const { + return std::string("[{\"name\":\"\316\274\",\"type\":{\"name\":\"real\"},\"block\":\"parameters\"},{\"name\":\"\316\270\",\"type\":{\"name\":\"array\",\"length\":" + std::to_string(J) + ",\"element_type\":{\"name\":\"real\"}},\"block\":\"parameters\"},{\"name\":\"\317\204\",\"type\":{\"name\":\"real\"},\"block\":\"parameters\"},{\"name\":\"\303\261abc\",\"type\":{\"name\":\"int\"},\"block\":\"generated_quantities\"}]"); + } + // Begin method overload boilerplate + template inline void + write_array(RNG& base_rng, Eigen::Matrix& params_r, + Eigen::Matrix& vars, const bool + emit_transformed_parameters = true, const bool + emit_generated_quantities = true, std::ostream* + pstream = nullptr) const { + const size_t num_params__ = ((1 + J) + 1); + const size_t num_transformed = emit_transformed_parameters * (0); + const size_t num_gen_quantities = emit_generated_quantities * (1); + const size_t num_to_write = num_params__ + num_transformed + + num_gen_quantities; + std::vector params_i; + vars = Eigen::Matrix::Constant(num_to_write, + std::numeric_limits::quiet_NaN()); + write_array_impl(base_rng, params_r, params_i, vars, + emit_transformed_parameters, emit_generated_quantities, pstream); + } + template inline void + write_array(RNG& base_rng, std::vector& params_r, std::vector& + params_i, std::vector& vars, bool + emit_transformed_parameters = true, bool + emit_generated_quantities = true, std::ostream* + pstream = nullptr) const { + const size_t num_params__ = ((1 + J) + 1); + const size_t num_transformed = emit_transformed_parameters * (0); + const size_t num_gen_quantities = emit_generated_quantities * (1); + const size_t num_to_write = num_params__ + num_transformed + + num_gen_quantities; + vars = std::vector(num_to_write, + std::numeric_limits::quiet_NaN()); + write_array_impl(base_rng, params_r, params_i, vars, + emit_transformed_parameters, emit_generated_quantities, pstream); + } + template inline T_ + log_prob(Eigen::Matrix& params_r, std::ostream* pstream = nullptr) const { + Eigen::Matrix params_i; + return log_prob_impl(params_r, params_i, pstream); + } + template inline T_ + log_prob(std::vector& params_r, std::vector& params_i, + std::ostream* pstream = nullptr) const { + return log_prob_impl(params_r, params_i, pstream); + } + inline void + transform_inits(const stan::io::var_context& context, + Eigen::Matrix& params_r, std::ostream* + pstream = nullptr) const final { + std::vector params_r_vec(params_r.size()); + std::vector params_i; + transform_inits(context, params_i, params_r_vec, pstream); + params_r = Eigen::Map>(params_r_vec.data(), + params_r_vec.size()); + } + inline void + transform_inits(const stan::io::var_context& context, std::vector& + params_i, std::vector& vars, std::ostream* + pstream__ = nullptr) const { + vars.resize(num_params_r__); + transform_inits_impl(context, vars, pstream__); + } + inline void + unconstrain_array(const std::vector& params_constrained, + std::vector& params_unconstrained, std::ostream* + pstream = nullptr) const { + const std::vector params_i; + params_unconstrained = std::vector(num_params_r__, + std::numeric_limits::quiet_NaN()); + unconstrain_array_impl(params_constrained, params_i, + params_unconstrained, pstream); + } + inline void + unconstrain_array(const Eigen::Matrix& params_constrained, + Eigen::Matrix& params_unconstrained, + std::ostream* pstream = nullptr) const { + const std::vector params_i; + params_unconstrained = Eigen::Matrix::Constant(num_params_r__, + std::numeric_limits::quiet_NaN()); + unconstrain_array_impl(params_constrained, params_i, + params_unconstrained, pstream); + } +}; +} +using stan_model = unicode_identifiers_model_namespace::unicode_identifiers_model; +#ifndef USING_R +// Boilerplate +stan::model::model_base& +new_model(stan::io::var_context& data_context, unsigned int seed, + std::ostream* msg_stream) { + stan_model* m = new stan_model(data_context, seed, msg_stream); + return *m; +} +stan::math::profile_map& get_stan_profile_data() { + return unicode_identifiers_model_namespace::profiles__; +} +#endif +[exit 0] + $ ../../../../../install/default/bin/stanc --allow-unicode --print-cpp user_constrain.stan // Code generated by %%NAME%% %%VERSION%% #include namespace user_constrain_model_namespace { @@ -49928,7 +50410,7 @@ class user_constrain_model final : public model_base_crtp } inline std::vector model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace variable_named_context_model_namespace { @@ -50321,7 +50803,7 @@ class variable_named_context_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template namespace vector_truncate_model_namespace { @@ -50831,7 +51313,7 @@ class vector_truncate_model final : public model_base_crtp model_compile_info() const noexcept { return std::vector{"stanc_version = %%NAME%%3 %%VERSION%%", - "stancflags = --print-cpp"}; + "stancflags = --allow-unicode --print-cpp"}; } // Base log prob template J; // number of schools + array[J] real y; // estimated treatment effect (school j) + array[J] real σ; // std err of effect estimate (school j) +} +parameters { + real μ; + array[J] real θ; + real Ï„; +} +model { + θ ~ normal(μ, Ï„); + y ~ normal(θ, σ); + +} + + +generated quantities { + int ñabc; + ñabc = 1 + to_int(θ[J-1]); + print(ñabc); +} diff --git a/test/integration/good/unicode/basic_unicode.stan b/test/integration/good/unicode/basic_unicode.stan new file mode 100644 index 0000000000..6e4a424927 --- /dev/null +++ b/test/integration/good/unicode/basic_unicode.stan @@ -0,0 +1,14 @@ +data { + int J; // number of schools + array[J] real y; // estimated treatment effect (school j) + array[J] real σ; // std err of effect estimate (school j) +} +parameters { + real μ; + array[J] real θ; + real Ï„; +} +model { + θ ~ normal(μ, Ï„); + y ~ normal(θ, σ); +} diff --git a/test/integration/good/unicode/dune b/test/integration/good/unicode/dune new file mode 100644 index 0000000000..956c7319cf --- /dev/null +++ b/test/integration/good/unicode/dune @@ -0,0 +1,18 @@ +(rule + (targets pretty.output) + (deps + (package stanc) + (:stanfiles + (glob_files *.stan))) + (action + (with-stdout-to + %{targets} + (run + %{bin:run_bin_on_args} + "%{bin:stanc} --auto-format --allow-unicode" + %{stanfiles})))) + +(rule + (alias runtest) + (action + (diff pretty.expected pretty.output))) diff --git a/test/integration/good/unicode/mixed-scripts.stan b/test/integration/good/unicode/mixed-scripts.stan new file mode 100644 index 0000000000..7804c1ce04 --- /dev/null +++ b/test/integration/good/unicode/mixed-scripts.stan @@ -0,0 +1,8 @@ + +data { + // https://www.unicode.org/reports/tr39/#Confusable_Detection + // greek + real Γ; + // cyrillic + real Г; +} diff --git a/test/integration/good/unicode/pretty.expected b/test/integration/good/unicode/pretty.expected new file mode 100644 index 0000000000..d03de2e069 --- /dev/null +++ b/test/integration/good/unicode/pretty.expected @@ -0,0 +1,47 @@ + $ ../../../../../install/default/bin/stanc --auto-format --allow-unicode basic_unicode.stan +data { + int J; // number of schools + array[J] real y; // estimated treatment effect (school j) + array[J] real σ; // std err of effect estimate (school j) +} +parameters { + real μ; + array[J] real θ; + real Ï„; +} +model { + θ ~ normal(μ, Ï„); + y ~ normal(θ, σ); +} + +[exit 0] + $ ../../../../../install/default/bin/stanc --auto-format --allow-unicode mixed-scripts.stan +data { + // https://www.unicode.org/reports/tr39/#Confusable_Detection + // greek + real Γ; + // cyrillic + real Г; +} + +[exit 0] + $ ../../../../../install/default/bin/stanc --auto-format --allow-unicode unicode_special_funs.stan +functions { + real β_lpdf(real θ, real α, real β) { + return beta_lpdf(θ | α, β); + } + + void mañana_lp(real μ) { + target += logit(μ); + } +} +parameters { + real u; +} +model { + mañana_lp(u); + u ~ β(2, 2); + target += β_lpdf(u | 2, 2); +} + +[exit 0] diff --git a/test/integration/good/unicode/unicode_special_funs.stan b/test/integration/good/unicode/unicode_special_funs.stan new file mode 100644 index 0000000000..4755499af2 --- /dev/null +++ b/test/integration/good/unicode/unicode_special_funs.stan @@ -0,0 +1,19 @@ +functions { + real β_lpdf(real θ, real α, real β) { + return beta_lpdf(θ | α, β); + } + + void mañana_lp(real µ){ + target += logit(µ); + } +} + +parameters { + real u; +} + +model { + mañana_lp(u); + u ~ β(2, 2); + target += β_lpdf(u | 2, 2); +}