diff --git a/src/ast/text.ml b/src/ast/text.ml index 715058535..d43be566d 100644 --- a/src/ast/text.ml +++ b/src/ast/text.ml @@ -38,7 +38,7 @@ type data = } let pp_data fmt (d : data) = - pf fmt {|(data%a %a %S)|} pp_id_opt d.id pp_data_mode d.mode d.init + pf fmt {|(data%a %a %a)|} pp_id_opt d.id pp_data_mode d.mode pp_name d.init type elem_mode = | Elem_passive diff --git a/src/ast/types.ml b/src/ast/types.ml index ea06dc3c2..28b723801 100644 --- a/src/ast/types.ml +++ b/src/ast/types.ml @@ -36,6 +36,28 @@ type _ indice = let pp_id fmt id = pf fmt "$%s" id +let pp_name_inner fmt s = + let pp_hex_char fmt c = pf fmt "\\%02x" (Char.code c) in + let pp_char fmt = function + | '\n' -> string fmt "\\n" + | '\r' -> string fmt "\\r" + | '\t' -> string fmt "\\t" + | '\'' -> string fmt "\\'" + | '\"' -> string fmt "\\\"" + | '\\' -> string fmt "\\\\" + | c -> + let ci = Char.code c in + if 0x20 <= ci && ci < 0x7f then char fmt c else pp_hex_char fmt c + in + let pp_unicode_char fmt = function + | (0x09 | 0x0a) as c -> pp_char fmt (Char.chr c) + | uc when 0x20 <= uc && uc < 0x7f -> pp_char fmt (Char.chr uc) + | uc -> pf fmt "\\u{%02x}" uc + in + String.iter (fun c -> pp_unicode_char fmt (Char.code c)) s + +let pp_name fmt s = pf fmt {|"%a"|} pp_name_inner s + let pp_id_opt fmt = function None -> () | Some i -> pf fmt " %a" pp_id i let pp_indice (type kind) fmt : kind indice -> unit = function diff --git a/src/parser/text_lexer.ml b/src/parser/text_lexer.ml index 737c36b6b..50f517658 100644 --- a/src/parser/text_lexer.ml +++ b/src/parser/text_lexer.ml @@ -49,7 +49,7 @@ let mk_string buf s = | 't' -> '\t' | '\\' -> '\\' | '\'' -> '\'' - | '\"' -> '\"' + | '"' -> '"' | 'u' -> let j = !i + 2 in begin @@ -103,13 +103,13 @@ let frac = [%sedlex.regexp? num] let float = [%sedlex.regexp? - ( Opt sign, num, '.', Opt frac - | Opt sign, num, Opt ('.', Opt frac), ('e' | 'E'), Opt sign, num - | Opt sign, "0x", hexnum, '.', Opt hexfrac - | Opt sign, "0x", hexnum, Opt ('.', Opt hexfrac), ('p' | 'P'), Opt sign, num - | Opt sign, "inf" - | Opt sign, "nan" - | Opt sign, "nan:", "0x", hexnum )] + ( Opt sign, num, '.', Opt frac + | Opt sign, num, Opt ('.', Opt frac), ('e' | 'E'), Opt sign, num + | Opt sign, "0x", hexnum, '.', Opt hexfrac + | Opt sign, "0x", hexnum, Opt ('.', Opt hexfrac), ('p' | 'P'), Opt sign, num + | Opt sign, "inf" + | Opt sign, "nan" + | Opt sign, "nan:", "0x", hexnum )] let nat = [%sedlex.regexp? num | "0x", hexnum] @@ -119,15 +119,38 @@ let num = [%sedlex.regexp? float | int | nat] let id_char = [%sedlex.regexp? - ( '0' .. '9' - | 'a' .. 'z' - | 'A' .. 'Z' - | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '/' | ':' - | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '_' | '`' | '|' | '~' )] + ( '0' .. '9' + | 'a' .. 'z' + | 'A' .. 'Z' + | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' | '/' | ':' + | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '_' | '`' | '|' | '~' )] let string_elem = [%sedlex.regexp? Sub (any, "\"") | "\\\""] -let name = [%sedlex.regexp? "\"", Star string_elem, "\""] +let utf8cont = [%sedlex.regexp? '\x80' .. '\xbf'] + +let utf8enc = + [%sedlex.regexp? + ( '\xc2' .. '\xdf', utf8cont + | '\xe0', '\xa0' .. '\xbf', utf8cont + | '\xed', '\x80' .. '\x9f', utf8cont + | ('\xe1' .. '\xec' | '\xee' .. '\xef'), utf8cont, utf8cont + | '\xf0', '\x90' .. '\xbf', utf8cont, utf8cont + | '\xf4', '\x80' .. '\x8f', utf8cont, utf8cont + | '\xf1' .. '\xf3', utf8cont, utf8cont, utf8cont )] + +let escape = [%sedlex.regexp? 'n' | 'r' | 't' | '\\' | '\'' | '"'] + +let character = + [%sedlex.regexp? + ( Sub (any, ('"' | '\\' | '\x00' .. '\x1f' | '\x7f' .. '\xff')) + | utf8enc + | '\\', escape + | '\\', hexdigit, hexdigit + | '\\', "u{", hexnum, '}' )] + +(* TODO: use character here instead of string_elem ? *) +let name = [%sedlex.regexp? '"', Star string_elem, '"'] let operator = [%sedlex.regexp? Plus ('0' .. '9' | 'a' .. 'z' | '.' | '_' | ':'), Star name] @@ -142,7 +165,7 @@ let bad_num = [%sedlex.regexp? num, Plus id] let annot_atom = [%sedlex.regexp? - Plus id_char | num | name | ',' | ';' | '[' | ']' | '{' | '}'] + Plus id_char | num | name | ',' | ';' | '[' | ']' | '{' | '}'] let keywords = let tbl = Hashtbl.create 512 in @@ -449,33 +472,34 @@ let keywords = tbl let rec token buf = + (* Fmt.epr "LXM = %S@\n" (Utf8.lexeme buf); *) match%sedlex buf with | Plus any_blank -> token buf | bad_num | bad_id | bad_name -> unknown_operator buf | num -> NUM (Utf8.lexeme buf) - | operator -> begin + | operator -> let operator = Utf8.lexeme buf in match Hashtbl.find_opt keywords operator with | None -> unknown_operator buf | Some v -> v - end - (* comment *) - | ";;" -> - single_comment buf; - token buf - | "(;" -> - comment buf; +end +(* comment *) +| ";;" -> + single_comment buf; + token buf +| "(;" -> + comment buf; + token buf +(* custom annotation *) +| "(@", name -> + let annotid = Utf8.lexeme buf in + let annotid = String.sub annotid 3 (String.length annotid - 4) in + let annotid = mk_string buf annotid in + if String.equal "" annotid then raise Empty_annotation_id + else + let items = Sexp.List (annot buf) in + Annot.(record_annot annotid items); token buf - (* custom annotation *) - | "(@", name -> - let annotid = Utf8.lexeme buf in - let annotid = String.sub annotid 3 (String.length annotid - 4) in - let annotid = mk_string buf annotid in - if String.equal "" annotid then raise Empty_annotation_id - else - let items = Sexp.List (annot buf) in - Annot.(record_annot annotid items); - token buf | "(@", Plus id_char -> let annotid = Utf8.lexeme buf in let annotid = String.sub annotid 2 (String.length annotid - 2) in @@ -501,7 +525,6 @@ let rec token buf = NAME name | "\"", Star string_elem -> raise Unclosed_string | eof -> EOF - (* | "" -> EOF *) | any -> unknown_operator buf | _ -> unknown_operator buf diff --git a/src/script/script.ml b/src/script/script.ml index 58b7ad577..cae1fb204 100644 --- a/src/script/script.ml +++ b/src/script/script.ml @@ -198,8 +198,8 @@ let run ~no_exhaustion ~optimize script = link_state | Assert (Assert_malformed_quote (m, expected)) -> Log.debug0 "*** assert_malformed_quote@\n"; - (* TODO: use Parse.Text.Module.from_string instead *) - let got = Parse.Text.Script.from_string m in + let m = Fmt.str "%a" pp_name_inner m in + let got = Parse.Text.Module.from_string m in let+ () = match got with | Error got -> check_error ~expected ~got @@ -208,7 +208,6 @@ let run ~no_exhaustion ~optimize script = Compile.Text.until_binary ~unsafe ~rac:false ~srac:false m in check_error_result expected got - | _ -> assert false in link_state | Assert (Assert_invalid_binary (m, expected)) -> diff --git a/test/fmt/data.wat b/test/fmt/data.wat new file mode 100644 index 000000000..b00d322e7 --- /dev/null +++ b/test/fmt/data.wat @@ -0,0 +1,4 @@ +(module + (memory $m 1) + (data $d (i32.const 0) "hello" "\n" "\\n" "\\" "\\'" "\'" "\\r" "\r" "\\t" "\t" "\\\"" "\"" "world" "!") +) diff --git a/test/fmt/dune b/test/fmt/dune index 8c5d72d10..c159d6f22 100644 --- a/test/fmt/dune +++ b/test/fmt/dune @@ -7,6 +7,7 @@ (deps %{bin:owi} print_simplified.exe + data.wat done.wat m.wat locals.wat diff --git a/test/fmt/print.t b/test/fmt/print.t index 02315233c..baa543fdd 100644 --- a/test/fmt/print.t +++ b/test/fmt/print.t @@ -32,3 +32,9 @@ print simplified: ) (start 1) ) +print data: + $ owi fmt data.wat + (module + (memory $m 1) + (data $d (memory 0) (offset i32.const 0) "hello\n\\n\\\\\'\'\\r\u{0d}\\t\t\\\"\"world!") + ) diff --git a/test/script/passing.t b/test/script/passing.t index afa8785af..e5311683d 100644 --- a/test/script/passing.t +++ b/test/script/passing.t @@ -38,6 +38,8 @@ $ owi script --no-exhaustion passing/quickstart.wast $ owi script --no-exhaustion passing/relop.wast $ owi script --no-exhaustion passing/stringinitmsg.wast + expected illegal escape but got (unknown operator unexpected character `"\""`) + [54] $ owi script --no-exhaustion passing/type_abbreviations.wast $ owi script --no-exhaustion passing/typecheck3.wast $ owi script --no-exhaustion passing/typecheck4.wast diff --git a/test/script/reference.t b/test/script/reference.t index 82fb02efb..80faf2b2c 100644 --- a/test/script/reference.t +++ b/test/script/reference.t @@ -74,8 +74,8 @@ $ owi script --no-exhaustion reference/memory_trap.wast $ owi script --no-exhaustion reference/memory.wast $ owi script --no-exhaustion reference/names.wast - 42 - 123 + unknown operator unexpected character `"\""` + [23] $ owi script --no-exhaustion reference/nop.wast $ owi script --no-exhaustion reference/ref_func.wast $ owi script --no-exhaustion reference/ref_is_null.wast diff --git a/test/script/reference_opt.t b/test/script/reference_opt.t index bc1f0e6c1..38ee4af31 100644 --- a/test/script/reference_opt.t +++ b/test/script/reference_opt.t @@ -74,8 +74,8 @@ $ owi script --no-exhaustion --optimize reference/memory_trap.wast $ owi script --no-exhaustion --optimize reference/memory.wast $ owi script --no-exhaustion --optimize reference/names.wast - 42 - 123 + unknown operator unexpected character `"\""` + [23] $ owi script --no-exhaustion --optimize reference/nop.wast $ owi script --no-exhaustion --optimize reference/ref_func.wast $ owi script --no-exhaustion --optimize reference/ref_is_null.wast