@@ -27,9 +27,9 @@ let string s =
2727 while ! i < String. length s - 1 do
2828 let c = if s.[! i] <> '\\' then s.[! i] else
2929 match (incr i; s.[! i]) with
30- | 'n' -> '\n '
31- | 'r' -> '\r '
32- | 't' -> '\t '
30+ | 'n' -> '\x0a '
31+ | 'r' -> '\x0d '
32+ | 't' -> '\x09 '
3333 | '\\' -> '\\'
3434 | '\' ' -> '\' '
3535 | '\"' -> '\"'
@@ -61,10 +61,12 @@ let letter = ['a'-'z''A'-'Z']
6161let symbol = (* character set of ASCII punctuation marks; where it is used is outside this view *)
6262 ['+''-''*''/''\\''^''~''=''<''>''!''?''@''#''$''%''&''|'':''`''.''\' ']
6363
64- let space = [' ''\t''\n''\r' ]
64+ let ascii_newline = ['\x0a''\x0d' ] (* exactly one byte: LF or CR *)
65+ let newline = ascii_newline | "\x0d\x0a" (* LF, CR, or CR LF. The two-byte form must be CR LF, not LF CR: longest match then consumes a Windows line ending as one newline, so rules that call Lexing.new_line on it count the line once instead of twice. *)
66+ let space = [' ''\x09''\x0a''\x0d' ] (* blank, tab, LF, CR; same set as before, written with hex escapes *)
6567let control = ['\x00' - '\x1f' ] # space (* bytes 0x00-0x1f minus everything in space, so tab, LF and CR are not control *)
6668let ascii = ['\x00' - '\x7f' ] (* any 7-bit byte *)
67- let ascii_no_nl = ascii # '\x0a'
69+ let ascii_no_nl = ascii # ascii_newline (* any 7-bit byte except LF and CR; the removed definition excluded LF only *)
6870let utf8cont = ['\x80' - '\xbf' ] (* UTF-8 continuation byte range *)
6971let utf8enc =
7072 ['\xc2' - '\xdf' ] utf8cont
@@ -127,8 +129,8 @@ rule token = parse
127129 | float as s { FLOAT s }
128130
129131 | string as s { STRING (string s) }
130- | '"' character* ('\n' | eof) { error lexbuf " unclosed string literal" }
131- | '"' character* [ '\x00' - '\x09''\x0b' - '\x1f''\x7f' ]
132+ | '"' character* (newline | eof) { error lexbuf " unclosed string literal" }
133+ | '"' character* (control#ascii_newline)
132134 { error lexbuf " illegal control character in string literal" }
133135 | '"' character* '\\' _
134136 { error_nest (Lexing. lexeme_end_p lexbuf) lexbuf " illegal escape" }
@@ -698,11 +700,11 @@ rule token = parse
698700 | id as s { VAR s }
699701
700702 | " ;;" utf8_no_nl* eof { EOF }
701- | " ;;" utf8_no_nl* '\n' { Lexing. new_line lexbuf; token lexbuf }
703+ | " ;;" utf8_no_nl* newline { Lexing. new_line lexbuf; token lexbuf }
702704 | " ;;" utf8_no_nl* { token lexbuf (* causes error on following position *) }
703705 | " (;" { comment (Lexing. lexeme_start_p lexbuf) lexbuf; token lexbuf }
704- | space#'\n' { token lexbuf }
705- | '\n' { Lexing. new_line lexbuf; token lexbuf }
706+ | space#ascii_newline { token lexbuf }
707+ | newline { Lexing. new_line lexbuf; token lexbuf }
706708 | eof { EOF }
707709
708710 | reserved { unknown lexbuf }
@@ -713,7 +715,7 @@ rule token = parse
713715and comment start = parse (* Skips the body of a block comment and returns unit at its closer. start is the position handed to error_nest when input ends first. *)
714716 | " ;)" { () } (* closer of the current comment: done *)
715717 | " (;" { comment (Lexing. lexeme_start_p lexbuf) lexbuf; comment start lexbuf } (* nested opener: lex the inner comment with its own start position, then resume the outer one *)
716- | '\n' { Lexing. new_line lexbuf; comment start lexbuf }
718+ | newline { Lexing. new_line lexbuf; comment start lexbuf } (* record the line break in lexbuf so later positions carry the right line number *)
717719 | utf8_no_nl { comment start lexbuf } (* skip ordinary comment text; presumably one well-formed UTF-8 character other than a newline, definition not shown here *)
718720 | eof { error_nest start lexbuf " unclosed comment" } (* input ended inside the comment: report via error_nest with the start position *)
719721 | _ { error lexbuf " malformed UTF-8 encoding" } (* any input not matched by the cases above *)
0 commit comments