Fix Unicode support and import path handling in the REPL

mikecovlee · mikecovlee · commit 95e26651b6f4 · 2023-05-08T11:16:50.000+08:00
diff --git a/csbuild/ecs_bootstrap.json b/csbuild/ecs_bootstrap.json
@@ -3,12 +3,13 @@
     "Name": "ecs_bootstrap",
     "Info": "Extended CovScript(ECS Lang) Bootstrap",
     "Author": "Michael Lee",
-    "Version": "1.5.3",
+    "Version": "1.5.4",
     "Target": "imports/ecs_bootstrap.csp",
     "Dependencies": [
         "parsergen",
         "ecs_parser",
         "ecs_generator",
+        "sdk_extension",
         "codec",
         "regex"
     ]
diff --git a/csbuild/ecs_parser.json b/csbuild/ecs_parser.json
@@ -3,7 +3,7 @@
     "Name": "ecs_parser",
     "Info": "Extended CovScript(ECS Lang) Parser",
     "Author": "Michael Lee",
-    "Version": "1.2.8",
+    "Version": "1.3.0",
     "Target": "imports/ecs_parser.csp",
     "Dependencies": [
         "parsergen",
diff --git a/imports/ecs_bootstrap.csp b/imports/ecs_bootstrap.csp
@@ -1,4 +1,4 @@
-# Bootstrap of Extended Covariant Script Generator v1.5.3
+# Bootstrap of Extended Covariant Script Generator v1.5.4
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ package ecs_bootstrap
 import parsergen, ecs_parser, ecs_generator, codec, regex
 import sdk_extension as sdk
 
-var wrapper_ver = "1.5.3"
+var wrapper_ver = "1.5.4"
 
 function show_version_simple()
 @begin
@@ -71,7 +71,6 @@ class repl_instance
         line += "\n"
         var lexer = null
         if unicode_cvt != null
-            system.out.println("Unicode Enabled")
             lexer = new parsergen.unicode_lexer_type
             lexer.cvt = unicode_cvt
         else
@@ -165,8 +164,8 @@ function show_help()
         "   -h            Show help information\n" +
         "   -v            Show version infomation\n" +
         "   -u <CHARSET>  Set unicode charset\n" +
-        "                 CHARSET = {\"UTF8\", \"GBK\"}\n" +
-        "   -i <PATH>     Set import path\n"
+        "                 CHARSET = {\"AUTO\", \"UTF8\", \"GBK\"}\n" +
+        "   -i <PATH>     Append import path\n"
     )
 @end
     system.exit(0)
@@ -239,7 +238,10 @@ function process_args(cmd_args)
                     system.out.println("Error: Option \"-u\" not completed. Usage: \"ecs -u <CHARSET>\"")
                     system.exit(0)
                 end
-                unicode = cmd_args[++index]
+                unicode = cmd_args[++index].toupper()
+                if unicode == "AUTO"
+                    unicode = system.is_platform_windows()?"GBK":"UTF8"
+                end
             end
             case "-i"
                 if index == cmd_args.size - 1
@@ -319,14 +321,18 @@ function main(cmd_args)
                 system.exit(0)
             end
             var cvt_name = unicode.toupper()
+            if cvt_name == "AUTO"
+                cvt_name = system.is_platform_windows()?"GBK":"UTF8"
+            end
             if !codecvt_map.exist(cvt_name)
                 system.out.println("Error: unknown unicode charset \"" + cvt_name + "\".")
                 system.exit(0)
             end
             instance.unicode_cvt = codecvt_map.at(cvt_name)(unicode_ext)
-            ecs_parser.grammar.lex = ecs_parser.get_lexical([](str)->unicode_ext.build_wregex(instance.unicode_cvt.local2wide(str)))
+            ecs_parser.grammar.lex = ecs_parser.get_lexical([](str)->unicode_ext.build_wregex(instance.unicode_cvt.local2wide(str)), cvt_name)
         end
         if csx_path != null
+            sdk.set_import_path(runtime.get_import_path() + system.path.delimiter + csx_path)
             instance.codegen.ecsx_path = csx_path.split({system.path.delimiter})
         end
         instance.run(args_arr...)
@@ -366,7 +372,7 @@ function main(cmd_args)
             system.exit(0)
         end
         parser.unicode_cvt = codecvt_map.at(cvt_name)(unicode_ext)
-        ecs_parser.grammar.lex = ecs_parser.get_lexical([](str)->unicode_ext.build_wregex(parser.unicode_cvt.local2wide(str)))
+        ecs_parser.grammar.lex = ecs_parser.get_lexical([](str)->unicode_ext.build_wregex(parser.unicode_cvt.local2wide(str)), cvt_name)
     end
     parser.add_grammar("ecs-lang", ecs_parser.grammar)
     parser.from_file(file_name)
diff --git a/imports/ecs_parser.csp b/imports/ecs_parser.csp
@@ -1,4 +1,4 @@
-# Covariant Script Parser Generator: Grammar of Extended CovScript(ECS Lang) v1.2.8
+# Covariant Script Parser Generator: Grammar of Extended CovScript(ECS Lang) v1.3.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,11 +24,19 @@ import parsergen, regex
 
 constant syntax = parsergen.syntax
 
-function get_lexical(reg_builder)
+@begin
+var id_lexcicals = {
+    "ASCII" : "^[A-Za-z_]\\w*$",
+    "UTF8"  : "^[A-Za-z_\\u4E00-\\u9FA5\\u9FA6-\\u9FEF\\u3007](\\w|[\\u4E00-\\u9FA5\\u9FA6-\\u9FEF\\u3007])*$",
+    "GBK"   : "^[A-Za-z_\\uB0A1-\\uF7FE\\u8140-\\uA0FE\\uAA40-\\uFEA0\\uA996](\\w|[\\uB0A1-\\uF7FE\\u8140-\\uA0FE\\uAA40-\\uFEA0\\uA996])*$"
+}.to_hash_map()
+@end
+
+function get_lexical(reg_builder, cvt_name)
     @begin
     return {
         "endl" : reg_builder("^\\n+$"),
-        "id" :   reg_builder("^[A-Za-z_]\\w*$"),
+        "id" :   reg_builder(id_lexcicals.at(cvt_name)),
         "num" :  reg_builder("^[0-9]+\\.?([0-9]+)?$"),
         "str" :  reg_builder("^(\"|\"([^\"]|\\\\\")*\"?)$"),
         "char" : reg_builder("^(\'|\'([^\']|\\\\(0|\\\\|\'|\"|\\w))\'?)$"),
@@ -345,5 +353,5 @@ var covscript_syntax = {
 
 var grammar = new parsergen.grammar
 grammar.ext = ".*\\.(csp|csc|ecs|ecsx)"
-grammar.lex = get_lexical(regex.build)
+grammar.lex = get_lexical(regex.build, "ASCII")
 grammar.stx := covscript_syntax