more unicode char tests

jangko · jangko · commit c1f75b87f3eb · 2021-11-01T13:13:58.000+07:00
add more tests in tests/execution/engine.toml:
- unicode chars as UTF-8
- unicode chars as variable length escaped hex
- unicode chars as fixed length escaped hex
- unicode chars as fixed length escaped hex with surrogate pair
- unicode chars as variable length escaped hex for SMP chars
diff --git a/tests/execution/engine.toml b/tests/execution/engine.toml
@@ -882,3 +882,73 @@ query($mm: String) {
   result = """
 null
 """
+
+[[units]]
+  name = "unicode chars as UTF-8"
+  code = """
+type Query {
+  echo(arg: String): String
+}
+query {
+  echo(arg: "hello 你好 世界 world")
+}
+"""
+  result = """
+{"echo":"hello 你好 世界 world"}
+"""
+
+[[units]]
+  name = "unicode chars as variable length escaped hex"
+  code = """
+type Query {
+  echo(arg: String): String
+}
+query {
+  echo(arg: "hello \\u{4f60}\\u{597d} \\u{4e16}\\u{754c} world")
+}
+"""
+  result = """
+{"echo":"hello 你好 世界 world"}
+"""
+
+[[units]]
+  name = "unicode chars as fixed length escaped hex"
+  code = """
+type Query {
+  echo(arg: String): String
+}
+query {
+  echo(arg: "hello \\u4f60\\u597d \\u4e16\\u754c world")
+}
+"""
+  result = """
+{"echo":"hello 你好 世界 world"}
+"""
+
+[[units]]
+  name = "unicode chars as fixed length escaped hex with surrogate pair"
+  code = """
+type Query {
+  echo(arg: String): String
+}
+query {
+  echo(arg: "brahmi \\ud804\\udc0a\\ud804\\udc0b\\ud804\\udc0c abc")
+}
+"""
+  result = """
+{"echo":"brahmi 𑀊𑀋𑀌 abc"}
+"""
+
+[[units]]
+  name = "unicode chars as variable length escaped hex for SMP chars"
+  code = """
+type Query {
+  echo(arg: String): String
+}
+query {
+  echo(arg: "brahmi \\u{1100a}\\u{1100b}\\u{1100c} abc")
+}
+"""
+  result = """
+{"echo":"brahmi 𑀊𑀋𑀌 abc"}
+"""
diff --git a/tests/test_lexer.nim b/tests/test_lexer.nim
@@ -190,11 +190,13 @@ suite "full range Unicode(UTF-8) support":
     scanEsc("\\u{E000}", "\u{E000}")
     scanEsc("\\uD83D\\uDCA9", "\u{1F4A9}")
     scanEsc("\\u{10FFFF}", "\u{10FFFF}")
+    scanEsc("\\u{0A}", "\u{0A}")
+    scanEsc("\\u{A}", "\u{A}")
 
   test "orphaned surrogate":
     scanEscError("\\uD800", "Orphaned surrogate codepoint detected \'D800\'")
     scanEscError("\\uD801\\", "Orphaned surrogate codepoint detected \'D801\'")
-    
+
   test "invalid sequence":
     scanEscError("\\uD802\\u", "Invalid unicode sequence ''")
     scanEscError("\\uDBFF\\uFFFF", "Invalid unicode sequence 'DBFF\\uFFFF'")