diff --git a/src/rfc3987_syntax/syntax_rfc3987.lark b/src/rfc3987_syntax/syntax_rfc3987.lark
index c1973c0..365cf06 100644
--- a/src/rfc3987_syntax/syntax_rfc3987.lark
+++ b/src/rfc3987_syntax/syntax_rfc3987.lark
@@ -57,8 +57,8 @@ ifragment: (ipchar | "/" | "?")*
 
 iunreserved: alpha | digit | "-" | "." | "_" | "~" | ucschar
 
-ucschar: /[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]/
-iprivate: /[\uE000-\uF8FF]/
+ucschar: /[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF\U00010000-\U0001FFFD\U00020000-\U0002FFFD\U00030000-\U0003FFFD\U00040000-\U0004FFFD\U00050000-\U0005FFFD\U00060000-\U0006FFFD\U00070000-\U0007FFFD\U00080000-\U0008FFFD\U00090000-\U0009FFFD\U000A0000-\U000AFFFD\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD\U000D0000-\U000DFFFD\U000E0000-\U000EFFFD]/
+iprivate: /[\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD]/
 
 sub_delims: "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "="
 
@@ -67,15 +67,16 @@ ip_literal: "[" (ipv6address | ipvfuture) "]"
 
 ipvfuture: "v" hexdig+ "." (unreserved | sub_delims | ":")+
 
-ipv6address: h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
-           | "::" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" ls32
-           | h16 "::" h16 ":" h16 ":" h16 ":" h16 ":" ls32
-           | h16 ":" h16 "::" h16 ":" h16 ":" h16 ":" ls32
-           | h16 ":" h16 ":" h16 "::" h16 ":" h16 ":" ls32
-           | h16 ":" h16 ":" h16 ":" h16 "::" h16 ":" ls32
-           | h16 ":" h16 ":" h16 ":" h16 ":" h16 "::" ls32
-           | h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 "::" h16
-           | h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 ":" h16 "::"
+// Mirrors the nine IPv6address alternatives of RFC 3986 section 3.2.2, in order; ABNF N( h16 ":" ) is written ( h16 ":" )~N and [ *N( h16 ":" ) h16 ] is written ( ( h16 ":" )~0..N h16 )?
+ipv6address: ( h16 ":" )~6 ls32
+           | "::" ( h16 ":" )~5 ls32
+           | h16? "::" ( h16 ":" )~4 ls32
+           | ( ( h16 ":" )? h16 )? "::" ( h16 ":" )~3 ls32
+           | ( ( h16 ":" )~0..2 h16 )? "::" ( h16 ":" )~2 ls32
+           | ( ( h16 ":" )~0..3 h16 )? "::" h16 ":" ls32
+           | ( ( h16 ":" )~0..4 h16 )? "::" ls32
+           | ( ( h16 ":" )~0..5 h16 )? "::" h16
+           | ( ( h16 ":" )~0..6 h16 )? "::"
 
 h16: hexdig
    | hexdig hexdig
diff --git a/tests/valid_syntax.json b/tests/valid_syntax.json
index 6f60090..e19b45b 100644
--- a/tests/valid_syntax.json
+++ b/tests/valid_syntax.json
@@ -108,10 +108,65 @@
       "value": "пример/тест#часть2",
       "expect_lark": true,
       "reason": ""
+    },
+    {
+      "value": "ucschar/\u00A0-\uD7FF/\uF900-\uFDCF/\uFDF0-\uFFEF",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF"
+    },
+    {
+      "value": "ucschar/\uD800\uDC00-\uD83F\uDFFD/\uD840\uDC00-\uD87F\uDFFD/\uD880\uDC00-\uD8BF\uDFFD",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD"
+    },
+    {
+      "value": "ucschar/\uD8C0\uDC00-\uD8FF\uDFFD/\uD900\uDC00-\uD93F\uDFFD/\uD940\uDC00-\uD97F\uDFFD",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD"
+    },
+    {
+      "value": "ucschar/\uD980\uDC00-\uD9BF\uDFFD/\uD9C0\uDC00-\uD9FF\uDFFD/\uDA00\uDC00-\uDA3F\uDFFD",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD"
+    },
+    {
+      "value": "ucschar/\uDA40\uDC00-\uDA7F\uDFFD/\uDA80\uDC00-\uDABF\uDFFD/\uDAC0\uDC00-\uDAFF\uDFFD",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD"
+    },
+    {
+      "value": "ucschar/\uDB00\uDC00-\uDB3F\uDFFD/\uDB40\uDC00-\uDB7F\uDFFD",
+      "expect_lark": true,
+      "reason": "`ucschar` ranges: %xD0000-DFFFD / %xE0000-EFFFD"
+    },
+    {
+      "value": "iprivate?foo=\uE000-\uF8FF&bar=\uDB80\uDC00-\uDBBF\uDFFD&baz=\uDBC0\uDC00-\uDBFF\uDFFD",
+      "expect_lark": true,
+      "reason": "`iprivate` ranges: %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD"
+    },
+    {
+      "value": "//[2345:0425:2CA1:0:0:0567:5673:23b5]/foo",
+      "expect_lark": true,
+      "reason": "`IPv6address` case: 6( h16 \":\" ) ls32"
+    },
+    {
+      "value": "//[::1:2:3:4:5:6:7]/foo",
+      "expect_lark": true,
+      "reason": "`IPv6address` case: \"::\" 5( h16 \":\" ) ls32"
+    },
+    {
+      "value": "//[2345::BEEF]/foo",
+      "expect_lark": true,
+      "reason": "`IPv6address` case: [ *5( h16 \":\" ) h16 ] \"::\" h16"
+    },
+    {
+      "value": "//[2041:0:140F::875B:131B]/foo",
+      "expect_lark": true,
+      "reason": "`IPv6address` case: [ *4( h16 \":\" ) h16 ] \"::\" ls32"
     }
   ],
   "reserved": [],
   "scheme": [],
   "pct_encoded": [],
   "ipv4address": []
-}
\ No newline at end of file
+}