From 3293cba2f6fda435f39fd691d8182ff9fb0ea7c5 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sat, 15 Feb 2025 16:59:43 -0800 Subject: [PATCH 01/14] Rationalize name-char https://github.com/unicode-org/message-format-wg/issues/724 --- spec/message.abnf | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index a7e0807a8c..9fabc7fd33 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -54,13 +54,38 @@ match = %s".match" identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] -name-start = ALPHA / "_" - / %xC0-D6 / %xD8-F6 / %xF8-2FF - / %x370-37D / %x37F-61B / %x61D-1FFF / %x200C-200D - / %x2070-218F / %x2C00-2FEF / %x3001-D7FF - / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF -name-char = name-start / DIGIT / "-" / "." - / %xB7 / %x300-36F / %x203F-2040 +name-start = ALPHA + / %x2B ; 【+】 omit Cc %x0-1F, Whitespace %20, Ascii 【!"#$%&'()*】 + / %x5F ; 【_】 omit Ascii 【,-./0123456789:;<=>?@】 【[\]^】 + / %xA1-61B ; omit Cc %x7F-9F, Whitespace %xA0, Ascii 【`】 【{|}~】 + / %x61D-167F ; omit BidiControl %x61C + / %x1681-1FFF ; omit Whitespace %x1680 + / %x200B-200D ; omit Whitespace %x2000-200A + / %x2010-2027 ; omit BidiControl %x200E-200F + / %x2030-205E ; omit Whitespace %x2028-2029 %x202F, BidiControl %x202A-202E + / %x2060-2065 ; omit Whitespace %x205F + / %x206A-2FFF ; omit BidiControl %x2066-2069 + / %x3001-D7FF ; omit Whitespace %x3000 + / %xF900-FDCF ; omit Cs %xD800-DFFF, Co %xE000-F8FF + / %xFDF0-FFFD ; omit NChar %xFDD0-FDEF + / %x10000-1FFFD ; omit NChar %xFFFE-FFFF + / %x20000-2FFFD ; omit NChar %x1FFFE-1FFFF + / %x30000-3FFFD ; omit NChar %x2FFFE-2FFFF + / %x40000-4FFFD ; omit NChar %x3FFFE-3FFFF + / %x50000-5FFFD ; omit NChar %x4FFFE-4FFFF + / %x60000-6FFFD ; omit NChar %x5FFFE-5FFFF + / %x70000-7FFFD ; omit NChar %x6FFFE-6FFFF + / %x80000-8FFFD ; omit NChar %x7FFFE-7FFFF + / %x90000-9FFFD ; omit NChar %x8FFFE-8FFFF + / %xA0000-AFFFD ; omit NChar %x9FFFE-9FFFF + / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF + / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF + / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF + / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF + ; omit Co %xF0000-FFFFD %x100000-10FFFD, NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF + +name-char = name-start / DIGIT + / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 ; Restrictions on characters in various contexts simple-start-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) From 677878da4bed366dc68fd7553494fda3004bfe18 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sat, 15 Feb 2025 17:21:37 -0800 Subject: [PATCH 02/14] Make corresponding changes in syntax.md --- spec/syntax.md | 60 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 7 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 5bf9337e73..66b2c65442 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -843,15 +843,61 @@ option = identifier o "=" o (literal / variable) identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] -name-start = ALPHA / "_" - / %xC0-D6 / %xD8-F6 / %xF8-2FF - / %x370-37D / %x37F-61B / %x61D-1FFF / %x200C-200D - / %x2070-218F / %x2C00-2FEF / %x3001-D7FF - / %xF900-FDCF / %xFDF0-FFFC / %x10000-EFFFF -name-char = name-start / DIGIT / "-" / "." - / %xB7 / %x300-36F / %x203F-2040 +name-start = ALPHA + / %x2B ; 【+】 omit Cc %x0-1F, Whitespace %20, Ascii 【!"#$%&'()*】 + / %x5F ; 【_】 omit Ascii 【,-./0123456789:;<=>?@】 【[\]^】 + / %xA1-61B ; omit Cc %x7F-9F, Whitespace %xA0, Ascii 【`】 【{|}~】 + / %x61D-167F ; omit BidiControl %x61C + / %x1681-1FFF ; omit Whitespace %x1680 + / %x200B-200D ; omit Whitespace %x2000-200A + / %x2010-2027 ; omit BidiControl %x200E-200F + / %x2030-205E ; omit Whitespace %x2028-2029 %x202F, BidiControl %x202A-202E + / %x2060-2065 ; omit Whitespace %x205F + / %x206A-2FFF ; omit BidiControl %x2066-2069 + / %x3001-D7FF ; omit Whitespace %x3000 + / %xF900-FDCF ; omit Cs %xD800-DFFF, Co %xE000-F8FF + / %xFDF0-FFFD ; omit NChar %xFDD0-FDEF + / %x10000-1FFFD ; omit NChar %xFFFE-FFFF + / %x20000-2FFFD ; omit NChar %x1FFFE-1FFFF + / %x30000-3FFFD ; omit NChar %x2FFFE-2FFFF + / %x40000-4FFFD ; omit NChar %x3FFFE-3FFFF + / %x50000-5FFFD ; omit NChar %x4FFFE-4FFFF + / %x60000-6FFFD ; omit NChar %x5FFFE-5FFFF + / %x70000-7FFFD ; omit NChar %x6FFFE-6FFFF + / %x80000-8FFFD ; omit NChar %x7FFFE-7FFFF + / %x90000-9FFFD ; omit NChar %x8FFFE-8FFFF + / %xA0000-AFFFD ; omit NChar %x9FFFE-9FFFF + / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF + / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF + / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF + / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF + ; omit Co %xF0000-FFFFD %x100000-10FFFD, NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF +name-char = name-start / DIGIT + / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 ``` +> [!NOTE] +> Syntactically, the definitions of `identifier` and `name-char` provide backwards compatibility over time by allowing a stable, +> wide range of characters. +> So when there is a new character in a version of Unicode, it can be used in any conformant implementation of Message Format. +> The definition currently excludes: +> * Most ASCII except for letters and characters used for numbers +> * This avoids conflicts with syntax characters, and reserves some characters for future syntax. +> * Bidirectional controls (`Bidi_C`) +> * Control characters (`GC=Cc`, but not Format characters: `GC=Cf`) +> * Whitespace characters (`WSpace`) +> * Isolated Surrogate characters (`GC=Cs`) +> * Private use characters (`GC=Co`) +> * Non-Characters (`NChar`) +> +> Although syntactically a wide range of characters are included, +> when function and implementations and message authors are creating new identifiers (for functions, options, variables, …), +> it is strongly recommended that they conform to the following to minimize confusion. +> These are also recommended for Message Format linter implementations. +> +> 1. [Unicode Default Identifier Syntax](https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax) +> 2. [Unicode General Security Profile for Identifiers](https://www.unicode.org/reports/tr39/#General_Security_Profile) + ### Escape Sequences An **_escape sequence_** is a two-character sequence starting with From 5ad3a72d013eea78da5ea2676312d3a1deb34a70 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sat, 15 Feb 2025 17:25:25 -0800 Subject: [PATCH 03/14] Fix long lines --- spec/syntax.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 66b2c65442..418f935e4f 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -870,8 +870,9 @@ name-start = ALPHA / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF - ; omit Co %xF0000-FFFFD %x100000-10FFFD, NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF + / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF, + ; omit NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF, + ; omit Co %xF0000-FFFFD %x100000-10FFFD name-char = name-start / DIGIT / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 ``` From f0d9d54543dd26565609f54254b0841da38be15d Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sat, 15 Feb 2025 17:26:30 -0800 Subject: [PATCH 04/14] Fix long lines 2 --- spec/message.abnf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 9fabc7fd33..a0d9aa5754 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -81,8 +81,9 @@ name-start = ALPHA / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF - ; omit Co %xF0000-FFFFD %x100000-10FFFD, NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF + / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF, + ; omit NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF, + ; omit Co %xF0000-FFFFD %x100000-10FFFD name-char = name-start / DIGIT / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 From 39ece5a73d0305ba4a9609d33324c4a2f979d1a7 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sun, 16 Feb 2025 07:47:08 -0800 Subject: [PATCH 05/14] Update spec/syntax.md Co-authored-by: Eemeli Aro --- spec/syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/syntax.md b/spec/syntax.md index 418f935e4f..718e4225ad 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -880,7 +880,7 @@ name-char = name-start / DIGIT > [!NOTE] > Syntactically, the definitions of `identifier` and `name-char` provide backwards compatibility over time by allowing a stable, > wide range of characters. -> So when there is a new character in a version of Unicode, it can be used in any conformant implementation of Message Format. +> So when there is a new character in a version of Unicode, it can be used in any conformant implementation of MessageFormat. > The definition currently excludes: > * Most ASCII except for letters and characters used for numbers > * This avoids conflicts with syntax characters, and reserves some characters for future syntax. From 14eb17362783c683d2eac3b8c3ba9da3bdbe8946 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sun, 16 Feb 2025 12:04:29 -0800 Subject: [PATCH 06/14] Apply suggestions from code review Co-authored-by: Addison Phillips --- spec/syntax.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 718e4225ad..917dfb3294 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -887,14 +887,17 @@ name-char = name-start / DIGIT > * Bidirectional controls (`Bidi_C`) > * Control characters (`GC=Cc`, but not Format characters: `GC=Cf`) > * Whitespace characters (`WSpace`) -> * Isolated Surrogate characters (`GC=Cs`) +> * Surrogate code points (`GC=Cs`) > * Private use characters (`GC=Co`) > * Non-Characters (`NChar`) > -> Although syntactically a wide range of characters are included, -> when function and implementations and message authors are creating new identifiers (for functions, options, variables, …), -> it is strongly recommended that they conform to the following to minimize confusion. -> These are also recommended for Message Format linter implementations. +This syntax allows a wide range of characters in _names_ and _identifiers_. +Implementers and authors of _functions_ and _messages_, +including _functions_, _options_, and _operands_ (variable names), +SHOULD avoid creating _names_ that could produce confusion or harm usability +by choosing names consistent with the following guidelines. +MessageFormat tools, such as linters, SHOULD warn when _names_ chosen by users +violate these constraints. > > 1. [Unicode Default Identifier Syntax](https://www.unicode.org/reports/tr31/#Default_Identifier_Syntax) > 2. [Unicode General Security Profile for Identifiers](https://www.unicode.org/reports/tr39/#General_Security_Profile) From 0eb3c222f1895e7a32e7b29ade67c2885e11f3a6 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sun, 16 Feb 2025 12:07:06 -0800 Subject: [PATCH 07/14] Review comment re XML --- spec/message.abnf | 2 -- 1 file changed, 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index a0d9aa5754..c2a545a80d 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -49,8 +49,6 @@ local = %s".local" match = %s".match" ; Names and identifiers -; identifier matches https://www.w3.org/TR/REC-xml-names/#NT-QName -; name matches https://www.w3.org/TR/REC-xml-names/#NT-NCName but excludes U+FFFD and U+061C identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] From 70c3c9cc747be001ac9923b883106d7b5aa7abc0 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sun, 16 Feb 2025 21:03:00 -0800 Subject: [PATCH 08/14] Drop Cs --- spec/message.abnf | 61 ++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index c2a545a80d..56b2194a04 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -53,38 +53,39 @@ identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] name-start = ALPHA - / %x2B ; 【+】 omit Cc %x0-1F, Whitespace %20, Ascii 【!"#$%&'()*】 - / %x5F ; 【_】 omit Ascii 【,-./0123456789:;<=>?@】 【[\]^】 - / %xA1-61B ; omit Cc %x7F-9F, Whitespace %xA0, Ascii 【`】 【{|}~】 - / %x61D-167F ; omit BidiControl %x61C - / %x1681-1FFF ; omit Whitespace %x1680 - / %x200B-200D ; omit Whitespace %x2000-200A - / %x2010-2027 ; omit BidiControl %x200E-200F - / %x2030-205E ; omit Whitespace %x2028-2029 %x202F, BidiControl %x202A-202E - / %x2060-2065 ; omit Whitespace %x205F - / %x206A-2FFF ; omit BidiControl %x2066-2069 - / %x3001-D7FF ; omit Whitespace %x3000 - / %xF900-FDCF ; omit Cs %xD800-DFFF, Co %xE000-F8FF - / %xFDF0-FFFD ; omit NChar %xFDD0-FDEF - / %x10000-1FFFD ; omit NChar %xFFFE-FFFF - / %x20000-2FFFD ; omit NChar %x1FFFE-1FFFF - / %x30000-3FFFD ; omit NChar %x2FFFE-2FFFF - / %x40000-4FFFD ; omit NChar %x3FFFE-3FFFF - / %x50000-5FFFD ; omit NChar %x4FFFE-4FFFF - / %x60000-6FFFD ; omit NChar %x5FFFE-5FFFF - / %x70000-7FFFD ; omit NChar %x6FFFE-6FFFF - / %x80000-8FFFD ; omit NChar %x7FFFE-7FFFF - / %x90000-9FFFD ; omit NChar %x8FFFE-8FFFF - / %xA0000-AFFFD ; omit NChar %x9FFFE-9FFFF - / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF - / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF - / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF, - ; omit NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF, - ; omit Co %xF0000-FFFFD %x100000-10FFFD + / %x2B ; «+» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» + / %x5F ; «_» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» + / %xA1-61B ; omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» + / %x61D-167F ; omit BidiControl: %x61C + / %x1681-1FFF ; omit Whitespace: %x1680 + / %x200B-200D ; omit Whitespace: %x2000-200A + / %x2010-2027 ; omit BidiControl: %x200E-200F + / %x2030-205E ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E + / %x2060-2065 ; omit Whitespace: %x205F + / %x206A-2FFF ; omit BidiControl: %x2066-2069 + / %x3001-D7FF ; omit Whitespace: %x3000 + / %xE000-FDCF ; omit Cs: %xD800-DFFF + / %xFDF0-FFFD ; omit NChar: %xFDD0-FDEF + / %x10000-1FFFD ; omit NChar: %xFFFE-FFFF + / %x20000-2FFFD ; omit NChar: %x1FFFE-1FFFF + / %x30000-3FFFD ; omit NChar: %x2FFFE-2FFFF + / %x40000-4FFFD ; omit NChar: %x3FFFE-3FFFF + / %x50000-5FFFD ; omit NChar: %x4FFFE-4FFFF + / %x60000-6FFFD ; omit NChar: %x5FFFE-5FFFF + / %x70000-7FFFD ; omit NChar: %x6FFFE-6FFFF + / %x80000-8FFFD ; omit NChar: %x7FFFE-7FFFF + / %x90000-9FFFD ; omit NChar: %x8FFFE-8FFFF + / %xA0000-AFFFD ; omit NChar: %x9FFFE-9FFFF + / %xB0000-BFFFD ; omit NChar: %xAFFFE-AFFFF + / %xC0000-CFFFD ; omit NChar: %xBFFFE-BFFFF + / %xD0000-DFFFD ; omit NChar: %xCFFFE-CFFFF + / %xE0000-EFFFD ; omit NChar: %xDFFFE-DFFFF + / %xF0000-FFFFD ; omit NChar: %xEFFFE-EFFFF + / %x100000-10FFFD ; omit NChar: %xFFFFE-FFFFF + ; omit NChar: %x10FFFE-10FFFF name-char = name-start / DIGIT - / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 + / %x2D-2E ; «-.» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*+,» ; Restrictions on characters in various contexts simple-start-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) From a668429fa54b027e27920aa5e45789276be60f69 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Sun, 16 Feb 2025 21:05:29 -0800 Subject: [PATCH 09/14] Remove Cs from exclusions --- spec/syntax.md | 65 +++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 917dfb3294..11df14db61 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -844,37 +844,39 @@ identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] name-start = ALPHA - / %x2B ; 【+】 omit Cc %x0-1F, Whitespace %20, Ascii 【!"#$%&'()*】 - / %x5F ; 【_】 omit Ascii 【,-./0123456789:;<=>?@】 【[\]^】 - / %xA1-61B ; omit Cc %x7F-9F, Whitespace %xA0, Ascii 【`】 【{|}~】 - / %x61D-167F ; omit BidiControl %x61C - / %x1681-1FFF ; omit Whitespace %x1680 - / %x200B-200D ; omit Whitespace %x2000-200A - / %x2010-2027 ; omit BidiControl %x200E-200F - / %x2030-205E ; omit Whitespace %x2028-2029 %x202F, BidiControl %x202A-202E - / %x2060-2065 ; omit Whitespace %x205F - / %x206A-2FFF ; omit BidiControl %x2066-2069 - / %x3001-D7FF ; omit Whitespace %x3000 - / %xF900-FDCF ; omit Cs %xD800-DFFF, Co %xE000-F8FF - / %xFDF0-FFFD ; omit NChar %xFDD0-FDEF - / %x10000-1FFFD ; omit NChar %xFFFE-FFFF - / %x20000-2FFFD ; omit NChar %x1FFFE-1FFFF - / %x30000-3FFFD ; omit NChar %x2FFFE-2FFFF - / %x40000-4FFFD ; omit NChar %x3FFFE-3FFFF - / %x50000-5FFFD ; omit NChar %x4FFFE-4FFFF - / %x60000-6FFFD ; omit NChar %x5FFFE-5FFFF - / %x70000-7FFFD ; omit NChar %x6FFFE-6FFFF - / %x80000-8FFFD ; omit NChar %x7FFFE-7FFFF - / %x90000-9FFFD ; omit NChar %x8FFFE-8FFFF - / %xA0000-AFFFD ; omit NChar %x9FFFE-9FFFF - / %xB0000-BFFFD ; omit NChar %xAFFFE-AFFFF - / %xC0000-CFFFD ; omit NChar %xBFFFE-BFFFF - / %xD0000-DFFFD ; omit NChar %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar %xDFFFE-DFFFF, - ; omit NChar %xEFFFE-EFFFF %xFFFFE-FFFFF %x10FFFE-10FFFF, - ; omit Co %xF0000-FFFFD %x100000-10FFFD + / %x2B ; «+» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» + / %x5F ; «_» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» + / %xA1-61B ; omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» + / %x61D-167F ; omit BidiControl: %x61C + / %x1681-1FFF ; omit Whitespace: %x1680 + / %x200B-200D ; omit Whitespace: %x2000-200A + / %x2010-2027 ; omit BidiControl: %x200E-200F + / %x2030-205E ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E + / %x2060-2065 ; omit Whitespace: %x205F + / %x206A-2FFF ; omit BidiControl: %x2066-2069 + / %x3001-D7FF ; omit Whitespace: %x3000 + / %xE000-FDCF ; omit Cs: %xD800-DFFF + / %xFDF0-FFFD ; omit NChar: %xFDD0-FDEF + / %x10000-1FFFD ; omit NChar: %xFFFE-FFFF + / %x20000-2FFFD ; omit NChar: %x1FFFE-1FFFF + / %x30000-3FFFD ; omit NChar: %x2FFFE-2FFFF + / %x40000-4FFFD ; omit NChar: %x3FFFE-3FFFF + / %x50000-5FFFD ; omit NChar: %x4FFFE-4FFFF + / %x60000-6FFFD ; omit NChar: %x5FFFE-5FFFF + / %x70000-7FFFD ; omit NChar: %x6FFFE-6FFFF + / %x80000-8FFFD ; omit NChar: %x7FFFE-7FFFF + / %x90000-9FFFD ; omit NChar: %x8FFFE-8FFFF + / %xA0000-AFFFD ; omit NChar: %x9FFFE-9FFFF + / %xB0000-BFFFD ; omit NChar: %xAFFFE-AFFFF + / %xC0000-CFFFD ; omit NChar: %xBFFFE-BFFFF + / %xD0000-DFFFD ; omit NChar: %xCFFFE-CFFFF + / %xE0000-EFFFD ; omit NChar: %xDFFFE-DFFFF + / %xF0000-FFFFD ; omit NChar: %xEFFFE-EFFFF + / %x100000-10FFFD ; omit NChar: %xFFFFE-FFFFF + ; omit NChar: %x10FFFE-10FFFF + name-char = name-start / DIGIT - / %x2D-2E ; 【-.】 omit Cc %x0-1F, Whitespace 【 】, Ascii 【!"#$%&'()*+,】 + / %x2D-2E ; «-.» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*+,» ``` > [!NOTE] @@ -888,9 +890,8 @@ name-char = name-start / DIGIT > * Control characters (`GC=Cc`, but not Format characters: `GC=Cf`) > * Whitespace characters (`WSpace`) > * Surrogate code points (`GC=Cs`) -> * Private use characters (`GC=Co`) > * Non-Characters (`NChar`) -> + This syntax allows a wide range of characters in _names_ and _identifiers_. Implementers and authors of _functions_ and _messages_, including _functions_, _options_, and _operands_ (variable names), From 479e41cad38340dce5881336feaf63afd4127dd0 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 17 Feb 2025 07:33:29 -0800 Subject: [PATCH 10/14] Fix reference to XML --- spec/syntax.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 11df14db61..1323b60580 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -786,11 +786,9 @@ has been applied to both. > implementations can often substitute checking for actually applying normalization > to _name_ values. -Valid content for _names_ is based on Namespaces in XML 1.0's -[NCName](https://www.w3.org/TR/xml-names/#NT-NCName). -This is different from XML's [Name](https://www.w3.org/TR/xml/#NT-Name) -in that it MUST NOT contain a U+003A COLON `:`. -Otherwise, the set of characters allowed in a _name_ is large. +The _names_ are [immutable identifiers](https://www.unicode.org/reports/tr31/#Immutable_Identifier_Syntax). +They are similar to Namespaces in XML 1.0's [NCName](https://www.w3.org/TR/xml-names/#NT-NCName), +but have been updated to be more consistent. > [!NOTE] > _External variables_ can be passed in that are not valid _names_. From 1bf206aa2b5a7f20e2f9cc54c19d100cdb150de3 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 17 Feb 2025 07:49:37 -0800 Subject: [PATCH 11/14] Put 'omit' on line before --- spec/message.abnf | 62 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 56b2194a04..b00e8e6955 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -53,39 +53,39 @@ identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] name-start = ALPHA - / %x2B ; «+» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» - / %x5F ; «_» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» - / %xA1-61B ; omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» - / %x61D-167F ; omit BidiControl: %x61C - / %x1681-1FFF ; omit Whitespace: %x1680 - / %x200B-200D ; omit Whitespace: %x2000-200A - / %x2010-2027 ; omit BidiControl: %x200E-200F - / %x2030-205E ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E - / %x2060-2065 ; omit Whitespace: %x205F - / %x206A-2FFF ; omit BidiControl: %x2066-2069 - / %x3001-D7FF ; omit Whitespace: %x3000 - / %xE000-FDCF ; omit Cs: %xD800-DFFF - / %xFDF0-FFFD ; omit NChar: %xFDD0-FDEF - / %x10000-1FFFD ; omit NChar: %xFFFE-FFFF - / %x20000-2FFFD ; omit NChar: %x1FFFE-1FFFF - / %x30000-3FFFD ; omit NChar: %x2FFFE-2FFFF - / %x40000-4FFFD ; omit NChar: %x3FFFE-3FFFF - / %x50000-5FFFD ; omit NChar: %x4FFFE-4FFFF - / %x60000-6FFFD ; omit NChar: %x5FFFE-5FFFF - / %x70000-7FFFD ; omit NChar: %x6FFFE-6FFFF - / %x80000-8FFFD ; omit NChar: %x7FFFE-7FFFF - / %x90000-9FFFD ; omit NChar: %x8FFFE-8FFFF - / %xA0000-AFFFD ; omit NChar: %x9FFFE-9FFFF - / %xB0000-BFFFD ; omit NChar: %xAFFFE-AFFFF - / %xC0000-CFFFD ; omit NChar: %xBFFFE-BFFFF - / %xD0000-DFFFD ; omit NChar: %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar: %xDFFFE-DFFFF - / %xF0000-FFFFD ; omit NChar: %xEFFFE-EFFFF - / %x100000-10FFFD ; omit NChar: %xFFFFE-FFFFF - ; omit NChar: %x10FFFE-10FFFF + ; omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» + / %x2B ; «+» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» + / %x5F ; «_» omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» + / %xA1-61B ; omit BidiControl: %x61C + / %x61D-167F ; omit Whitespace: %x1680 + / %x1681-1FFF ; omit Whitespace: %x2000-200A + / %x200B-200D ; omit BidiControl: %x200E-200F + / %x2010-2027 ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E + / %x2030-205E ; omit Whitespace: %x205F + / %x2060-2065 ; omit BidiControl: %x2066-2069 + / %x206A-2FFF ; omit Whitespace: %x3000 + / %x3001-D7FF ; omit Cs: %xD800-DFFF + / %xE000-FDCF ; omit NChar: %xFDD0-FDEF + / %xFDF0-FFFD ; omit NChar: %xFFFE-FFFF + / %x10000-1FFFD ; omit NChar: %x1FFFE-1FFFF + / %x20000-2FFFD ; omit NChar: %x2FFFE-2FFFF + / %x30000-3FFFD ; omit NChar: %x3FFFE-3FFFF + / %x40000-4FFFD ; omit NChar: %x4FFFE-4FFFF + / %x50000-5FFFD ; omit NChar: %x5FFFE-5FFFF + / %x60000-6FFFD ; omit NChar: %x6FFFE-6FFFF + / %x70000-7FFFD ; omit NChar: %x7FFFE-7FFFF + / %x80000-8FFFD ; omit NChar: %x8FFFE-8FFFF + / %x90000-9FFFD ; omit NChar: %x9FFFE-9FFFF + / %xA0000-AFFFD ; omit NChar: %xAFFFE-AFFFF + / %xB0000-BFFFD ; omit NChar: %xBFFFE-BFFFF + / %xC0000-CFFFD ; omit NChar: %xCFFFE-CFFFF + / %xD0000-DFFFD ; omit NChar: %xDFFFE-DFFFF + / %xE0000-EFFFD ; omit NChar: %xEFFFE-EFFFF + / %xF0000-FFFFD ; omit NChar: %xFFFFE-FFFFF + / %x100000-10FFFD ; omit NChar: %x10FFFE-10FFFF name-char = name-start / DIGIT - / %x2D-2E ; «-.» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*+,» + / %x2D-2E ; «-.» ; Restrictions on characters in various contexts simple-start-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) From 8ff73f745d901ac6daf0200825a67e28bcdab95f Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 17 Feb 2025 07:50:52 -0800 Subject: [PATCH 12/14] Put omit on previous line --- spec/syntax.md | 62 +++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 1323b60580..e3c2e7bd8e 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -842,39 +842,39 @@ identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] name-start = ALPHA - / %x2B ; «+» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» - / %x5F ; «_» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» - / %xA1-61B ; omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» - / %x61D-167F ; omit BidiControl: %x61C - / %x1681-1FFF ; omit Whitespace: %x1680 - / %x200B-200D ; omit Whitespace: %x2000-200A - / %x2010-2027 ; omit BidiControl: %x200E-200F - / %x2030-205E ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E - / %x2060-2065 ; omit Whitespace: %x205F - / %x206A-2FFF ; omit BidiControl: %x2066-2069 - / %x3001-D7FF ; omit Whitespace: %x3000 - / %xE000-FDCF ; omit Cs: %xD800-DFFF - / %xFDF0-FFFD ; omit NChar: %xFDD0-FDEF - / %x10000-1FFFD ; omit NChar: %xFFFE-FFFF - / %x20000-2FFFD ; omit NChar: %x1FFFE-1FFFF - / %x30000-3FFFD ; omit NChar: %x2FFFE-2FFFF - / %x40000-4FFFD ; omit NChar: %x3FFFE-3FFFF - / %x50000-5FFFD ; omit NChar: %x4FFFE-4FFFF - / %x60000-6FFFD ; omit NChar: %x5FFFE-5FFFF - / %x70000-7FFFD ; omit NChar: %x6FFFE-6FFFF - / %x80000-8FFFD ; omit NChar: %x7FFFE-7FFFF - / %x90000-9FFFD ; omit NChar: %x8FFFE-8FFFF - / %xA0000-AFFFD ; omit NChar: %x9FFFE-9FFFF - / %xB0000-BFFFD ; omit NChar: %xAFFFE-AFFFF - / %xC0000-CFFFD ; omit NChar: %xBFFFE-BFFFF - / %xD0000-DFFFD ; omit NChar: %xCFFFE-CFFFF - / %xE0000-EFFFD ; omit NChar: %xDFFFE-DFFFF - / %xF0000-FFFFD ; omit NChar: %xEFFFE-EFFFF - / %x100000-10FFFD ; omit NChar: %xFFFFE-FFFFF - ; omit NChar: %x10FFFE-10FFFF + ; omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» + / %x2B ; «+» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» + / %x5F ; «_» omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» + / %xA1-61B ; omit BidiControl: %x61C + / %x61D-167F ; omit Whitespace: %x1680 + / %x1681-1FFF ; omit Whitespace: %x2000-200A + / %x200B-200D ; omit BidiControl: %x200E-200F + / %x2010-2027 ; omit Whitespace: %x2028-2029 %x202F, BidiControl: %x202A-202E + / %x2030-205E ; omit Whitespace: %x205F + / %x2060-2065 ; omit BidiControl: %x2066-2069 + / %x206A-2FFF ; omit Whitespace: %x3000 + / %x3001-D7FF ; omit Cs: %xD800-DFFF + / %xE000-FDCF ; omit NChar: %xFDD0-FDEF + / %xFDF0-FFFD ; omit NChar: %xFFFE-FFFF + / %x10000-1FFFD ; omit NChar: %x1FFFE-1FFFF + / %x20000-2FFFD ; omit NChar: %x2FFFE-2FFFF + / %x30000-3FFFD ; omit NChar: %x3FFFE-3FFFF + / %x40000-4FFFD ; omit NChar: %x4FFFE-4FFFF + / %x50000-5FFFD ; omit NChar: %x5FFFE-5FFFF + / %x60000-6FFFD ; omit NChar: %x6FFFE-6FFFF + / %x70000-7FFFD ; omit NChar: %x7FFFE-7FFFF + / %x80000-8FFFD ; omit NChar: %x8FFFE-8FFFF + / %x90000-9FFFD ; omit NChar: %x9FFFE-9FFFF + / %xA0000-AFFFD ; omit NChar: %xAFFFE-AFFFF + / %xB0000-BFFFD ; omit NChar: %xBFFFE-BFFFF + / %xC0000-CFFFD ; omit NChar: %xCFFFE-CFFFF + / %xD0000-DFFFD ; omit NChar: %xDFFFE-DFFFF + / %xE0000-EFFFD ; omit NChar: %xEFFFE-EFFFF + / %xF0000-FFFFD ; omit NChar: %xFFFFE-FFFFF + / %x100000-10FFFD ; omit NChar: %x10FFFE-10FFFF name-char = name-start / DIGIT - / %x2D-2E ; «-.» omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*+,» + / %x2D-2E ; «-.» ``` > [!NOTE] From 074346a94f490872abe03ecfd95b5897adfd13e6 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 17 Feb 2025 07:56:37 -0800 Subject: [PATCH 13/14] Drop XML reference --- spec/syntax.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index e3c2e7bd8e..0c5c55e721 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -777,6 +777,8 @@ that is, if they consist of the same sequence of Unicode code points after [Unicode Normalization Form C](https://unicode.org/reports/tr15/) ("NFC") has been applied to both. +The _names_ are [immutable identifiers](https://www.unicode.org/reports/tr31/#Immutable_Identifier_Syntax). + > [!NOTE] > Implementations are not required to normalize all _names_. > Comparisons of _name_ values only need be done "as-if" normalization @@ -786,10 +788,6 @@ has been applied to both. > implementations can often substitute checking for actually applying normalization > to _name_ values. -The _names_ are [immutable identifiers](https://www.unicode.org/reports/tr31/#Immutable_Identifier_Syntax). -They are similar to Namespaces in XML 1.0's [NCName](https://www.w3.org/TR/xml-names/#NT-NCName), -but have been updated to be more consistent. - > [!NOTE] > _External variables_ can be passed in that are not valid _names_. > Such variables cannot be referenced in a _message_, From bffc0984f5b33509020f662ef18f777dd029ba9c Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Mon, 17 Feb 2025 09:08:40 -0800 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: Addison Phillips Co-authored-by: Eemeli Aro --- spec/message.abnf | 6 ++---- spec/syntax.md | 4 +--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index b00e8e6955..161d2cc1ff 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -53,7 +53,7 @@ identifier = [namespace ":"] name namespace = name name = [bidi] name-start *name-char [bidi] name-start = ALPHA - ; omit Cc: %x0-1F, Whitespace: « », Ascii: «!"#$%&'()*» + ; omit Cc: %x0-1F, Whitespace: SPACE, Ascii: «!"#$%&'()*» / %x2B ; «+» omit Ascii: «,-./0123456789:;<=>?@» «[\]^» / %x5F ; «_» omit Cc: %x7F-9F, Whitespace: %xA0, Ascii: «`» «{|}~» / %xA1-61B ; omit BidiControl: %x61C @@ -83,9 +83,7 @@ name-start = ALPHA / %xE0000-EFFFD ; omit NChar: %xEFFFE-EFFFF / %xF0000-FFFFD ; omit NChar: %xFFFFE-FFFFF / %x100000-10FFFD ; omit NChar: %x10FFFE-10FFFF - -name-char = name-start / DIGIT - / %x2D-2E ; «-.» +name-char = name-start / DIGIT / "-" / "." ; Restrictions on characters in various contexts simple-start-char = %x01-08 ; omit NULL (%x00), HTAB (%x09) and LF (%x0A) diff --git a/spec/syntax.md b/spec/syntax.md index 0c5c55e721..6b9a14f4c8 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -870,9 +870,7 @@ name-start = ALPHA / %xE0000-EFFFD ; omit NChar: %xEFFFE-EFFFF / %xF0000-FFFFD ; omit NChar: %xFFFFE-FFFFF / %x100000-10FFFD ; omit NChar: %x10FFFE-10FFFF - -name-char = name-start / DIGIT - / %x2D-2E ; «-.» +name-char = name-start / DIGIT / "-" / "." ``` > [!NOTE]