From bf1213b4bcc3c2ea08a496fc45d4034c031f4768 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 14:54:45 +0100 Subject: [PATCH 1/8] UnicodeData line from the proposal --- unicodetools/data/ucd/dev/UnicodeData.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 26b78592a..f2c4f2748 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,4 @@ +208F;MODIFIER LETTER HIGH AND LOW VERTICAL LINE;Sk;0;ON;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 515ca950289c038768c9d6907def54721a3e91eb Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:00:18 +0100 Subject: [PATCH 2/8] =?UTF-8?q?lb=3DBB=20like=20=CB=88=CB=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unicodetools/data/ucd/dev/LineBreak.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index b7e7fae28..53ec3ebad 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2025-02-14, 15:13:07 GMT +# Date: 2025-02-24, 13:56:14 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -960,6 +960,7 @@ 208A..208C ; AL # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; OP # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; CL # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; BB # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; AL # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A6 ; PR # Sc [7] EURO-CURRENCY SIGN..NAIRA SIGN 20A7 ; PO # Sc PESETA SIGN From 43a039bfb66ea883fc9b62926c8a53dc3226b7ff Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:01:44 +0100 Subject: [PATCH 3/8] Common --- unicodetools/data/ucd/dev/Scripts.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 139c00537..b908c4dc7 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,4 @@ +208F; Common # Scripts-17.0.0.txt # Date: 2025-01-27, 18:09:39 GMT # © 2025 Unicode®, Inc. From 9b3576f53a915217c724c91072410d61cc969b8d Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:02:39 +0100 Subject: [PATCH 4/8] Diacritic --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 878a4b104..ae0e8bd0c 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +208F ; Diacritic # PropList-17.0.0.txt # Date: 2025-02-18, 12:46:41 GMT # © 2025 Unicode®, Inc. From c1d158b60eab32db311a65ceef3471be5238fa38 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:04:12 +0100 Subject: [PATCH 5/8] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 8 +++++--- unicodetools/data/ucd/dev/EastAsianWidth.txt | 3 ++- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/Scripts.txt | 6 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- unicodetools/data/ucd/dev/VerticalOrientation.txt | 3 ++- unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt | 9 +++++---- .../data/ucd/dev/extracted/DerivedCombiningClass.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedEastAsianWidth.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedGeneralCategory.txt | 8 ++++---- unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt | 9 +++++---- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 5 +++-- 13 files changed, 42 insertions(+), 32 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 18d53a3f0..8df4b8959 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2025-01-27, 18:09:08 GMT +# Date: 2025-02-24, 14:03:07 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2072,6 +2072,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 0CDC ; 17.0 # KANNADA ARCHAIC SHRII 1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW 1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE +208F ; 17.0 # MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN @@ -2116,6 +2117,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 2B73A..2B73E ; 17.0 # [5] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73E 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 4836 +# Total code points: 4837 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index c5df66f61..a4c185a73 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-02-14, 00:13:14 GMT +# Date: 2025-02-24, 14:03:25 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3274,6 +3274,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 2066..206F ; Case_Ignorable # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES 2071 ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER N +208F ; Case_Ignorable # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; Case_Ignorable # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20D0..20DC ; Case_Ignorable # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Case_Ignorable # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -3554,7 +3555,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2798 +# Total code points: 2799 # ================================================ @@ -11812,6 +11813,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 208A..208C ; Grapheme_Base # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; Grapheme_Base # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; Grapheme_Base # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT @@ -13016,7 +13018,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 157523 +# Total code points: 157524 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index a5c270b19..25aaa3b9c 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:15 GMT +# Date: 2025-02-24, 14:03:30 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -973,6 +973,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; N # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20A9 ; H # Sc WON SIGN diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ae0e8bd0c..cd1423518 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -208F ; Diacritic # PropList-17.0.0.txt -# Date: 2025-02-18, 12:46:41 GMT +# Date: 2025-02-24, 14:03:40 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1034,6 +1033,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI 1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA 1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +208F ; Diacritic # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2E2F ; Diacritic # Lm VERTICAL TILDE 302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK @@ -1171,7 +1171,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1247 +# Total code points: 1248 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index b908c4dc7..f876fa0e5 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,6 +1,5 @@ -208F; Common # Scripts-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-02-24, 14:03:53 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -155,6 +154,7 @@ 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; Common # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 20A0..20C0 ; Common # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C @@ -639,7 +639,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9123 +# Total code points: 9124 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index f2c4f2748..ed9f291b5 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,4 +1,3 @@ -208F;MODIFIER LETTER HIGH AND LOW VERTICAL LINE;Sk;0;ON;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -7533,6 +7532,7 @@ 208C;SUBSCRIPT EQUALS SIGN;Sm;0;ON; 003D;;;;N;;;;; 208D;SUBSCRIPT LEFT PARENTHESIS;Ps;0;ON; 0028;;;;Y;SUBSCRIPT OPENING PARENTHESIS;;;; 208E;SUBSCRIPT RIGHT PARENTHESIS;Pe;0;ON; 0029;;;;Y;SUBSCRIPT CLOSING PARENTHESIS;;;; +208F;MODIFIER LETTER HIGH AND LOW VERTICAL LINE;Sk;0;ON;;;;;N;;;;; 2090;LATIN SUBSCRIPT SMALL LETTER A;Lm;0;L; 0061;;;;N;;;;; 2091;LATIN SUBSCRIPT SMALL LETTER E;Lm;0;L; 0065;;;;N;;;;; 2092;LATIN SUBSCRIPT SMALL LETTER O;Lm;0;L; 006F;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 71770a13d..311e5d75e 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-01-29 +# Date: 2025-02-24, 14:03:55 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -903,6 +903,7 @@ 208A..208C ; R # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; R # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; R # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; R # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; R # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; R # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20D0..20DC ; R # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index a4dc4250a..7de8f3cd6 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-24, 14:03:24 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1234,8 +1234,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 810584 code points not listed here. -# Total code points: 1095402 +# The above property value applies to 810583 code points not listed here. +# Total code points: 1095401 # ================================================ @@ -1614,6 +1614,7 @@ FF1A ; CS # Po FULLWIDTH COLON 208C ; ON # Sm SUBSCRIPT EQUALS SIGN 208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; ON # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA 2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT @@ -2027,7 +2028,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB94..1FBEF ; ON # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; ON # So ALARM BELL SYMBOL -# Total code points: 6855 +# Total code points: 6856 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 3a10fc1e4..d6656165c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-01-27, 18:09:10 GMT +# Date: 2025-02-24, 14:03:25 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -752,6 +752,7 @@ 208A..208C ; 0 # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; 0 # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; 0 # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; 0 # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; 0 # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; 0 # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20DD..20E0 ; 0 # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH @@ -2095,7 +2096,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816745 code points not listed here. +# The above property value applies to 816744 code points not listed here. # Total code points: 1113143 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index c3d0bb02a..b857feb90 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-01-27, 18:09:12 GMT +# Date: 2025-02-24, 14:03:27 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -839,6 +839,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS +208F ; N # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN @@ -2144,7 +2145,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760566 code points not listed here. +# The above property value applies to 760565 code points not listed here. # Total code points: 792267 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 22b9a85f6..9da07e565 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-01-27, 18:09:13 GMT +# Date: 2025-02-24, 14:03:27 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -255,7 +255,6 @@ 1FFF ; Cn # 2065 ; Cn # 2072..2073 ; Cn # [2] .. -208F ; Cn # 209D..209F ; Cn # [3] .. 20C1..20CF ; Cn # [15] .. 20F1..20FF ; Cn # [15] .. @@ -754,7 +753,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814697 +# Total code points: 814696 # ================================================ @@ -4126,6 +4125,7 @@ FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 1FDD..1FDF ; Sk # [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI 1FED..1FEF ; Sk # [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA 1FFD..1FFE ; Sk # [2] GREEK OXIA..GREEK DASIA +208F ; Sk # MODIFIER LETTER HIGH AND LOW VERTICAL LINE 309B..309C ; Sk # [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A700..A716 ; Sk # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR A720..A721 ; Sk # [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE @@ -4138,7 +4138,7 @@ FF40 ; Sk # FULLWIDTH GRAVE ACCENT FFE3 ; Sk # FULLWIDTH MACRON 1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -# Total code points: 125 +# Total code points: 126 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 010826221..49bea3246 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-02-14, 17:30:22 GMT +# Date: 2025-02-24, 14:03:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,8 +70,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757136 code points not listed here. -# Total code points: 894604 +# The above property value applies to 757135 code points not listed here. +# Total code points: 894603 # ================================================ @@ -2450,6 +2450,7 @@ E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0FD3 ; BB # Po TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA 1806 ; BB # Pd MONGOLIAN TODO SOFT HYPHEN 1FFD ; BB # Sk GREEK OXIA +208F ; BB # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE A874..A875 ; BB # Po [2] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA DOUBLE HEAD MARK A8FC ; BB # Po DEVANAGARI SIGN SIDDHAM 11175 ; BB # Po MAHAJANI SECTION MARK @@ -2463,7 +2464,7 @@ A8FC ; BB # Po DEVANAGARI SIGN SIDDHAM 11B00..11B09 ; BB # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU 11C70 ; BB # Po MARCHEN HEAD MARK -# Total code points: 55 +# Total code points: 56 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 5876213a1..de3448c6c 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2025-01-27, 18:09:14 GMT +# Date: 2025-02-24, 14:03:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -7507,6 +7507,7 @@ 208C ; SUBSCRIPT EQUALS SIGN 208D ; SUBSCRIPT LEFT PARENTHESIS 208E ; SUBSCRIPT RIGHT PARENTHESIS +208F ; MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090 ; LATIN SUBSCRIPT SMALL LETTER A 2091 ; LATIN SUBSCRIPT SMALL LETTER E 2092 ; LATIN SUBSCRIPT SMALL LETTER O @@ -45870,6 +45871,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159834 +# Total code points: 159835 # EOF From b198c37969a6a5869301e91cb5bc882e8924177a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:11:09 +0100 Subject: [PATCH 6/8] Failing test --- .../text/UCD/AdditionComparisons/197.txt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/197.txt diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/197.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/197.txt new file mode 100644 index 000000000..108c0d9a6 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/197.txt @@ -0,0 +1,21 @@ +# IPA: compound stress mark (208F) +# https://github.com/unicode-org/utc-release-management/issues/197 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Ignoring Block: +Propertywise [ + \x{02C8} ˈ \N{MODIFIER LETTER VERTICAL LINE} + \x{02CC} ˌ \N{MODIFIER LETTER LOW VERTICAL LINE} + \x{208F} \N{MODIFIER LETTER HIGH AND LOW VERTICAL LINE} +] AreAlike +end Ignoring; + +end Ignoring; + +end Ignoring; \ No newline at end of file From a39c6daa8b0bbb175a186dcd760355ae8eff18ec Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:11:31 +0100 Subject: [PATCH 7/8] Change proposed Sk to Lm --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index ed9f291b5..88f9fee60 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -7532,7 +7532,7 @@ 208C;SUBSCRIPT EQUALS SIGN;Sm;0;ON; 003D;;;;N;;;;; 208D;SUBSCRIPT LEFT PARENTHESIS;Ps;0;ON; 0028;;;;Y;SUBSCRIPT OPENING PARENTHESIS;;;; 208E;SUBSCRIPT RIGHT PARENTHESIS;Pe;0;ON; 0029;;;;Y;SUBSCRIPT CLOSING PARENTHESIS;;;; -208F;MODIFIER LETTER HIGH AND LOW VERTICAL LINE;Sk;0;ON;;;;;N;;;;; +208F;MODIFIER LETTER HIGH AND LOW VERTICAL LINE;Lm;0;ON;;;;;N;;;;; 2090;LATIN SUBSCRIPT SMALL LETTER A;Lm;0;L; 0061;;;;N;;;;; 2091;LATIN SUBSCRIPT SMALL LETTER E;Lm;0;L; 0065;;;;N;;;;; 2092;LATIN SUBSCRIPT SMALL LETTER O;Lm;0;L; 006F;;;;N;;;;; From 05cdd354b6eba9b32bc4dc6bdc33d6cbf84548ef Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Mon, 24 Feb 2025 15:12:49 +0100 Subject: [PATCH 8/8] Regenerate UCD --- .../data/ucd/dev/DerivedCoreProperties.txt | 28 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 5 ++-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/PropList.txt | 4 +-- unicodetools/data/ucd/dev/Scripts.txt | 4 +-- .../data/ucd/dev/VerticalOrientation.txt | 5 ++-- .../dev/auxiliary/SentenceBreakProperty.txt | 5 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 4 +-- .../dev/extracted/DerivedCombiningClass.txt | 5 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 5 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 9 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 4 +-- 13 files changed, 41 insertions(+), 47 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index a4c185a73..b60c3ef8f 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-02-24, 14:03:25 GMT +# Date: 2025-02-24, 14:12:15 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -751,7 +751,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1FF6..1FFC ; Alphabetic # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Alphabetic # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; Alphabetic # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 2102 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL C 2107 ; Alphabetic # L& EULER CONSTANT 210A..2113 ; Alphabetic # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -1471,7 +1471,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 147441 +# Total code points: 147442 # ================================================ @@ -3274,8 +3274,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 2066..206F ; Case_Ignorable # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES 2071 ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER N -208F ; Case_Ignorable # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; Case_Ignorable # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; Case_Ignorable # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20D0..20DC ; Case_Ignorable # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; Case_Ignorable # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -6545,7 +6544,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1FF6..1FFC ; ID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; ID_Start # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 2102 ; ID_Start # L& DOUBLE-STRUCK CAPITAL C 2107 ; ID_Start # L& EULER CONSTANT 210A..2113 ; ID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -7045,7 +7044,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145935 +# Total code points: 145936 # ================================================ @@ -7633,7 +7632,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2054 ; ID_Continue # Pc INVERTED UNDERTIE 2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; ID_Continue # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20D0..20DC ; ID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 ; ID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E5..20F0 ; ID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE @@ -8485,7 +8484,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149273 +# Total code points: 149274 # ================================================ @@ -8776,7 +8775,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1FF6..1FFC ; XID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; XID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; XID_Start # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 2102 ; XID_Start # L& DOUBLE-STRUCK CAPITAL C 2107 ; XID_Start # L& EULER CONSTANT 210A..2113 ; XID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -9280,7 +9279,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145912 +# Total code points: 145913 # ================================================ @@ -9864,7 +9863,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2054 ; XID_Continue # Pc INVERTED UNDERTIE 2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; XID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; XID_Continue # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20D0..20DC ; XID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20E1 ; XID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E5..20F0 ; XID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE @@ -10721,7 +10720,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149254 +# Total code points: 149255 # ================================================ @@ -11813,8 +11812,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 208A..208C ; Grapheme_Base # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; Grapheme_Base # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; Grapheme_Base # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; Grapheme_Base # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 25aaa3b9c..cdaa3ab8e 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2025-02-24, 14:03:30 GMT +# Date: 2025-02-24, 14:12:20 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -973,8 +973,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; N # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; N # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20A9 ; H # Sc WON SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 53ec3ebad..bbb5dad6b 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-17.0.0.txt -# Date: 2025-02-24, 13:56:14 GMT +# Date: 2025-02-24, 14:12:22 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -960,7 +960,7 @@ 208A..208C ; AL # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; OP # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; CL # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; BB # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE +208F ; BB # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2090..209C ; AL # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A6 ; PR # Sc [7] EURO-CURRENCY SIGN..NAIRA SIGN 20A7 ; PO # Sc PESETA SIGN diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index cd1423518..6b08c3a0e 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-17.0.0.txt -# Date: 2025-02-24, 14:03:40 GMT +# Date: 2025-02-24, 14:12:32 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1033,7 +1033,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI 1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA 1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA -208F ; Diacritic # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE +208F ; Diacritic # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2E2F ; Diacritic # Lm VERTICAL TILDE 302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index f876fa0e5..bf89b6281 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2025-02-24, 14:03:53 GMT +# Date: 2025-02-24, 14:12:44 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -154,7 +154,7 @@ 208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; Common # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE +208F ; Common # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE 20A0..20C0 ; Common # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 311e5d75e..917f51ce0 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-02-24, 14:03:55 GMT +# Date: 2025-02-24, 14:12:46 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -903,8 +903,7 @@ 208A..208C ; R # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; R # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; R # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; R # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; R # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; R # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; R # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20D0..20DC ; R # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; U # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 0e5dc2e11..77ab338cd 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:39 GMT +# Date: 2025-02-24, 14:12:45 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2226,6 +2226,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1CEE..1CF3 ; OLetter # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA 1CF5..1CF6 ; OLetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1CFA ; OLetter # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +208F ; OLetter # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2135..2138 ; OLetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 2180..2182 ; OLetter # Nl [3] ROMAN NUMERAL ONE THOUSAND C D..ROMAN NUMERAL TEN THOUSAND 2185..2188 ; OLetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND @@ -2622,7 +2623,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141520 +# Total code points: 141521 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index e5a5b9937..c5d234d89 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-17.0.0.txt -# Date: 2025-01-27, 18:09:43 GMT +# Date: 2025-02-24, 14:12:47 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -944,7 +944,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I 207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; ALetter # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; ALetter # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C 2107 ; ALetter # L& EULER CONSTANT 210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -1383,7 +1383,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 34004 +# Total code points: 34005 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 7de8f3cd6..47538e59f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-02-24, 14:03:24 GMT +# Date: 2025-02-24, 14:12:13 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1614,7 +1614,7 @@ FF1A ; CS # Po FULLWIDTH COLON 208C ; ON # Sm SUBSCRIPT EQUALS SIGN 208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; ON # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE +208F ; ON # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE 2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA 2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index d6656165c..c7a99cd0a 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-02-24, 14:03:25 GMT +# Date: 2025-02-24, 14:12:15 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -752,8 +752,7 @@ 208A..208C ; 0 # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; 0 # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; 0 # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; 0 # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; 0 # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; 0 # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C0 ; 0 # Sc [33] EURO-CURRENCY SIGN..SOM SIGN 20DD..20E0 ; 0 # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E2..20E4 ; 0 # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index b857feb90..a41edb855 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-02-24, 14:03:27 GMT +# Date: 2025-02-24, 14:12:16 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -839,8 +839,7 @@ 208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS -208F ; N # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE -2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; N # Lm [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN 20AD..20C0 ; N # Sc [20] KIP SIGN..SOM SIGN diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 9da07e565..7c931d866 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-02-24, 14:03:27 GMT +# Date: 2025-02-24, 14:12:17 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2138,7 +2138,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D9B..1DBF ; Lm # [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 2071 ; Lm # SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lm # SUPERSCRIPT LATIN SMALL LETTER N -2090..209C ; Lm # [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +208F..209C ; Lm # [14] MODIFIER LETTER HIGH AND LOW VERTICAL LINE..LATIN SUBSCRIPT SMALL LETTER T 2C7C..2C7D ; Lm # [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; Lm # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E2F ; Lm # VERTICAL TILDE @@ -2189,7 +2189,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E6FF ; Lm # TAI YO XAM LAI 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 410 +# Total code points: 411 # ================================================ @@ -4125,7 +4125,6 @@ FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 1FDD..1FDF ; Sk # [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI 1FED..1FEF ; Sk # [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA 1FFD..1FFE ; Sk # [2] GREEK OXIA..GREEK DASIA -208F ; Sk # MODIFIER LETTER HIGH AND LOW VERTICAL LINE 309B..309C ; Sk # [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A700..A716 ; Sk # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR A720..A721 ; Sk # [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE @@ -4138,7 +4137,7 @@ FF40 ; Sk # FULLWIDTH GRAVE ACCENT FFE3 ; Sk # FULLWIDTH MACRON 1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -# Total code points: 126 +# Total code points: 125 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 49bea3246..f22a314c0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-02-24, 14:03:28 GMT +# Date: 2025-02-24, 14:12:18 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2450,7 +2450,7 @@ E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 0FD3 ; BB # Po TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA 1806 ; BB # Pd MONGOLIAN TODO SOFT HYPHEN 1FFD ; BB # Sk GREEK OXIA -208F ; BB # Sk MODIFIER LETTER HIGH AND LOW VERTICAL LINE +208F ; BB # Lm MODIFIER LETTER HIGH AND LOW VERTICAL LINE A874..A875 ; BB # Po [2] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA DOUBLE HEAD MARK A8FC ; BB # Po DEVANAGARI SIGN SIDDHAM 11175 ; BB # Po MAHAJANI SECTION MARK