diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex index 6903c74c3c1..ba83abfdada 100644 --- a/lib/elixir/lib/string.ex +++ b/lib/elixir/lib/string.ex @@ -18,7 +18,7 @@ defmodule String do "hello world" The functions in this module act according to - [The Unicode Standard, Version 15.1.0](http://www.unicode.org/versions/Unicode15.1.0/). + [The Unicode Standard, Version 16.0.0](http://www.unicode.org/versions/Unicode16.0.0/). ## Interpolation diff --git a/lib/elixir/unicode/IdentifierType.txt b/lib/elixir/unicode/IdentifierType.txt index 695156e6ad7..145d40c5981 100644 --- a/lib/elixir/unicode/IdentifierType.txt +++ b/lib/elixir/unicode/IdentifierType.txt @@ -1,11 +1,11 @@ # IdentifierType.txt -# Date: 2023-08-11, 17:46:40 GMT -# © 2023 Unicode®, Inc. +# Date: 2024-08-14, 23:39:57 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Security Mechanisms for UTS #39 -# Version: 15.1.0 +# Version: 16.0.0 # # For documentation and usage, see https://www.unicode.org/reports/tr39 # @@ -16,7 +16,7 @@ # # For the purpose of regular expressions, the property Identifier_Type is defined as # mapping each code point to a set of enumerated values. -# The short name of Identifier_Type is the same as the long name. +# The short name of Identifier_Type is ID_Type. # The possible values are: # Not_Character, Deprecated, Default_Ignorable, Not_NFKC, Not_XID, # Exclusion, Obsolete, Technical, Uncommon_Use, Limited_Use, Inclusion, Recommended @@ -712,6 +712,12 @@ ABF0..ABF9 ; Limited_Use # 5.2 [10] MEETEI MAYEK DIGIT Z # Total code points: 5033 +# Identifier_Type: Limited_Use Uncommon_Use + +A9CF ; Limited_Use Uncommon_Use # 5.2 JAVANESE PANGRANGKEP + +# Total code points: 1 + # Identifier_Type: Limited_Use Technical 0740..074A ; Limited_Use Technical # 3.0 [11] SYRIAC FEMININE DOT..SYRIAC BARREKH @@ -720,12 +726,6 @@ ABF0..ABF9 ; Limited_Use # 5.2 [10] MEETEI MAYEK DIGIT Z # Total code points: 21 -# Identifier_Type: Limited_Use Exclusion - -A9CF ; Limited_Use Exclusion # 5.2 JAVANESE PANGRANGKEP - -# Total code points: 1 - # Identifier_Type: Limited_Use Obsolete 07E8..07EA ; Limited_Use Obsolete # 5.0 [3] NKO LETTER JONA JA..NKO LETTER JONA RA @@ -749,9 +749,11 @@ A62A..A62B ; Limited_Use Obsolete # 5.1 [2] VAI SYLLABLE NDOLE M 19DE..19DF ; Limited_Use Not_XID # 4.1 [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV 1AA0..1AA6 ; Limited_Use Not_XID # 5.2 [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA 1AA8..1AAD ; Limited_Use Not_XID # 5.2 [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B4E..1B4F ; Limited_Use Not_XID # 16.0 [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B5A..1B6A ; Limited_Use Not_XID # 5.0 [17] BALINESE PANTI..BALINESE MUSICAL SYMBOL DANG GEDE 1B74..1B7C ; Limited_Use Not_XID # 5.0 [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING 1B7D..1B7E ; Limited_Use Not_XID # 14.0 [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7F ; Limited_Use Not_XID # 16.0 BALINESE PANTI BAWAK 1BFC..1BFF ; Limited_Use Not_XID # 6.0 [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C3B..1C3F ; Limited_Use Not_XID # 5.1 [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Limited_Use Not_XID # 5.1 [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD @@ -787,7 +789,7 @@ ABEB ; Limited_Use Not_XID # 5.2 MEETEI MAYEK CHEIKHE 1E2FF ; Limited_Use Not_XID # 12.0 WANCHO NGUN SIGN 1E95E..1E95F ; Limited_Use Not_XID # 9.0 [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 204 +# Total code points: 207 # Identifier_Type: Uncommon_Use @@ -831,6 +833,7 @@ ABEB ; Limited_Use Not_XID # 5.2 MEETEI MAYEK CHEIKHE 06DF..06E4 ; Uncommon_Use # 1.1 [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA 06E7..06E8 ; Uncommon_Use # 1.1 [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON 06EA..06ED ; Uncommon_Use # 1.1 [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0897 ; Uncommon_Use # 16.0 ARABIC PEPET 0898..089F ; Uncommon_Use # 14.0 [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA 08B3..08B4 ; Uncommon_Use # 8.0 [2] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER KAF WITH DOT BELOW 08CA..08D2 ; Uncommon_Use # 14.0 [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW @@ -852,6 +855,7 @@ ABEB ; Limited_Use Not_XID # 5.2 MEETEI MAYEK CHEIKHE 0D62..0D63 ; Uncommon_Use # 5.1 [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL 0F39 ; Uncommon_Use # 2.0 TIBETAN MARK TSA -PHRU 1AC1..1ACE ; Uncommon_Use # 14.0 [14] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING LATIN SMALL LETTER INSULAR T +1C89..1C8A ; Uncommon_Use # 16.0 [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE 2054 ; Uncommon_Use # 4.0 INVERTED UNDERTIE 2C68..2C6C ; Uncommon_Use # 5.0 [5] LATIN SMALL LETTER H WITH DESCENDER..LATIN SMALL LETTER Z WITH DESCENDER A66F ; Uncommon_Use # 5.1 COMBINING CYRILLIC VZMET @@ -860,14 +864,19 @@ A78B..A78C ; Uncommon_Use # 5.1 [2] LATIN CAPITAL LETTER A78F ; Uncommon_Use # 8.0 LATIN LETTER SINOLOGICAL DOT A7B2..A7B7 ; Uncommon_Use # 8.0 [6] LATIN CAPITAL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER OMEGA A7B8..A7B9 ; Uncommon_Use # 11.0 [2] LATIN CAPITAL LETTER U WITH STROKE..LATIN SMALL LETTER U WITH STROKE +A7CB..A7CD ; Uncommon_Use # 16.0 [3] LATIN CAPITAL LETTER RAMS HORN..LATIN SMALL LETTER S WITH DIAGONAL STROKE +A7DA..A7DC ; Uncommon_Use # 16.0 [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER LAMBDA WITH STROKE AB60..AB63 ; Uncommon_Use # 8.0 [4] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER UO 10780 ; Uncommon_Use # 14.0 MODIFIER LETTER SMALL CAPITAL AA +10EC2..10EC4 ; Uncommon_Use # 16.0 [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; Uncommon_Use # 16.0 ARABIC COMBINING ALEF OVERLAY 10EFD..10EFF ; Uncommon_Use # 15.0 [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +116D0..116E3 ; Uncommon_Use # 16.0 [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 1AFF0..1AFF3 ; Uncommon_Use # 14.0 [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; Uncommon_Use # 14.0 [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 1AFFD..1AFFE ; Uncommon_Use # 14.0 [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 -# Total code points: 313 +# Total code points: 346 # Identifier_Type: Uncommon_Use Technical @@ -1168,6 +1177,7 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 105A3..105B1 ; Exclusion # 14.0 [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; Exclusion # 14.0 [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Exclusion # 14.0 [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; Exclusion # 16.0 [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; Exclusion # 7.0 [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; Exclusion # 7.0 [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; Exclusion # 7.0 [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1205,6 +1215,9 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 10C00..10C48 ; Exclusion # 5.2 [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10C80..10CB2 ; Exclusion # 8.0 [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Exclusion # 8.0 [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D40..10D65 ; Exclusion # 16.0 [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; Exclusion # 16.0 [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F..10D85 ; Exclusion # 16.0 [23] GARAY REDUPLICATION MARK..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; Exclusion # 13.0 [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; Exclusion # 13.0 [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Exclusion # 13.0 [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE @@ -1257,6 +1270,16 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 1135D..11363 ; Exclusion # 7.0 [7] GRANTHA SIGN PLUTA..GRANTHA VOWEL SIGN VOCALIC LL 11366..1136C ; Exclusion # 7.0 [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Exclusion # 7.0 [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11380..11389 ; Exclusion # 16.0 [10] TULU-TIGALARI LETTER A..TULU-TIGALARI LETTER VOCALIC LL +1138B ; Exclusion # 16.0 TULU-TIGALARI LETTER EE +1138E ; Exclusion # 16.0 TULU-TIGALARI LETTER AI +11390..113B5 ; Exclusion # 16.0 [38] TULU-TIGALARI LETTER OO..TULU-TIGALARI LETTER LLLA +113B7..113C0 ; Exclusion # 16.0 [10] TULU-TIGALARI SIGN AVAGRAHA..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Exclusion # 16.0 TULU-TIGALARI VOWEL SIGN EE +113C5 ; Exclusion # 16.0 TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Exclusion # 16.0 [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113D3 ; Exclusion # 16.0 [8] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; Exclusion # 16.0 [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11480..114C5 ; Exclusion # 7.0 [70] TIRHUTA ANJI..TIRHUTA GVANG 114C7 ; Exclusion # 7.0 TIRHUTA OM 114D0..114D9 ; Exclusion # 7.0 [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE @@ -1296,6 +1319,8 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 11A86..11A99 ; Exclusion # 10.0 [20] SOYOMBO CLUSTER-INITIAL LETTER RA..SOYOMBO SUBJOINER 11A9D ; Exclusion # 11.0 SOYOMBO MARK PLUTA 11AC0..11AF8 ; Exclusion # 7.0 [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Exclusion # 16.0 [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; Exclusion # 16.0 [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; Exclusion # 9.0 [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C36 ; Exclusion # 9.0 [45] BHAIKSUKI LETTER E..BHAIKSUKI VOWEL SIGN VOCALIC L 11C38..11C40 ; Exclusion # 9.0 [9] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN AVAGRAHA @@ -1315,6 +1340,7 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 11F12..11F3A ; Exclusion # 15.0 [41] KAWI LETTER KA..KAWI VOWEL SIGN VOCALIC R 11F3E..11F42 ; Exclusion # 15.0 [5] KAWI VOWEL SIGN E..KAWI CONJOINER 11F50..11F59 ; Exclusion # 15.0 [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11F5A ; Exclusion # 16.0 KAWI SIGN NUKTA 12000..1236E ; Exclusion # 5.0 [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 1236F..12398 ; Exclusion # 7.0 [42] CUNEIFORM SIGN KAP ELAMITE..CUNEIFORM SIGN UM TIMES ME 12399 ; Exclusion # 8.0 CUNEIFORM SIGN U U @@ -1325,7 +1351,9 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 13000..1342E ; Exclusion # 5.2 [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 1342F ; Exclusion # 15.0 EGYPTIAN HIEROGLYPH V011D 13440..13455 ; Exclusion # 15.0 [22] EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +13460..143FA ; Exclusion # 16.0 [3995] EGYPTIAN HIEROGLYPH-13460..EGYPTIAN HIEROGLYPH-143FA 14400..14646 ; Exclusion # 8.0 [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16100..16139 ; Exclusion # 16.0 [58] GURUNG KHEMA LETTER A..GURUNG KHEMA DIGIT NINE 16A70..16ABE ; Exclusion # 14.0 [79] TANGSA LETTER OZ..TANGSA LETTER ZA 16AC0..16AC9 ; Exclusion # 14.0 [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE 16AD0..16AED ; Exclusion # 7.0 [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I @@ -1335,6 +1363,8 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 16B50..16B59 ; Exclusion # 7.0 [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE 16B63..16B77 ; Exclusion # 7.0 [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS 16B7D..16B8F ; Exclusion # 7.0 [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16D40..16D6C ; Exclusion # 16.0 [45] KIRAT RAI SIGN ANUSVARA..KIRAT RAI SIGN SAAT +16D70..16D79 ; Exclusion # 16.0 [10] KIRAT RAI DIGIT ZERO..KIRAT RAI DIGIT NINE 16E40..16E7F ; Exclusion # 11.0 [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 16FE0 ; Exclusion # 9.0 TANGUT ITERATION MARK 16FE1 ; Exclusion # 10.0 NUSHU ITERATION MARK @@ -1344,6 +1374,7 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 187F2..187F7 ; Exclusion # 12.0 [6] TANGUT IDEOGRAPH-187F2..TANGUT IDEOGRAPH-187F7 18800..18AF2 ; Exclusion # 9.0 [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755 18AF3..18CD5 ; Exclusion # 13.0 [483] TANGUT COMPONENT-756..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18CFF ; Exclusion # 16.0 KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D08 ; Exclusion # 13.0 [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 1B170..1B2FB ; Exclusion # 10.0 [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 1BC00..1BC6A ; Exclusion # 7.0 [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -1364,21 +1395,25 @@ A930..A953 ; Exclusion # 5.1 [36] REJANG LETTER KA..RE 1E026..1E02A ; Exclusion # 9.0 [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA 1E290..1E2AE ; Exclusion # 14.0 [31] TOTO LETTER PA..TOTO SIGN RISING TONE 1E4D0..1E4F9 ; Exclusion # 15.0 [42] NAG MUNDARI LETTER O..NAG MUNDARI DIGIT NINE +1E5D0..1E5FA ; Exclusion # 16.0 [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E800..1E8C4 ; Exclusion # 7.0 [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON 1E8D0..1E8D6 ; Exclusion # 7.0 [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -# Total code points: 16071 +# Total code points: 20461 # Identifier_Type: Exclusion Not_XID 0830..083E ; Exclusion Not_XID # 5.2 [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 1680 ; Exclusion Not_XID # 3.0 OGHAM SPACE MARK 169B..169C ; Exclusion Not_XID # 3.0 [2] OGHAM FEATHER MARK..OGHAM REVERSED FEATHER MARK +16EB..16ED ; Exclusion Not_XID # 3.0 [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION 1735..1736 ; Exclusion Not_XID # 3.2 [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION 1800..180A ; Exclusion Not_XID # 3.0 [11] MONGOLIAN BIRGA..MONGOLIAN NIRUGU 1A1E..1A1F ; Exclusion Not_XID # 4.1 [2] BUGINESE PALLAWA..BUGINESE END OF SECTION 2CE5..2CEA ; Exclusion Not_XID # 4.1 [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CF9..2CFF ; Exclusion Not_XID # 4.1 [7] COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2E30 ; Exclusion Not_XID # 5.1 RING POINT +2E3C ; Exclusion Not_XID # 7.0 STENOGRAPHIC FULL STOP A874..A877 ; Exclusion Not_XID # 5.0 [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 10100..10102 ; Exclusion Not_XID # 4.0 [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK @@ -1412,6 +1447,8 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 10B99..10B9C ; Exclusion Not_XID # 7.0 [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT 10BA9..10BAF ; Exclusion Not_XID # 7.0 [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED 10CFA..10CFF ; Exclusion Not_XID # 8.0 [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D6E ; Exclusion Not_XID # 16.0 GARAY HYPHEN +10D8E..10D8F ; Exclusion Not_XID # 16.0 [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10EAD ; Exclusion Not_XID # 13.0 YEZIDI HYPHENATION MARK 10F1D..10F26 ; Exclusion Not_XID # 11.0 [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F51..10F59 ; Exclusion Not_XID # 11.0 [9] SOGDIAN NUMBER ONE..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -1430,6 +1467,8 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 111DD..111DF ; Exclusion Not_XID # 8.0 [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 11238..1123D ; Exclusion Not_XID # 7.0 [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN 112A9 ; Exclusion Not_XID # 8.0 MULTANI SECTION MARK +113D4..113D5 ; Exclusion Not_XID # 16.0 [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA +113D7..113D8 ; Exclusion Not_XID # 16.0 [2] TULU-TIGALARI SIGN OM PUSHPIKA..TULU-TIGALARI SIGN SHRII PUSHPIKA 114C6 ; Exclusion Not_XID # 7.0 TIRHUTA ABBREVIATION SIGN 115C1..115C9 ; Exclusion Not_XID # 7.0 [9] SIDDHAM SIGN SIDDHAM..SIDDHAM END OF TEXT MARK 115CA..115D7 ; Exclusion Not_XID # 8.0 [14] SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES @@ -1444,6 +1483,7 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 11A3F..11A46 ; Exclusion Not_XID # 10.0 [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK 11A9A..11A9C ; Exclusion Not_XID # 10.0 [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD 11A9E..11AA2 ; Exclusion Not_XID # 10.0 [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11BE1 ; Exclusion Not_XID # 16.0 SUNUWAR SIGN PVO 11C41..11C45 ; Exclusion Not_XID # 9.0 [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 11C5A..11C6C ; Exclusion Not_XID # 9.0 [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK 11C70..11C71 ; Exclusion Not_XID # 9.0 [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD @@ -1459,6 +1499,7 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 16B37..16B3F ; Exclusion Not_XID # 7.0 [9] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN XYEEM FAIB 16B44..16B45 ; Exclusion Not_XID # 7.0 [2] PAHAWH HMONG SIGN XAUS..PAHAWH HMONG SIGN CIM TSOV ROG 16B5B..16B61 ; Exclusion Not_XID # 7.0 [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16D6D..16D6F ; Exclusion Not_XID # 16.0 [3] KIRAT RAI SIGN YUPI..KIRAT RAI DOUBLE DANDA 16E80..16E9A ; Exclusion Not_XID # 11.0 [27] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN EXCLAMATION OH 1BC9C ; Exclusion Not_XID # 7.0 DUPLOYAN SIGN O WITH CROSS 1BC9F ; Exclusion Not_XID # 7.0 DUPLOYAN PUNCTUATION CHINOOK FULL STOP @@ -1467,9 +1508,10 @@ A95F ; Exclusion Not_XID # 5.1 REJANG SECTION MARK 1DA6D..1DA74 ; Exclusion Not_XID # 8.0 [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING 1DA76..1DA83 ; Exclusion Not_XID # 8.0 [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH 1DA85..1DA8B ; Exclusion Not_XID # 8.0 [7] SIGNWRITING LOCATION TORSO..SIGNWRITING PARENTHESIS +1E5FF ; Exclusion Not_XID # 16.0 OL ONAL ABBREVIATION SIGN 1E8C7..1E8CF ; Exclusion Not_XID # 7.0 [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE -# Total code points: 1125 +# Total code points: 1142 # Identifier_Type: Obsolete @@ -1593,7 +1635,7 @@ D7CB..D7FB ; Obsolete # 5.2 [49] HANGUL JONGSEONG NIE 2127 ; Obsolete Not_XID # 1.1 INVERTED OHM SIGN 214F ; Obsolete Not_XID # 5.1 SYMBOL FOR SAMARITAN SOURCE 2E0E..2E16 ; Obsolete Not_XID # 4.1 [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE -2E2A..2E30 ; Obsolete Not_XID # 5.1 [7] TWO DOTS OVER ONE DOT PUNCTUATION..RING POINT +2E2A..2E2F ; Obsolete Not_XID # 5.1 [6] TWO DOTS OVER ONE DOT PUNCTUATION..VERTICAL TILDE 2E31 ; Obsolete Not_XID # 5.2 WORD SEPARATOR MIDDLE DOT 2E32 ; Obsolete Not_XID # 6.1 TURNED COMMA 2E35 ; Obsolete Not_XID # 6.1 TURNED SEMICOLON @@ -1607,7 +1649,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 1D200..1D241 ; Obsolete Not_XID # 4.1 [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; Obsolete Not_XID # 4.1 GREEK MUSICAL LEIMMA -# Total code points: 191 +# Total code points: 190 # Identifier_Type: Not_XID @@ -1712,7 +1754,6 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 1361..1368 ; Not_XID # 3.0 [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR 1372..137C ; Not_XID # 3.0 [11] ETHIOPIC NUMBER TEN..ETHIOPIC NUMBER TEN THOUSAND 1390..1399 ; Not_XID # 4.1 [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT -16EB..16ED ; Not_XID # 3.0 [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION 17D4..17D6 ; Not_XID # 3.0 [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH 17D9..17DB ; Not_XID # 3.0 [3] KHMER SIGN PHNAEK MUAN..KHMER CURRENCY SYMBOL RIEL 17F0..17F9 ; Not_XID # 4.0 [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON @@ -1783,6 +1824,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 23FF ; Not_XID # 10.0 OBSERVER EYE SYMBOL 2400..2424 ; Not_XID # 1.1 [37] SYMBOL FOR NULL..SYMBOL FOR NEWLINE 2425..2426 ; Not_XID # 3.0 [2] SYMBOL FOR DELETE FORM TWO..SYMBOL FOR SUBSTITUTE FORM TWO +2427..2429 ; Not_XID # 16.0 [3] SYMBOL FOR DELETE SQUARE CHECKER BOARD FORM..SYMBOL FOR DELETE MEDIUM SHADE FORM 2440..244A ; Not_XID # 1.1 [11] OCR HOOK..OCR DOUBLE BACKSLASH 2500..2595 ; Not_XID # 1.1 [150] BOX DRAWINGS LIGHT HORIZONTAL..RIGHT ONE EIGHTH BLOCK 2596..259F ; Not_XID # 3.2 [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT @@ -1882,7 +1924,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 2E33..2E34 ; Not_XID # 6.1 [2] RAISED DOT..RAISED COMMA 2E36..2E38 ; Not_XID # 6.1 [3] DAGGER WITH LEFT GUARD..TURNED DAGGER 2E3A..2E3B ; Not_XID # 6.1 [2] TWO-EM DASH..THREE-EM DASH -2E3C..2E42 ; Not_XID # 7.0 [7] STENOGRAPHIC FULL STOP..DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E3D..2E42 ; Not_XID # 7.0 [6] VERTICAL SIX DOTS..DOUBLE LOW-REVERSED-9 QUOTATION MARK 2E43..2E44 ; Not_XID # 9.0 [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK 2E45..2E49 ; Not_XID # 10.0 [5] INVERTED LOW KAVYKA..DOUBLE STACKED COMMA 2E4A..2E4E ; Not_XID # 11.0 [5] DOTTED SOLIDUS..PUNCTUS ELEVATUS MARK @@ -1905,6 +1947,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 3190..3191 ; Not_XID # 1.1 [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 31C0..31CF ; Not_XID # 4.1 [16] CJK STROKE T..CJK STROKE N 31D0..31E3 ; Not_XID # 5.1 [20] CJK STROKE H..CJK STROKE Q +31E4..31E5 ; Not_XID # 16.0 [2] CJK STROKE HXG..CJK STROKE SZP 31EF ; Not_XID # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3248..324F ; Not_XID # 5.2 [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE A67E ; Not_XID # 5.1 CYRILLIC KAVYKA @@ -1930,6 +1973,8 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE 11FC0..11FF1 ; Not_XID # 12.0 [50] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL SIGN VAKAIYARAA 11FFF ; Not_XID # 12.0 TAMIL PUNCTUATION END OF TEXT 16FE2 ; Not_XID # 12.0 OLD CHINESE HOOK MARK +1CC00..1CCD5 ; Not_XID # 16.0 [214] UP-POINTING GO-KART..LOWER RIGHT QUADRANT STANDING KNIGHT +1CD00..1CEB3 ; Not_XID # 16.0 [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET 1D2C0..1D2D3 ; Not_XID # 15.0 [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Not_XID # 11.0 [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D371 ; Not_XID # 5.0 [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE @@ -2064,6 +2109,8 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE 1F860..1F887 ; Not_XID # 7.0 [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; Not_XID # 7.0 [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0..1F8B1 ; Not_XID # 13.0 [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B2..1F8BB ; Not_XID # 16.0 [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR +1F8C0..1F8C1 ; Not_XID # 16.0 [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; Not_XID # 10.0 [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C ; Not_XID # 13.0 PINCHED FINGERS 1F90D..1F90F ; Not_XID # 12.0 [3] WHITE HEART..PINCHING HAND @@ -2118,6 +2165,8 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE 1FA80..1FA82 ; Not_XID # 12.0 [3] YO-YO..PARACHUTE 1FA83..1FA86 ; Not_XID # 13.0 [4] BOOMERANG..NESTING DOLLS 1FA87..1FA88 ; Not_XID # 15.0 [2] MARACAS..FLUTE +1FA89 ; Not_XID # 16.0 HARP +1FA8F ; Not_XID # 16.0 SHOVEL 1FA90..1FA95 ; Not_XID # 12.0 [6] RINGED PLANET..BANJO 1FA96..1FAA8 ; Not_XID # 13.0 [19] MILITARY HELMET..ROCK 1FAA9..1FAAC ; Not_XID # 14.0 [4] MIRROR BALL..HAMSA @@ -2125,21 +2174,27 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE 1FAB0..1FAB6 ; Not_XID # 13.0 [7] FLY..FEATHER 1FAB7..1FABA ; Not_XID # 14.0 [4] LOTUS..NEST WITH EGGS 1FABB..1FABD ; Not_XID # 15.0 [3] HYACINTH..WING +1FABE ; Not_XID # 16.0 LEAFLESS TREE 1FABF ; Not_XID # 15.0 GOOSE 1FAC0..1FAC2 ; Not_XID # 13.0 [3] ANATOMICAL HEART..PEOPLE HUGGING 1FAC3..1FAC5 ; Not_XID # 14.0 [3] PREGNANT MAN..PERSON WITH CROWN +1FAC6 ; Not_XID # 16.0 FINGERPRINT 1FACE..1FACF ; Not_XID # 15.0 [2] MOOSE..DONKEY 1FAD0..1FAD6 ; Not_XID # 13.0 [7] BLUEBERRIES..TEAPOT 1FAD7..1FAD9 ; Not_XID # 14.0 [3] POURING LIQUID..JAR 1FADA..1FADB ; Not_XID # 15.0 [2] GINGER ROOT..PEA POD +1FADC ; Not_XID # 16.0 ROOT VEGETABLE +1FADF ; Not_XID # 16.0 SPLATTER 1FAE0..1FAE7 ; Not_XID # 14.0 [8] MELTING FACE..BUBBLES 1FAE8 ; Not_XID # 15.0 SHAKING FACE +1FAE9 ; Not_XID # 16.0 FACE WITH BAGS UNDER EYES 1FAF0..1FAF6 ; Not_XID # 14.0 [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS 1FAF7..1FAF8 ; Not_XID # 15.0 [2] LEFTWARDS PUSHING HAND..RIGHTWARDS PUSHING HAND 1FB00..1FB92 ; Not_XID # 13.0 [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; Not_XID # 13.0 [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBCB..1FBEF ; Not_XID # 16.0 [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5704 +# Total code points: 6411 # Identifier_Type: Not_NFKC @@ -2363,6 +2418,7 @@ FFE8..FFEE ; Not_NFKC # 1.1 [7] HALFWIDTH FORMS LIGH 10781..10785 ; Not_NFKC # 14.0 [5] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Not_NFKC # 14.0 [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Not_NFKC # 14.0 [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1CCD6..1CCF9 ; Not_NFKC # 16.0 [36] OUTLINED LATIN CAPITAL LETTER A..OUTLINED DIGIT NINE 1D15E..1D164 ; Not_NFKC # 3.1 [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Not_NFKC # 3.1 [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1D400..1D454 ; Not_NFKC # 3.1 [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -2452,7 +2508,7 @@ FFE8..FFEE ; Not_NFKC # 1.1 [7] HALFWIDTH FORMS LIGH 1FBF0..1FBF9 ; Not_NFKC # 13.0 [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE 2F800..2FA1D ; Not_NFKC # 3.1 [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4921 +# Total code points: 4957 # Identifier_Type: Default_Ignorable diff --git a/lib/elixir/unicode/PropList.txt b/lib/elixir/unicode/PropList.txt index 777e8a28818..fae2831e7a5 100644 --- a/lib/elixir/unicode/PropList.txt +++ b/lib/elixir/unicode/PropList.txt @@ -1,8 +1,8 @@ -# PropList-15.1.0.txt -# Date: 2023-08-01, 21:56:53 GMT -# © 2023 Unicode®, Inc. +# PropList-16.0.0.txt +# Date: 2024-05-31, 18:09:48 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -62,9 +62,10 @@ FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTA FE58 ; Dash # Pd SMALL EM DASH FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS +10D6E ; Dash # Pd GARAY HYPHEN 10EAD ; Dash # Pd YEZIDI HYPHENATION MARK -# Total code points: 30 +# Total code points: 31 # ================================================ @@ -132,7 +133,8 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION 070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS 07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK -0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0830..0835 ; Terminal_Punctuation # Po [6] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION SHIYYAALAA +0837..083E ; Terminal_Punctuation # Po [8] SAMARITAN PUNCTUATION MELODIC QITSA..SAMARITAN PUNCTUATION ANNAAU 085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION 0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT @@ -149,13 +151,16 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP 1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK 1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B4E..1B4F ; Terminal_Punctuation # Po [2] BALINESE INVERTED CARIK SIKI..BALINESE INVERTED CARIK PAREREN 1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA 1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN -1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B7D..1B7F ; Terminal_Punctuation # Po [3] BALINESE PANTI LANTANG..BALINESE PANTI BAWAK 1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +2024 ; Terminal_Punctuation # Po ONE DOT LEADER 203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Terminal_Punctuation # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK 2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK 2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP 2E41 ; Terminal_Punctuation # Po REVERSED COMMA @@ -174,6 +179,8 @@ AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUN AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE12 ; Terminal_Punctuation # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Terminal_Punctuation # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK @@ -201,6 +208,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK 112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +113D4..113D5 ; Terminal_Punctuation # Po [2] TULU-TIGALARI DANDA..TULU-TIGALARI DOUBLE DANDA 1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA 1145A..1145B ; Terminal_Punctuation # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK 115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR @@ -221,11 +229,12 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP 16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM 16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +16D6E..16D6F ; Terminal_Punctuation # Po [2] KIRAT RAI DANDA..KIRAT RAI DOUBLE DANDA 16E97..16E98 ; Terminal_Punctuation # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 278 +# Total code points: 291 # ================================================ @@ -430,6 +439,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L # ================================================ 0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0363..036F ; Other_Alphabetic # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X 05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG 05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE 05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT @@ -450,6 +460,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0897 ; Other_Alphabetic # Mn ARABIC PEPET 08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA 08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN 08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA @@ -634,7 +645,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN -1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DD3..1DF4 ; Other_Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA @@ -689,7 +700,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -732,6 +745,12 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU 11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK 11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +113B8..113BA ; Other_Alphabetic # Mc [3] TULU-TIGALARI VOWEL SIGN AA..TULU-TIGALARI VOWEL SIGN II +113BB..113C0 ; Other_Alphabetic # Mn [6] TULU-TIGALARI VOWEL SIGN U..TULU-TIGALARI VOWEL SIGN VOCALIC LL +113C2 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN EE +113C5 ; Other_Alphabetic # Mc TULU-TIGALARI VOWEL SIGN AI +113C7..113CA ; Other_Alphabetic # Mc [4] TULU-TIGALARI VOWEL SIGN OO..TULU-TIGALARI SIGN CANDRA ANUNASIKA +113CC..113CD ; Other_Alphabetic # Mc [2] TULU-TIGALARI SIGN ANUSVARA..TULU-TIGALARI SIGN VISARGA 11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI 11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU @@ -761,7 +780,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA 116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II 116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +1171D ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LA +1171E ; Other_Alphabetic # Mc AHOM CONSONANT SIGN MEDIAL RA +1171F ; Other_Alphabetic # Mn AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU 11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E @@ -818,6 +839,9 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 11F36..11F3A ; Other_Alphabetic # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R 11F3E..11F3F ; Other_Alphabetic # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 11F40 ; Other_Alphabetic # Mn KAWI VOWEL SIGN EU +1611E..16129 ; Other_Alphabetic # Mn [12] GURUNG KHEMA VOWEL SIGN AA..GURUNG KHEMA VOWEL LENGTH MARK +1612A..1612C ; Other_Alphabetic # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA +1612D..1612E ; Other_Alphabetic # Mn [2] GURUNG KHEMA SIGN ANUSVARA..GURUNG KHEMA CONSONANT SIGN MEDIAL RA 16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR 16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI 16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW @@ -834,7 +858,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1425 +# Total code points: 1495 # ================================================ @@ -849,7 +873,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +18CFF..18D08 ; Ideographic # Lo [10] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D08 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF 2A700..2B739 ; Ideographic # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 @@ -861,7 +885,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 106476 +# Total code points: 106477 # ================================================ @@ -932,6 +956,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA 0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA 0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Diacritic # Mn THAI CHARACTER PHINTHU 0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT 0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN 0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA @@ -955,9 +980,11 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK 1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA 1715 ; Diacritic # Mc TAGALOG SIGN PAMUDPOD +1734 ; Diacritic # Mc HANUNOO SIGN PAMUDPOD 17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT 17DD ; Diacritic # Mn KHMER SIGN ATTHACAN 1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A60 ; Diacritic # Mn TAI THAM SIGN SAKOT 1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN 1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT 1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW @@ -968,6 +995,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH 1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1BE6 ; Diacritic # Mn BATAK SIGN TOMPI +1BF2..1BF3 ; Diacritic # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -1006,6 +1035,8 @@ A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIF A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A806 ; Diacritic # Mn SYLOTI NAGRI SIGN HASANTA +A82C ; Diacritic # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU @@ -1039,9 +1070,13 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10A38..10A3A ; Diacritic # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Diacritic # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1055,10 +1090,16 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA 11236 ; Diacritic # Mn KHOJKI SIGN NUKTA 112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA -1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1133B..1133C ; Diacritic # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA 1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA 11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX 11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +113CE ; Diacritic # Mn TULU-TIGALARI SIGN VIRAMA +113CF ; Diacritic # Mc TULU-TIGALARI SIGN LOOPED VIRAMA +113D0 ; Diacritic # Mn TULU-TIGALARI CONJOINER +113D2 ; Diacritic # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Diacritic # Lo TULU-TIGALARI SIGN PLUTA +113E1..113E2 ; Diacritic # Mn [2] TULU-TIGALARI VEDIC TONE SVARITA..TULU-TIGALARI VEDIC TONE ANUDATTA 11442 ; Diacritic # Mn NEWA SIGN VIRAMA 11446 ; Diacritic # Mn NEWA SIGN NUKTA 114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA @@ -1079,9 +1120,14 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA 11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA 11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +11F41 ; Diacritic # Mc KAWI SIGN KILLER +11F42 ; Diacritic # Mn KAWI CONJOINER +11F5A ; Diacritic # Mn KAWI SIGN NUKTA 13447..13455 ; Diacritic # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +1612F ; Diacritic # Mn GURUNG KHEMA SIGN THOLHOMA 16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE 16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16D6B..16D6C ; Diacritic # Lm [2] KIRAT RAI SIGN VIRAMA..KIRAT RAI SIGN SAAT 16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY @@ -1099,11 +1145,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE 1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E5EE..1E5EF ; Diacritic # Mn [2] OL ONAL SIGN MU..OL ONAL SIGN IKIR 1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1144 +# Total code points: 1178 # ================================================ @@ -1111,6 +1158,8 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON 0640 ; Extender # Lm ARABIC TATWEEL 07FA ; Extender # Lm NKO LAJANYALAN +0A71 ; Extender # Mn GURMUKHI ADDAK +0AFB ; Extender # Mn GUJARATI SIGN SHADDA 0B55 ; Extender # Mn ORIYA SIGN OVERLINE 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA @@ -1132,16 +1181,23 @@ AADD ; Extender # Lm TAI VIET SYMBOL SAM AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +10D4E ; Extender # Lm GARAY VOWEL LENGTH MARK +10D6A ; Extender # Mn GARAY CONSONANT GEMINATION MARK +10D6F ; Extender # Lm GARAY REDUPLICATION MARK +11237 ; Extender # Mn KHOJKI SIGN SHADDA 1135D ; Extender # Lo GRANTHA SIGN PLUTA +113D2 ; Extender # Mn TULU-TIGALARI GEMINATION MARK +113D3 ; Extender # Lo TULU-TIGALARI SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK 16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM 16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK 16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E5EF ; Extender # Mn OL ONAL SIGN IKIR 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 50 +# Total code points: 59 # ================================================ @@ -1217,27 +1273,51 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .... +23E2..2429 ; Pattern_Syntax # So [72] WHITE TRAPEZIUM..SYMBOL FOR DELETE MEDIUM SHADE FORM +242A..243F ; Pattern_Syntax # Cn [22] .. 2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH 244B..245F ; Pattern_Syntax # Cn [21] .. 2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE @@ -1824,4 +1911,18 @@ FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT # Total code points: 26 +# ================================================ + +0654..0655 ; Modifier_Combining_Mark # Mn [2] ARABIC HAMZA ABOVE..ARABIC HAMZA BELOW +0658 ; Modifier_Combining_Mark # Mn ARABIC MARK NOON GHUNNA +06DC ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH SEEN +06E3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW SEEN +06E7..06E8 ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +08CA..08CB ; Modifier_Combining_Mark # Mn [2] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW +08CD..08CF ; Modifier_Combining_Mark # Mn [3] ARABIC SMALL HIGH ZAH..ARABIC LARGE ROUND DOT BELOW +08D3 ; Modifier_Combining_Mark # Mn ARABIC SMALL LOW WAW +08F3 ; Modifier_Combining_Mark # Mn ARABIC SMALL HIGH WAW + +# Total code points: 14 + # EOF diff --git a/lib/elixir/unicode/PropertyValueAliases.txt b/lib/elixir/unicode/PropertyValueAliases.txt index 240cd28c4cc..01c6f659a4f 100644 --- a/lib/elixir/unicode/PropertyValueAliases.txt +++ b/lib/elixir/unicode/PropertyValueAliases.txt @@ -1,8 +1,8 @@ -# PropertyValueAliases-15.1.0.txt -# Date: 2023-08-07, 15:21:34 GMT -# © 2023 Unicode®, Inc. +# PropertyValueAliases-16.0.0.txt +# Date: 2024-07-30, 19:59:00 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -92,6 +92,7 @@ age; 13.0 ; V13_0 age; 14.0 ; V14_0 age; 15.0 ; V15_0 age; 15.1 ; V15_1 +age; 16.0 ; V16_0 age; NA ; Unassigned # Alphabetic (Alpha) @@ -245,6 +246,7 @@ blk; Duployan ; Duployan blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Egyptian_Hieroglyphs_Ext_A ; Egyptian_Hieroglyphs_Extended_A blk; Elbasan ; Elbasan blk; Elymaic ; Elymaic blk; Emoticons ; Emoticons @@ -257,6 +259,7 @@ blk; Ethiopic_Ext ; Ethiopic_Extended blk; Ethiopic_Ext_A ; Ethiopic_Extended_A blk; Ethiopic_Ext_B ; Ethiopic_Extended_B blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Garay ; Garay blk; Geometric_Shapes ; Geometric_Shapes blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended blk; Georgian ; Georgian @@ -271,6 +274,7 @@ blk; Greek_Ext ; Greek_Extended blk; Gujarati ; Gujarati blk; Gunjala_Gondi ; Gunjala_Gondi blk; Gurmukhi ; Gurmukhi +blk; Gurung_Khema ; Gurung_Khema blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms blk; Half_Marks ; Combining_Half_Marks blk; Hangul ; Hangul_Syllables @@ -311,6 +315,7 @@ blk; Khmer ; Khmer blk; Khmer_Symbols ; Khmer_Symbols blk; Khojki ; Khojki blk; Khudawadi ; Khudawadi +blk; Kirat_Rai ; Kirat_Rai blk; Lao ; Lao blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 blk; Latin_Ext_A ; Latin_Extended_A @@ -367,6 +372,7 @@ blk; Music ; Musical_Symbols blk; Myanmar ; Myanmar blk; Myanmar_Ext_A ; Myanmar_Extended_A blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Myanmar_Ext_C ; Myanmar_Extended_C blk; Nabataean ; Nabataean blk; Nag_Mundari ; Nag_Mundari blk; Nandinagari ; Nandinagari @@ -380,6 +386,7 @@ blk; Nyiakeng_Puachue_Hmong ; Nyiakeng_Puachue_Hmong blk; OCR ; Optical_Character_Recognition blk; Ogham ; Ogham blk; Ol_Chiki ; Ol_Chiki +blk; Ol_Onal ; Ol_Onal blk; Old_Hungarian ; Old_Hungarian blk; Old_Italic ; Old_Italic blk; Old_North_Arabian ; Old_North_Arabian @@ -425,6 +432,7 @@ blk; Soyombo ; Soyombo blk; Specials ; Specials blk; Sundanese ; Sundanese blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sunuwar ; Sunuwar blk; Sup_Arrows_A ; Supplemental_Arrows_A blk; Sup_Arrows_B ; Supplemental_Arrows_B blk; Sup_Arrows_C ; Supplemental_Arrows_C @@ -438,6 +446,7 @@ blk; Sutton_SignWriting ; Sutton_SignWriting blk; Syloti_Nagri ; Syloti_Nagri blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A blk; Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +blk; Symbols_For_Legacy_Computing_Sup ; Symbols_For_Legacy_Computing_Supplement blk; Syriac ; Syriac blk; Syriac_Sup ; Syriac_Supplement blk; Tagalog ; Tagalog @@ -460,8 +469,10 @@ blk; Thai ; Thai blk; Tibetan ; Tibetan blk; Tifinagh ; Tifinagh blk; Tirhuta ; Tirhuta +blk; Todhri ; Todhri blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; Tulu_Tigalari ; Tulu_Tigalari blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A @@ -909,6 +920,7 @@ InSC; Number_Joiner ; Number_Joiner InSC; Other ; Other InSC; Pure_Killer ; Pure_Killer InSC; Register_Shifter ; Register_Shifter +InSC; Reordering_Killer ; Reordering_Killer InSC; Syllable_Modifier ; Syllable_Modifier InSC; Tone_Letter ; Tone_Letter InSC; Tone_Mark ; Tone_Mark @@ -1008,6 +1020,7 @@ jg ; Heh_Goal ; Heh_Goal jg ; Heth ; Heth jg ; Kaf ; Kaf jg ; Kaph ; Kaph +jg ; Kashmiri_Yeh ; Kashmiri_Yeh jg ; Khaph ; Khaph jg ; Knotted_Heh ; Knotted_Heh jg ; Lam ; Lam @@ -1073,7 +1086,7 @@ jg ; Syriac_Waw ; Syriac_Waw jg ; Tah ; Tah jg ; Taw ; Taw jg ; Teh_Marbuta ; Teh_Marbuta -jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal jg ; Teth ; Teth jg ; Thin_Yeh ; Thin_Yeh jg ; Vertical_Tail ; Vertical_Tail @@ -1165,6 +1178,11 @@ Lower; Y ; Yes ; T Math; N ; No ; F ; False Math; Y ; Yes ; T ; True +# Modifier_Combining_Mark (MCM) + +MCM; N ; No ; F ; False +MCM; Y ; Yes ; T ; True + # NFC_Quick_Check (NFC_QC) NFC_QC; M ; Maybe @@ -1326,6 +1344,7 @@ sc ; Egyp ; Egyptian_Hieroglyphs sc ; Elba ; Elbasan sc ; Elym ; Elymaic sc ; Ethi ; Ethiopic +sc ; Gara ; Garay sc ; Geor ; Georgian sc ; Glag ; Glagolitic sc ; Gong ; Gunjala_Gondi @@ -1334,6 +1353,7 @@ sc ; Goth ; Gothic sc ; Gran ; Grantha sc ; Grek ; Greek sc ; Gujr ; Gujarati +sc ; Gukh ; Gurung_Khema sc ; Guru ; Gurmukhi sc ; Hang ; Hangul sc ; Hani ; Han @@ -1356,6 +1376,7 @@ sc ; Khmr ; Khmer sc ; Khoj ; Khojki sc ; Kits ; Khitan_Small_Script sc ; Knda ; Kannada +sc ; Krai ; Kirat_Rai sc ; Kthi ; Kaithi sc ; Lana ; Tai_Tham sc ; Laoo ; Lao @@ -1392,6 +1413,7 @@ sc ; Nkoo ; Nko sc ; Nshu ; Nushu sc ; Ogam ; Ogham sc ; Olck ; Ol_Chiki +sc ; Onao ; Ol_Onal sc ; Orkh ; Old_Turkic sc ; Orya ; Oriya sc ; Osge ; Osage @@ -1423,6 +1445,7 @@ sc ; Sogo ; Old_Sogdian sc ; Sora ; Sora_Sompeng sc ; Soyo ; Soyombo sc ; Sund ; Sundanese +sc ; Sunu ; Sunuwar sc ; Sylo ; Syloti_Nagri sc ; Syrc ; Syriac sc ; Tagb ; Tagbanwa @@ -1440,7 +1463,9 @@ sc ; Thai ; Thai sc ; Tibt ; Tibetan sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa +sc ; Todr ; Todhri sc ; Toto ; Toto +sc ; Tutg ; Tulu_Tigalari sc ; Ugar ; Ugaritic sc ; Vaii ; Vai sc ; Vith ; Vithkuqi @@ -1650,4 +1675,34 @@ XIDS; Y ; Yes ; T # @missing: 0000..10FFFF; cjkRSUnicode; +# kEH_Cat (kEH_Cat) + +# @missing: 0000..10FFFF; kEH_Cat; + +# kEH_Desc (kEH_Desc) + +# @missing: 0000..10FFFF; kEH_Desc; + +# kEH_HG (kEH_HG) + +# @missing: 0000..10FFFF; kEH_HG; + +# kEH_IFAO (kEH_IFAO) + +# @missing: 0000..10FFFF; kEH_IFAO; + +# kEH_JSesh (kEH_JSesh) + +# @missing: 0000..10FFFF; kEH_JSesh; + +# kEH_NoMirror (kEH_NoMirror) + +kEH_NoMirror; N ; No ; F ; False +kEH_NoMirror; Y ; Yes ; T ; True + +# kEH_NoRotate (kEH_NoRotate) + +kEH_NoRotate; N ; No ; F ; False +kEH_NoRotate; Y ; Yes ; T ; True + # EOF diff --git a/lib/elixir/unicode/ScriptExtensions.txt b/lib/elixir/unicode/ScriptExtensions.txt index 23141fb8241..140901a872c 100644 --- a/lib/elixir/unicode/ScriptExtensions.txt +++ b/lib/elixir/unicode/ScriptExtensions.txt @@ -1,8 +1,8 @@ -# ScriptExtensions-15.1.0.txt -# Date: 2023-02-01, 23:02:24 GMT -# © 2023 Unicode®, Inc. +# ScriptExtensions-16.0.0.txt +# Date: 2024-07-30, 19:38:00 GMT +# © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ @@ -21,615 +21,213 @@ # values in that set is not material, but for stability in presentation # it is given here as alphabetical. # -# The Script_Extensions values are presented in sorted order in the file. -# They are sorted first by the number of Script property values in their sets, -# and then alphabetically by first differing Script property value. -# -# Following each distinct Script_Extensions value is the list of code -# points associated with that value, listed in code point order. -# # All code points not explicitly listed for Script_Extensions -# have as their value the corresponding Script property value +# have as their value the corresponding Script property value. # # @missing: 0000..10FFFF;