diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 9e0da5614..c1854334e 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-18.0.0.txt -# Date: 2025-10-02, 22:15:53 GMT +# Date: 2025-10-23, 17:45:30 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2123,9 +2123,10 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG # Newly assigned in Unicode 18.0.0 (September, 2025) 20C3 ; 18.0 # UAE DIRHAM SIGN +18CD6..18CDA ; 18.0 # [5] KHITAN SMALL SCRIPT CHARACTER-18CD6..KHITAN SMALL SCRIPT CHARACTER-18CDA 1F7DB ; 18.0 # BULLET IN DOUBLE CIRCLE 1F7F1..1F7FF ; 18.0 # [15] CIRCLE WITH DOUBLE VERTICAL AND HORIZONTAL LINE..RHOMBUS -# Total code points: 17 +# Total code points: 22 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 43f06a9af..20bb1213e 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-18.0.0.txt -# Date: 2025-10-02, 22:16:18 GMT +# Date: 2025-10-23, 17:45:33 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1335,7 +1335,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; Alphabetic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Alphabetic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; Alphabetic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; Alphabetic # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; Alphabetic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; Alphabetic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -1466,7 +1466,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Alphabetic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 147421 +# Total code points: 147426 # ================================================ @@ -6922,7 +6922,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; ID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; ID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; ID_Start # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; ID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; ID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -7038,7 +7038,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; ID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145916 +# Total code points: 145921 # ================================================ @@ -8313,7 +8313,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; ID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; ID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; ID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; ID_Continue # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; ID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; ID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -8471,7 +8471,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..33479 ; ID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149240 +# Total code points: 149245 # ================================================ @@ -9146,7 +9146,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; XID_Start # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Start # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; XID_Start # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; XID_Start # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; XID_Start # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; XID_Start # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -9262,7 +9262,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; XID_Start # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 145893 +# Total code points: 145898 # ================================================ @@ -10538,7 +10538,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; XID_Continue # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; XID_Continue # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; XID_Continue # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; XID_Continue # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; XID_Continue # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; XID_Continue # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -10696,7 +10696,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..33479 ; XID_Continue # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 149221 +# Total code points: 149226 # ================================================ @@ -12769,7 +12769,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; Grapheme_Base # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Grapheme_Base # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; Grapheme_Base # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; Grapheme_Base # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; Grapheme_Base # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; Grapheme_Base # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -12986,7 +12986,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 157511 +# Total code points: 157516 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 4924a86cb..0bf6408f5 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ -# EastAsianWidth-17.0.0.txt -# Date: 2025-08-06, 15:35:31 GMT +# EastAsianWidth-18.0.0.txt +# Date: 2025-10-23, 17:45:38 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2381,7 +2381,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 16FF4..16FF6 ; W # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187FF ; W # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18B00..18CDA ; W # Lo [475] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF ; W # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; W # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index b05a9d2c1..1d6bfe247 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-17.0.0.txt -# Date: 2025-08-08, 21:14:43 GMT +# LineBreak-18.0.0.txt +# Date: 2025-10-23, 17:45:39 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3293,7 +3293,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 16FF4..16FF6 ; ID # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS 17000..187FF ; ID # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; ID # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18B00..18CDA ; AL # Lo [475] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; ID # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; ID # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 diff --git a/unicodetools/data/ucd/dev/NamesList.txt b/unicodetools/data/ucd/dev/NamesList.txt index 8b069896b..4d834d9f4 100644 --- a/unicodetools/data/ucd/dev/NamesList.txt +++ b/unicodetools/data/ucd/dev/NamesList.txt @@ -52727,6 +52727,21 @@ FFFF 18CD3 KHITAN SMALL SCRIPT CHARACTER-18CD3 18CD4 KHITAN SMALL SCRIPT CHARACTER-18CD4 18CD5 KHITAN SMALL SCRIPT CHARACTER-18CD5 +@ Jurchen Small Script characters +@+ Characters tentatively identified as Jurchen Small Script, encoded in + the Khitan Small Script block due to similar clustering structure and + limited evidence. This set also includes 18C3E. + x (khitan small script character-18c3e - 18C3E) +18CD6 KHITAN SMALL SCRIPT CHARACTER-18CD6 + * used in Jurchen Small Script +18CD7 KHITAN SMALL SCRIPT CHARACTER-18CD7 + * used in Jurchen Small Script +18CD8 KHITAN SMALL SCRIPT CHARACTER-18CD8 + * used in Jurchen Small Script +18CD9 KHITAN SMALL SCRIPT CHARACTER-18CD9 + * used in Jurchen Small Script +18CDA KHITAN SMALL SCRIPT CHARACTER-18CDA + * used in Jurchen Small Script @ Indication of missing character 18CFF KHITAN SMALL SCRIPT CHARACTER-18CFF * represents a lost or illegible character diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e64b4224d..c869b16f8 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ -# PropList-17.0.0.txt -# Date: 2025-06-30, 06:19:01 GMT +# PropList-18.0.0.txt +# Date: 2025-10-23, 19:10:08 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -883,7 +883,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; Ideographic # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; Ideographic # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; Ideographic # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; Ideographic # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; Ideographic # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; Ideographic # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB @@ -896,7 +896,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Ideographic # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 110943 +# Total code points: 110948 # ================================================ diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 691f20a15..8dacb8fec 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-18.0.0.txt -# Date: 2025-10-02, 22:16:48 GMT +# Date: 2025-10-23, 18:02:02 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2974,10 +2974,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # ================================================ 16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER -18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18B00..18CDA ; Khitan_Small_Script # Lo [475] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF ; Khitan_Small_Script # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF -# Total code points: 472 +# Total code points: 477 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index a3422917d..95b71d88c 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -31740,7 +31740,11 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 18CD3;KHITAN SMALL SCRIPT CHARACTER-18CD3;Lo;0;L;;;;;N;;;;; 18CD4;KHITAN SMALL SCRIPT CHARACTER-18CD4;Lo;0;L;;;;;N;;;;; 18CD5;KHITAN SMALL SCRIPT CHARACTER-18CD5;Lo;0;L;;;;;N;;;;; -18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; +18CD6;KHITAN SMALL SCRIPT CHARACTER-18CD6;Lo;0;L;;;;;N;;;;; +18CD7;KHITAN SMALL SCRIPT CHARACTER-18CD7;Lo;0;L;;;;;N;;;;; +18CD8;KHITAN SMALL SCRIPT CHARACTER-18CD8;Lo;0;L;;;;;N;;;;; +18CD9;KHITAN SMALL SCRIPT CHARACTER-18CD9;Lo;0;L;;;;;N;;;;; +18CDA;KHITAN SMALL SCRIPT CHARACTER-18CDA;Lo;0;L;;;;;N;;;;; 18CFF;KHITAN SMALL SCRIPT CHARACTER-18CFF;Lo;0;L;;;;;N;;;;; 18D00;;Lo;0;L;;;;;N;;;;; 18D1E;;Lo;0;L;;;;;N;;;;; 18D80;TANGUT COMPONENT-769;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 70c128820..53ecf374d 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ -# VerticalOrientation-17.0.0.txt -# Date: 2025-08-06, 15:36:00 GMT +# VerticalOrientation-18.0.0.txt +# Date: 2025-10-23, 17:46:00 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2218,8 +2218,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 16FF7..16FFF ; U # Cn [9] .. 17000..187FF ; U # Lo [6144] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187FF 18800..18AFF ; U # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18B00..18CD5 ; U # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18CD6..18CFE ; U # Cn [41] .. +18B00..18CDA ; U # Lo [475] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CDA +18CDB..18CFE ; U # Cn [36] .. 18CFF ; U # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 18D00..18D1E ; U # Lo [31] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D1E 18D1F..18D7F ; U # Cn [97] .. diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index c06f4e26a..a0e1515fb 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ -# SentenceBreakProperty-17.0.0.txt -# Date: 2025-06-30, 06:20:48 GMT +# SentenceBreakProperty-18.0.0.txt +# Date: 2025-10-23, 17:45:59 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2535,7 +2535,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 16FE3 ; OLetter # Lm OLD CHINESE ITERATION MARK 16FF2..16FF3 ; OLetter # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; OLetter # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; OLetter # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; OLetter # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; OLetter # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; OLetter # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; OLetter # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -2616,7 +2616,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; OLetter # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141501 +# Total code points: 141506 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 20fa24e37..6c623c382 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ -# WordBreakProperty-17.0.0.txt -# Date: 2025-06-30, 06:20:49 GMT +# WordBreakProperty-18.0.0.txt +# Date: 2025-10-23, 19:10:24 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 539221662..20210103b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ -# DerivedBidiClass-17.0.0.txt -# Date: 2025-08-06, 15:35:25 GMT +# DerivedBidiClass-18.0.0.txt +# Date: 2025-10-23, 18:15:34 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1112,7 +1112,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; L # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; L # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; L # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; L # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; L # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; L # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -1229,7 +1229,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 810599 code points not listed here. +# The above property value applies to 810594 code points not listed here. # Total code points: 1095391 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 2b91e97e2..235889e8d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-18.0.0.txt -# Date: 2025-10-02, 22:16:17 GMT +# Date: 2025-10-23, 18:15:36 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1857,7 +1857,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16FE4 ; 0 # Mn KHITAN SMALL SCRIPT FILLER 16FF2..16FF3 ; 0 # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; 0 # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; 0 # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; 0 # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; 0 # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; 0 # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; 0 # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -2090,7 +2090,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816761 code points not listed here. +# The above property value applies to 816756 code points not listed here. # Total code points: 1113144 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 2026cfec9..00e9fa477 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-18.0.0.txt -# Date: 2025-10-02, 22:16:19 GMT +# Date: 2025-10-23, 18:15:37 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2138,8 +2138,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760595 code points not listed here. -# Total code points: 792263 +# The above property value applies to 760590 code points not listed here. +# Total code points: 792258 # ================================================ @@ -2545,7 +2545,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 16FF2..16FF3 ; W # Lm [2] CHINESE SMALL SIMPLIFIED ER..CHINESE SMALL TRADITIONAL ER 16FF4..16FF6 ; W # Nl [3] YANGQIN SIGN SLOW ONE BEAT..YANGQIN SIGN SLOW TWO BEATS -17000..18CD5 ; W # Lo [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; W # Lo [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; W # Lo [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; W # Lo [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 @@ -2617,7 +2617,7 @@ FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT 31350..33479 ; W # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 # The above property value applies to 56166 code points not listed here. -# Total code points: 182772 +# Total code points: 182777 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 6b6e3269c..954b8b791 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-18.0.0.txt -# Date: 2025-10-02, 22:16:19 GMT +# Date: 2025-10-23, 18:15:38 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -579,7 +579,7 @@ FFFE..FFFF ; Cn # [2] .. 16FA0..16FDF ; Cn # [64] .. 16FE5..16FEF ; Cn # [11] .. 16FF7..16FFF ; Cn # [9] .. -18CD6..18CFE ; Cn # [41] .. +18CDB..18CFE ; Cn # [36] .. 18D1F..18D7F ; Cn # [97] .. 18DF3..1AFEF ; Cn # [8701] .. 1AFF4 ; Cn # @@ -752,7 +752,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814713 +# Total code points: 814708 # ================================================ @@ -2658,7 +2658,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 16D43..16D6A ; Lo # [40] KIRAT RAI LETTER A..KIRAT RAI VOWEL SIGN AU 16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; Lo # MIAO LETTER NASALIZATION -17000..18CD5 ; Lo # [7382] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CD5 +17000..18CDA ; Lo # [7387] TANGUT IDEOGRAPH-17000..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF..18D1E ; Lo # [32] KHITAN SMALL SCRIPT CHARACTER-18CFF..TANGUT IDEOGRAPH-18D1E 18D80..18DF2 ; Lo # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883 1B000..1B122 ; Lo # [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU @@ -2732,7 +2732,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Lo # [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 141062 +# Total code points: 141067 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c674a5097..4e1be3145 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ -# DerivedLineBreak-17.0.0.txt -# Date: 2025-08-06, 15:35:29 GMT +# DerivedLineBreak-18.0.0.txt +# Date: 2025-10-23, 18:15:38 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -70,8 +70,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757182 code points not listed here. -# Total code points: 894650 +# The above property value applies to 757177 code points not listed here. +# Total code points: 894645 # ================================================ @@ -1456,7 +1456,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 16F00..16F4A ; AL # Lo [75] MIAO LETTER PA..MIAO LETTER RTE 16F50 ; AL # Lo MIAO LETTER NASALIZATION 16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -18B00..18CD5 ; AL # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18B00..18CDA ; AL # Lo [475] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CDA 18CFF ; AL # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF 1AFF0..1AFF3 ; AL # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 1AFF5..1AFFB ; AL # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 @@ -1639,7 +1639,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE 1FBFA ; AL # So ALARM BELL SYMBOL -# Total code points: 26954 +# Total code points: 26959 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 6ca0d2479..8d4fa3b77 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-18.0.0.txt -# Date: 2025-10-02, 22:16:20 GMT +# Date: 2025-10-23, 18:15:39 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -37940,7 +37940,7 @@ FFFD ; REPLACEMENT CHARACTER 18AFD ; TANGUT COMPONENT-766 18AFE ; TANGUT COMPONENT-767 18AFF ; TANGUT COMPONENT-768 -18B00..18CD5 ; KHITAN SMALL SCRIPT CHARACTER-* +18B00..18CDA ; KHITAN SMALL SCRIPT CHARACTER-* 18CFF ; KHITAN SMALL SCRIPT CHARACTER-* 18D00..18D1E ; TANGUT IDEOGRAPH-* 18D80 ; TANGUT COMPONENT-769 @@ -45840,6 +45840,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159818 +# Total code points: 159823 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/231.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/231.txt new file mode 100644 index 000000000..0343241d4 --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/231.txt @@ -0,0 +1,20 @@ +# Symbol: Jurchen Small Script characters (18CD6 .. 18CDA) +# https://github.com/unicode-org/utc-release-management/issues/231 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008 IDNA2008_Category: + +Propertywise [\x{18CD5} \N{KHITAN SMALL SCRIPT CHARACTER-18CD5} + \x{18CD6} \N{KHITAN SMALL SCRIPT CHARACTER-18CD6} + \x{18CD7} \N{KHITAN SMALL SCRIPT CHARACTER-18CD7} + \x{18CD8} \N{KHITAN SMALL SCRIPT CHARACTER-18CD8} + \x{18CD9} \N{KHITAN SMALL SCRIPT CHARACTER-18CD9} + \x{18CDA} \N{KHITAN SMALL SCRIPT CHARACTER-18CDA}] AreAlike + +end Ignoring; + +end Ignoring; \ No newline at end of file