Skip to content

Commit e1e07ef

Browse files
committed
Merge remote-tracking branch 'la-vache/main' into unihan-17
2 parents b8d3063 + 57a3085 commit e1e07ef

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+3279
-1562
lines changed

.github/workflows/cache_retain.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ jobs:
3030
retain-maven-cache:
3131
name: Run all tests with Maven
3232
runs-on: ubuntu-latest
33+
# Only run this on the upstream repo. Otherwise, running it in a personal fork will cause
34+
# GitHub to disable the fork's copy of the workflow
35+
# (GitHub disables scheduled workflows in repos with more than 60 days of inactivity).
36+
if: github.ref == 'refs/heads/main' && github.repository == 'unicode-org/unicodetools'
3337
steps:
3438
- name: Checkout and setup
3539
uses: actions/checkout@v2

unicodetools/data/ucd/dev/ArabicShaping.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@
482482
088C; TAH WITH 3 DOTS BELOW; D; TAH
483483
088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF
484484
088E; VERTICAL TAIL; R; VERTICAL TAIL
485+
088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON
485486
0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group
486487
0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group
487488

@@ -850,6 +851,8 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
850851
10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL
851852
10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH
852853
10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF
854+
10EC6; THIN NOON; D; THIN NOON
855+
10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH
853856

854857
# Sogdian Characters
855858

unicodetools/data/ucd/dev/Blocks.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ FFF0..FFFF; Specials
228228
108E0..108FF; Hatran
229229
10900..1091F; Phoenician
230230
10920..1093F; Lydian
231+
10940..1095F; Sidetic
231232
10980..1099F; Meroitic Hieroglyphs
232233
109A0..109FF; Meroitic Cursive
233234
10A00..10A5F; Kharoshthi
@@ -279,11 +280,13 @@ FFF0..FFFF; Specials
279280
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
280281
11AC0..11AFF; Pau Cin Hau
281282
11B00..11B5F; Devanagari Extended-A
283+
11B60..11B7F; Sharada Supplement
282284
11BC0..11BFF; Sunuwar
283285
11C00..11C6F; Bhaiksuki
284286
11C70..11CBF; Marchen
285287
11D00..11D5F; Masaram Gondi
286288
11D60..11DAF; Gunjala Gondi
289+
11DB0..11DEF; Tolong Siki
287290
11EE0..11EFF; Makasar
288291
11F00..11F5F; Kawi
289292
11FB0..11FBF; Lisu Supplement
@@ -302,14 +305,17 @@ FFF0..FFFF; Specials
302305
16A70..16ACF; Tangsa
303306
16AD0..16AFF; Bassa Vah
304307
16B00..16B8F; Pahawh Hmong
308+
16EA0..16EDF; Beria Erfe
305309
16D40..16D7F; Kirat Rai
310+
16D80..16DAF; Chisoi
306311
16E40..16E9F; Medefaidrin
307312
16F00..16F9F; Miao
308313
16FE0..16FFF; Ideographic Symbols and Punctuation
309314
17000..187FF; Tangut
310315
18800..18AFF; Tangut Components
311316
18B00..18CFF; Khitan Small Script
312317
18D00..18D7F; Tangut Supplement
318+
18D80..18DFF; Tangut Components Supplement
313319
1AFF0..1AFFF; Kana Extended-B
314320
1B000..1B0FF; Kana Supplement
315321
1B100..1B12F; Kana Extended-A
@@ -318,6 +324,7 @@ FFF0..FFFF; Specials
318324
1BC00..1BC9F; Duployan
319325
1BCA0..1BCAF; Shorthand Format Controls
320326
1CC00..1CEBF; Symbols for Legacy Computing Supplement
327+
1CEC0..1CEFF; Miscellaneous Symbols Supplement
321328
1CF00..1CFCF; Znamenny Musical Notation
322329
1D000..1D0FF; Byzantine Musical Symbols
323330
1D100..1D1FF; Musical Symbols
@@ -336,6 +343,7 @@ FFF0..FFFF; Specials
336343
1E2C0..1E2FF; Wancho
337344
1E4D0..1E4FF; Nag Mundari
338345
1E5D0..1E5FF; Ol Onal
346+
1E6C0..1E6FF; Tai Yo
339347
1E7E0..1E7FF; Ethiopic Extended-B
340348
1E800..1E8DF; Mende Kikakui
341349
1E900..1E95F; Adlam

unicodetools/data/ucd/dev/CaseFolding.txt

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# CaseFolding-16.0.0.txt
2-
# Date: 2024-04-30, 21:48:11 GMT
1+
# CaseFolding-17.0.0.txt
2+
# Date: 2024-11-14, 20:19:39 GMT
33
# © 2024 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
12431243
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
12441244
A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN
12451245
A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
1246+
A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE
12461247
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
1248+
A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN
1249+
A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN
12471250
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
12481251
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
12491252
A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA
@@ -1616,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
16161619
16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
16171620
16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
16181621
16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
1622+
16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB
1623+
16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA
1624+
16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI
1625+
16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH
1626+
16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO
1627+
16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY
1628+
16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO
1629+
16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I
1630+
16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI
1631+
16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO
1632+
16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO
1633+
16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI
1634+
16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI
1635+
16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA
1636+
16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY
1637+
16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI
1638+
16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI
1639+
16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO
1640+
16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA
1641+
16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI
1642+
16EB4; C; 16ECF; # BERIA ERFE CAPITAL LETTER SHEP
1643+
16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE
1644+
16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI
1645+
16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE
1646+
16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY
16191647
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
16201648
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
16211649
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM

unicodetools/data/ucd/dev/DerivedAge.txt

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedAge-17.0.0.txt
2-
# Date: 2024-11-14, 15:47:44 GMT
2+
# Date: 2024-11-16, 02:52:39 GMT
33
# © 2024 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -2065,9 +2065,57 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L
20652065

20662066
# Newly assigned in Unicode 17.0.0 (September, 2025)
20672067

2068+
088F ; 17.0 # ARABIC LETTER NOON WITH RING ABOVE
2069+
09FF ; 17.0 # BENGALI LETTER SANSKRIT BA
2070+
0B53..0B54 ; 17.0 # [2] ORIYA SIGN DOT ABOVE..ORIYA SIGN DOUBLE DOT ABOVE
2071+
0C5C ; 17.0 # TELUGU ARCHAIC SHRII
2072+
0CDC ; 17.0 # KANNADA ARCHAIC SHRII
2073+
1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW
2074+
1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE
2075+
2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE
2076+
A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE
2077+
A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN
2078+
A7D4 ; 17.0 # LATIN CAPITAL LETTER DOUBLE WYNN
2079+
A7F1 ; 17.0 # MODIFIER LETTER CAPITAL S
2080+
FBC3..FBD2 ; 17.0 # [16] ARABIC LIGATURE JALLA WA-ALAA..ARABIC LIGATURE ALAYHI AR-RAHMAH
2081+
FD90..FD91 ; 17.0 # [2] ARABIC LIGATURE RAHMATU ALLAAHI ALAYH..ARABIC LIGATURE RAHMATU ALLAAHI ALAYHAA
2082+
FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIGATURE KARRAMA ALLAAHU WAJHAH
2083+
10940..1095C ; 17.0 # [29] SIDETIC LETTER N01..SIDETIC LETTER N29
2084+
10EC5..10EC7 ; 17.0 # [3] ARABIC SMALL YEH BARREE WITH TWO DOTS BELOW..ARABIC LETTER YEH WITH FOUR DOTS BELOW
2085+
10ED0..10ED8 ; 17.0 # [9] ARABIC BIBLICAL END OF VERSE..ARABIC LIGATURE NAWWARA ALLAAHU MARQADAH
2086+
10EFA..10EFB ; 17.0 # [2] ARABIC DOUBLE VERTICAL BAR BELOW..ARABIC SMALL LOW NOON
2087+
11B60..11B67 ; 17.0 # [8] SHARADA VOWEL SIGN OE..SHARADA VOWEL SIGN CANDRA O
2088+
11DB0..11DDB ; 17.0 # [44] TOLONG SIKI LETTER I..TOLONG SIKI UNGGA
2089+
11DE0..11DE9 ; 17.0 # [10] TOLONG SIKI DIGIT ZERO..TOLONG SIKI DIGIT NINE
2090+
16D80..16D9D ; 17.0 # [30] CHISOI LETTER A..CHISOI SIGN SISO
2091+
16DA0..16DA9 ; 17.0 # [10] CHISOI DIGIT ZERO..CHISOI DIGIT NINE
2092+
16EA0..16EB8 ; 17.0 # [25] BERIA ERFE CAPITAL LETTER ARKAB..BERIA ERFE CAPITAL LETTER AY
2093+
16EBB..16ED3 ; 17.0 # [25] BERIA ERFE SMALL LETTER ARKAB..BERIA ERFE SMALL LETTER AY
2094+
16FF2..16FF6 ; 17.0 # [5] CHINESE SMALL SIMPLIFIED ER..YANGQIN SIGN SLOW TWO BEATS
2095+
187F8..187FF ; 17.0 # [8] TANGUT IDEOGRAPH-187F8..TANGUT IDEOGRAPH-187FF
2096+
18D09..18D1E ; 17.0 # [22] TANGUT IDEOGRAPH-18D09..TANGUT IDEOGRAPH-18D1E
2097+
18D80..18DF2 ; 17.0 # [115] TANGUT COMPONENT-769..TANGUT COMPONENT-883
2098+
1CCFA..1CCFC ; 17.0 # [3] SNAKE SYMBOL..NOSE SYMBOL
2099+
1CEBA..1CED0 ; 17.0 # [23] FRAGILE SYMBOL..LEUKOTHEA
2100+
1CEE0..1CEF0 ; 17.0 # [17] GEOMANTIC FIGURE POPULUS..MEDIUM SMALL WHITE CIRCLE WITH HORIZONTAL BAR
2101+
1E6C0..1E6DE ; 17.0 # [31] TAI YO LETTER LOW KO..TAI YO LETTER HIGH KVO
2102+
1E6E0..1E6F5 ; 17.0 # [22] TAI YO LETTER AA..TAI YO SIGN OM
2103+
1E6FE..1E6FF ; 17.0 # [2] TAI YO SYMBOL MUEANG..TAI YO XAM LAI
2104+
1F6D8 ; 17.0 # LANDSLIDE
2105+
1F777..1F77A ; 17.0 # [4] VESTA FORM TWO..PARTHENOPE FORM TWO
2106+
1F8D0..1F8D8 ; 17.0 # [9] LONG RIGHTWARDS ARROW OVER LONG LEFTWARDS ARROW..LONG LEFT RIGHT ARROW WITH DEPENDENT LOBE
2107+
1FA54..1FA57 ; 17.0 # [4] WHITE CHESS FERZ..BLACK CHESS ALFIL
2108+
1FA8A ; 17.0 # TROMBONE
2109+
1FA8E ; 17.0 # TREASURE CHEST
2110+
1FAC8 ; 17.0 # HAIRY CREATURE
2111+
1FACD ; 17.0 # ORCA
2112+
1FADD ; 17.0 # APPLE CORE
2113+
1FAEA ; 17.0 # DISTORTED FACE
2114+
1FAEF ; 17.0 # FIGHT CLOUD
2115+
1FBFA ; 17.0 # ALARM BELL SYMBOL
20682116
2B73A..2B73E ; 17.0 # [5] CJK UNIFIED IDEOGRAPH-2B73A..CJK UNIFIED IDEOGRAPH-2B73E
20692117
323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479
20702118

2071-
# Total code points: 4303
2119+
# Total code points: 4836
20722120

20732121
# EOF

0 commit comments

Comments
 (0)