Skip to content

Commit c028ec7

Browse files
Unicode Consortium authored and khwilliamson committed
Add Unicode 17.0
This includes updates to a few perl files that need to know the current Unicode version, and regenerating perl files that depend on the Unicode data
1 parent 975b8d5 commit c028ec7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+84504
-41720
lines changed

charclass_invlists.inc

Lines changed: 42375 additions & 8630 deletions
Large diffs are not rendered by default.

lib/Unicode/UCD.t

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use Test::More;
1919

2020
use Unicode::UCD qw(charinfo charprop charprops_all);
2121

22-
my $expected_version = '16.0.0';
22+
my $expected_version = '17.0.0';
2323
my $current_version = Unicode::UCD::UnicodeVersion;
2424
my $v_unicode_version = pack "C*", split /\./, $current_version;
2525
my $unknown_script = ($v_unicode_version lt v5.0.0)
@@ -786,7 +786,7 @@ SKIP:
786786
skip("Latin range count will be wrong when using older Unicode release",
787787
2) if $current_version lt $expected_version;
788788
my $n1 = @$r1;
789-
is($n1, 39, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
789+
is($n1, 36, "number of ranges in Latin script (Unicode $expected_version)") if $::IS_ASCII;
790790
shift @$r1 while @$r1;
791791
my $r2 = charscript('Latin');
792792
is(@$r2, $n1, "modifying results should not mess up internal caches");
@@ -888,9 +888,13 @@ if ($v_unicode_version ge v5.2.0) {
888888
}
889889
if ($v_unicode_version gt v3.2.0) { # Is missing from non-Unihan files before
890890
# this
891-
# Extrapolating from Unicode documentation, they moved away here from
892-
# Taiwanese/Japanese usage in favor of mainland China usage.
893-
my $value = ($v_unicode_version lt v15.1.0) ? 1000000000000 : 1000000;
891+
# Extrapolating from Unicode documentation, they moved for two versions
892+
# away here from Taiwanese/Japanese usage in favor of mainland China
893+
# usage.
894+
my $value = ( $v_unicode_version lt v17.0.0
895+
&& $v_unicode_version ge v15.1.0)
896+
? 1000000
897+
: 1000000000000;
894898
is(num("\N{U+5146}"), $value, 'Verify num("\N{U+5146}") == ' . $value);
895899
}
896900

lib/unicore/ArabicShaping.txt

Lines changed: 83 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# ArabicShaping-16.0.0.txt
2-
# Date: 2024-07-30
3-
# © 2024 Unicode®, Inc.
1+
# ArabicShaping-17.0.0.txt
2+
# Date: 2025-08-14
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -12,22 +12,22 @@
1212
# shaping, repeating in machine readable form the information
1313
# exemplified in various tables of The Unicode Standard core specification.
1414
#
15-
# This file also defines Joining_Type values for
16-
# Mongolian, Phags-pa, Psalter Pahlavi, Sogdian, Old Uyghur, Chorasmian,
17-
# and Adlam positional shaping,
18-
# and Joining_Type and Joining_Group values for Hanifi Rohingya positional shaping,
19-
# which are not listed in tables in the core specification.
15+
# This file also defines Joining_Type values for Mongolian, Phags-pa,
16+
# Psalter Pahlavi, Sogdian, Old Uyghur, Chorasmian, and Adlam positional
17+
# shaping, and Joining_Type and Joining_Group values for Hanifi Rohingya
18+
# positional shaping, which are not listed in tables in the core
19+
# specification.
2020
#
2121
# Script Section Table(s)
2222
#
23-
# Arabic 9.2 9-3, 9-4, 9-5, 9-7, 9-8, 9-9, 9-10, 9-11
23+
# Arabic 9.2 9-3, 9-4, 9-5, 9-7, 9-8, 9-9, 9-10, 9-11, 9-13
2424
# Syriac 9.3 9-15, 9-16, 9-17, 9-18, 9-19
25-
# Mandaic 9.5 9-21, 9-22
25+
# Mandaic 9.5 9-22, 9-23
2626
# Manichaean 10.5 10-4, 10-5, 10-6, 10-7
2727
# Psalter Pahlavi 10.6 --
2828
# Chorasmian 10.8 --
2929
# Mongolian 13.5 --
30-
# Phags-pa 14.4 --
30+
# Phags-pa 14.4 14-7
3131
# Sogdian 14.10 --
3232
# Old Uyghur 14.11 --
3333
# Hanifi Rohingya 16.14 --
@@ -482,6 +482,7 @@
482482
088C; TAH WITH 3 DOTS BELOW; D; TAH
483483
088D; KEHEH WITH VERTICAL 2 DOTS BELOW; D; GAF
484484
088E; VERTICAL TAIL; R; VERTICAL TAIL
485+
088F; DOTLESS NOON WITH SEPARATE RING ABOVE; D; NOON
485486
0890; ARABIC POUND MARK ABOVE; U; No_Joining_Group
486487
0891; ARABIC PIASTRE MARK ABOVE; U; No_Joining_Group
487488

@@ -850,6 +851,8 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
850851
10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL
851852
10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH
852853
10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF
854+
10EC6; THIN NOON; D; THIN NOON
855+
10EC7; DOTLESS YEH WITH 4 DOTS BELOW; D; YEH
853856

854857
# Sogdian Characters
855858

@@ -942,74 +945,74 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
942945

943946
# Adlam Characters
944947

945-
1E900;ADLAM CAPITAL ALIF; D; No_Joining_Group
946-
1E901;ADLAM CAPITAL DAALI; D; No_Joining_Group
947-
1E902;ADLAM CAPITAL LAAM; D; No_Joining_Group
948-
1E903;ADLAM CAPITAL MIIM; D; No_Joining_Group
949-
1E904;ADLAM CAPITAL BA; D; No_Joining_Group
950-
1E905;ADLAM CAPITAL SINNYIIYHE; D; No_Joining_Group
951-
1E906;ADLAM CAPITAL PE; D; No_Joining_Group
952-
1E907;ADLAM CAPITAL BHE; D; No_Joining_Group
953-
1E908;ADLAM CAPITAL RA; D; No_Joining_Group
954-
1E909;ADLAM CAPITAL E; D; No_Joining_Group
955-
1E90A;ADLAM CAPITAL FA; D; No_Joining_Group
956-
1E90B;ADLAM CAPITAL I; D; No_Joining_Group
957-
1E90C;ADLAM CAPITAL O; D; No_Joining_Group
958-
1E90D;ADLAM CAPITAL DHA; D; No_Joining_Group
959-
1E90E;ADLAM CAPITAL YHE; D; No_Joining_Group
960-
1E90F;ADLAM CAPITAL WAW; D; No_Joining_Group
961-
1E910;ADLAM CAPITAL NUN; D; No_Joining_Group
962-
1E911;ADLAM CAPITAL KAF; D; No_Joining_Group
963-
1E912;ADLAM CAPITAL YA; D; No_Joining_Group
964-
1E913;ADLAM CAPITAL U; D; No_Joining_Group
965-
1E914;ADLAM CAPITAL JIIM; D; No_Joining_Group
966-
1E915;ADLAM CAPITAL CHI; D; No_Joining_Group
967-
1E916;ADLAM CAPITAL HA; D; No_Joining_Group
968-
1E917;ADLAM CAPITAL QAAF; D; No_Joining_Group
969-
1E918;ADLAM CAPITAL GA; D; No_Joining_Group
970-
1E919;ADLAM CAPITAL NYA; D; No_Joining_Group
971-
1E91A;ADLAM CAPITAL TU; D; No_Joining_Group
972-
1E91B;ADLAM CAPITAL NHA; D; No_Joining_Group
973-
1E91C;ADLAM CAPITAL VA; D; No_Joining_Group
974-
1E91D;ADLAM CAPITAL KHA; D; No_Joining_Group
975-
1E91E;ADLAM CAPITAL GBE; D; No_Joining_Group
976-
1E91F;ADLAM CAPITAL ZAL; D; No_Joining_Group
977-
1E920;ADLAM CAPITAL KPO; D; No_Joining_Group
978-
1E921;ADLAM CAPITAL SHA; D; No_Joining_Group
979-
1E922;ADLAM SMALL ALIF; D; No_Joining_Group
980-
1E923;ADLAM SMALL DAALI; D; No_Joining_Group
981-
1E924;ADLAM SMALL LAAM; D; No_Joining_Group
982-
1E925;ADLAM SMALL MIIM; D; No_Joining_Group
983-
1E926;ADLAM SMALL BA; D; No_Joining_Group
984-
1E927;ADLAM SMALL SINNYIIYHE; D; No_Joining_Group
985-
1E928;ADLAM SMALL PE; D; No_Joining_Group
986-
1E929;ADLAM SMALL BHE; D; No_Joining_Group
987-
1E92A;ADLAM SMALL RA; D; No_Joining_Group
988-
1E92B;ADLAM SMALL E; D; No_Joining_Group
989-
1E92C;ADLAM SMALL FA; D; No_Joining_Group
990-
1E92D;ADLAM SMALL I; D; No_Joining_Group
991-
1E92E;ADLAM SMALL O; D; No_Joining_Group
992-
1E92F;ADLAM SMALL DHA; D; No_Joining_Group
993-
1E930;ADLAM SMALL YHE; D; No_Joining_Group
994-
1E931;ADLAM SMALL WAW; D; No_Joining_Group
995-
1E932;ADLAM SMALL NUN; D; No_Joining_Group
996-
1E933;ADLAM SMALL KAF; D; No_Joining_Group
997-
1E934;ADLAM SMALL YA; D; No_Joining_Group
998-
1E935;ADLAM SMALL U; D; No_Joining_Group
999-
1E936;ADLAM SMALL JIIM; D; No_Joining_Group
1000-
1E937;ADLAM SMALL CHI; D; No_Joining_Group
1001-
1E938;ADLAM SMALL HA; D; No_Joining_Group
1002-
1E939;ADLAM SMALL QAAF; D; No_Joining_Group
1003-
1E93A;ADLAM SMALL GA; D; No_Joining_Group
1004-
1E93B;ADLAM SMALL NYA; D; No_Joining_Group
1005-
1E93C;ADLAM SMALL TU; D; No_Joining_Group
1006-
1E93D;ADLAM SMALL NHA; D; No_Joining_Group
1007-
1E93E;ADLAM SMALL VA; D; No_Joining_Group
1008-
1E93F;ADLAM SMALL KHA; D; No_Joining_Group
1009-
1E940;ADLAM SMALL GBE; D; No_Joining_Group
1010-
1E941;ADLAM SMALL ZAL; D; No_Joining_Group
1011-
1E942;ADLAM SMALL KPO; D; No_Joining_Group
1012-
1E943;ADLAM SMALL SHA; D; No_Joining_Group
1013-
1E94B;ADLAM NASALIZATION MARK; T; No_Joining_Group
948+
1E900; ADLAM CAPITAL ALIF; D; No_Joining_Group
949+
1E901; ADLAM CAPITAL DAALI; D; No_Joining_Group
950+
1E902; ADLAM CAPITAL LAAM; D; No_Joining_Group
951+
1E903; ADLAM CAPITAL MIIM; D; No_Joining_Group
952+
1E904; ADLAM CAPITAL BA; D; No_Joining_Group
953+
1E905; ADLAM CAPITAL SINNYIIYHE; D; No_Joining_Group
954+
1E906; ADLAM CAPITAL PE; D; No_Joining_Group
955+
1E907; ADLAM CAPITAL BHE; D; No_Joining_Group
956+
1E908; ADLAM CAPITAL RA; D; No_Joining_Group
957+
1E909; ADLAM CAPITAL E; D; No_Joining_Group
958+
1E90A; ADLAM CAPITAL FA; D; No_Joining_Group
959+
1E90B; ADLAM CAPITAL I; D; No_Joining_Group
960+
1E90C; ADLAM CAPITAL O; D; No_Joining_Group
961+
1E90D; ADLAM CAPITAL DHA; D; No_Joining_Group
962+
1E90E; ADLAM CAPITAL YHE; D; No_Joining_Group
963+
1E90F; ADLAM CAPITAL WAW; D; No_Joining_Group
964+
1E910; ADLAM CAPITAL NUN; D; No_Joining_Group
965+
1E911; ADLAM CAPITAL KAF; D; No_Joining_Group
966+
1E912; ADLAM CAPITAL YA; D; No_Joining_Group
967+
1E913; ADLAM CAPITAL U; D; No_Joining_Group
968+
1E914; ADLAM CAPITAL JIIM; D; No_Joining_Group
969+
1E915; ADLAM CAPITAL CHI; D; No_Joining_Group
970+
1E916; ADLAM CAPITAL HA; D; No_Joining_Group
971+
1E917; ADLAM CAPITAL QAAF; D; No_Joining_Group
972+
1E918; ADLAM CAPITAL GA; D; No_Joining_Group
973+
1E919; ADLAM CAPITAL NYA; D; No_Joining_Group
974+
1E91A; ADLAM CAPITAL TU; D; No_Joining_Group
975+
1E91B; ADLAM CAPITAL NHA; D; No_Joining_Group
976+
1E91C; ADLAM CAPITAL VA; D; No_Joining_Group
977+
1E91D; ADLAM CAPITAL KHA; D; No_Joining_Group
978+
1E91E; ADLAM CAPITAL GBE; D; No_Joining_Group
979+
1E91F; ADLAM CAPITAL ZAL; D; No_Joining_Group
980+
1E920; ADLAM CAPITAL KPO; D; No_Joining_Group
981+
1E921; ADLAM CAPITAL SHA; D; No_Joining_Group
982+
1E922; ADLAM SMALL ALIF; D; No_Joining_Group
983+
1E923; ADLAM SMALL DAALI; D; No_Joining_Group
984+
1E924; ADLAM SMALL LAAM; D; No_Joining_Group
985+
1E925; ADLAM SMALL MIIM; D; No_Joining_Group
986+
1E926; ADLAM SMALL BA; D; No_Joining_Group
987+
1E927; ADLAM SMALL SINNYIIYHE; D; No_Joining_Group
988+
1E928; ADLAM SMALL PE; D; No_Joining_Group
989+
1E929; ADLAM SMALL BHE; D; No_Joining_Group
990+
1E92A; ADLAM SMALL RA; D; No_Joining_Group
991+
1E92B; ADLAM SMALL E; D; No_Joining_Group
992+
1E92C; ADLAM SMALL FA; D; No_Joining_Group
993+
1E92D; ADLAM SMALL I; D; No_Joining_Group
994+
1E92E; ADLAM SMALL O; D; No_Joining_Group
995+
1E92F; ADLAM SMALL DHA; D; No_Joining_Group
996+
1E930; ADLAM SMALL YHE; D; No_Joining_Group
997+
1E931; ADLAM SMALL WAW; D; No_Joining_Group
998+
1E932; ADLAM SMALL NUN; D; No_Joining_Group
999+
1E933; ADLAM SMALL KAF; D; No_Joining_Group
1000+
1E934; ADLAM SMALL YA; D; No_Joining_Group
1001+
1E935; ADLAM SMALL U; D; No_Joining_Group
1002+
1E936; ADLAM SMALL JIIM; D; No_Joining_Group
1003+
1E937; ADLAM SMALL CHI; D; No_Joining_Group
1004+
1E938; ADLAM SMALL HA; D; No_Joining_Group
1005+
1E939; ADLAM SMALL QAAF; D; No_Joining_Group
1006+
1E93A; ADLAM SMALL GA; D; No_Joining_Group
1007+
1E93B; ADLAM SMALL NYA; D; No_Joining_Group
1008+
1E93C; ADLAM SMALL TU; D; No_Joining_Group
1009+
1E93D; ADLAM SMALL NHA; D; No_Joining_Group
1010+
1E93E; ADLAM SMALL VA; D; No_Joining_Group
1011+
1E93F; ADLAM SMALL KHA; D; No_Joining_Group
1012+
1E940; ADLAM SMALL GBE; D; No_Joining_Group
1013+
1E941; ADLAM SMALL ZAL; D; No_Joining_Group
1014+
1E942; ADLAM SMALL KPO; D; No_Joining_Group
1015+
1E943; ADLAM SMALL SHA; D; No_Joining_Group
1016+
1E94B; ADLAM NASALIZATION MARK; T; No_Joining_Group
10141017

10151018
# EOF

lib/unicore/BidiBrackets.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# BidiBrackets-16.0.0.txt
2-
# Date: 2024-02-02
3-
# © 2024 Unicode®, Inc.
1+
# BidiBrackets-17.0.0.txt
2+
# Date: 2025-08-01
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#

lib/unicore/BidiMirroring.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# BidiMirroring-16.0.0.txt
2-
# Date: 2024-01-30
3-
# © 2024 Unicode®, Inc.
1+
# BidiMirroring-17.0.0.txt
2+
# Date: 2025-08-01
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -16,7 +16,7 @@
1616
# value, for which there is another Unicode character that typically has a glyph
1717
# that is the mirror image of the original character's glyph.
1818
#
19-
# The repertoire covered by the file is Unicode 16.0.0.
19+
# The repertoire covered by the file is Unicode 17.0.0.
2020
#
2121
# The file contains a list of lines with mappings from one code point
2222
# to another one for character-based mirroring.

lib/unicore/Blocks.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Blocks-16.0.0.txt
2-
# Date: 2024-02-02
3-
# © 2024 Unicode®, Inc.
1+
# Blocks-17.0.0.txt
2+
# Date: 2025-08-01
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -228,6 +228,7 @@ FFF0..FFFF; Specials
228228
108E0..108FF; Hatran
229229
10900..1091F; Phoenician
230230
10920..1093F; Lydian
231+
10940..1095F; Sidetic
231232
10980..1099F; Meroitic Hieroglyphs
232233
109A0..109FF; Meroitic Cursive
233234
10A00..10A5F; Kharoshthi
@@ -279,11 +280,13 @@ FFF0..FFFF; Specials
279280
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
280281
11AC0..11AFF; Pau Cin Hau
281282
11B00..11B5F; Devanagari Extended-A
283+
11B60..11B7F; Sharada Supplement
282284
11BC0..11BFF; Sunuwar
283285
11C00..11C6F; Bhaiksuki
284286
11C70..11CBF; Marchen
285287
11D00..11D5F; Masaram Gondi
286288
11D60..11DAF; Gunjala Gondi
289+
11DB0..11DEF; Tolong Siki
287290
11EE0..11EFF; Makasar
288291
11F00..11F5F; Kawi
289292
11FB0..11FBF; Lisu Supplement
@@ -304,12 +307,14 @@ FFF0..FFFF; Specials
304307
16B00..16B8F; Pahawh Hmong
305308
16D40..16D7F; Kirat Rai
306309
16E40..16E9F; Medefaidrin
310+
16EA0..16EDF; Beria Erfe
307311
16F00..16F9F; Miao
308312
16FE0..16FFF; Ideographic Symbols and Punctuation
309313
17000..187FF; Tangut
310314
18800..18AFF; Tangut Components
311315
18B00..18CFF; Khitan Small Script
312316
18D00..18D7F; Tangut Supplement
317+
18D80..18DFF; Tangut Components Supplement
313318
1AFF0..1AFFF; Kana Extended-B
314319
1B000..1B0FF; Kana Supplement
315320
1B100..1B12F; Kana Extended-A
@@ -318,6 +323,7 @@ FFF0..FFFF; Specials
318323
1BC00..1BC9F; Duployan
319324
1BCA0..1BCAF; Shorthand Format Controls
320325
1CC00..1CEBF; Symbols for Legacy Computing Supplement
326+
1CEC0..1CEFF; Miscellaneous Symbols Supplement
321327
1CF00..1CFCF; Znamenny Musical Notation
322328
1D000..1D0FF; Byzantine Musical Symbols
323329
1D100..1D1FF; Musical Symbols
@@ -336,6 +342,7 @@ FFF0..FFFF; Specials
336342
1E2C0..1E2FF; Wancho
337343
1E4D0..1E4FF; Nag Mundari
338344
1E5D0..1E5FF; Ol Onal
345+
1E6C0..1E6FF; Tai Yo
339346
1E7E0..1E7FF; Ethiopic Extended-B
340347
1E800..1E8DF; Mende Kikakui
341348
1E900..1E95F; Adlam
@@ -367,6 +374,7 @@ FFF0..FFFF; Specials
367374
2F800..2FA1F; CJK Compatibility Ideographs Supplement
368375
30000..3134F; CJK Unified Ideographs Extension G
369376
31350..323AF; CJK Unified Ideographs Extension H
377+
323B0..3347F; CJK Unified Ideographs Extension J
370378
E0000..E007F; Tags
371379
E0100..E01EF; Variation Selectors Supplement
372380
F0000..FFFFF; Supplementary Private Use Area-A

lib/unicore/CJKRadicals.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# CJKRadicals-16.0.0.txt
2-
# Date: 2024-02-02
3-
# © 2024 Unicode®, Inc.
1+
# CJKRadicals-17.0.0.txt
2+
# Date: 2025-05-07
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -22,8 +22,8 @@
2222
# the Kangxi Radicals block or the CJK Radicals Supplement block.
2323
# The third field is the CJK unified ideograph.
2424
#
25-
# CJK radical numbers match the regular expression [1-9][0-9]{0,2}\'{0,2}
26-
# and in particular they can end with one or two U+0027 ' APOSTROPHE characters.
25+
# CJK radical numbers match the regular expression [1-9][0-9]{0,2}\'{0,3}
26+
# and in particular they can end with one, two, or three U+0027 ' APOSTROPHE characters.
2727
#
2828
# For more information, see UAX #38: Unicode Han Database (Unihan),
2929
# at https://www.unicode.org/reports/tr38/

0 commit comments

Comments
 (0)