Skip to content

Commit 68c612d

Browse files
authored
Merge pull request #512 from data-man/ucd17
Update to Unicode 17.0
2 parents 6c66234 + 664b32c commit 68c612d

37 files changed

+881
-209
lines changed

share/ucd/CaseFolding.txt

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
# CaseFolding-16.0.0.txt
2-
# Date: 2024-04-30, 21:48:11 GMT
3-
# © 2024 Unicode®, Inc.
1+
# CaseFolding-17.0.0.txt
2+
# Date: 2025-07-30, 23:54:36 GMT
3+
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
@@ -18,15 +18,15 @@
1818
# The data supports both implementations that require simple case foldings
1919
# (where string lengths don't change), and implementations that allow full case folding
2020
# (where string lengths may grow). Note that where they can be supported, the
21-
# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
21+
# full case foldings are superior: for example, they allow "FUSS" and "Fuß" to match.
2222
#
2323
# All code points not listed in this file map to themselves.
2424
#
2525
# NOTE: case folding does not preserve normalization formats!
2626
#
2727
# For information on case folding, including how to have case folding
28-
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
29-
# The Unicode Standard.
28+
# preserve normalization formats, see the
29+
# "Conformance" / "Default Case Algorithms" section of the core specification.
3030
#
3131
# ================================================================================
3232
# Format
@@ -1243,7 +1243,10 @@ A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
12431243
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
12441244
A7CB; C; 0264; # LATIN CAPITAL LETTER RAMS HORN
12451245
A7CC; C; A7CD; # LATIN CAPITAL LETTER S WITH DIAGONAL STROKE
1246+
A7CE; C; A7CF; # LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE
12461247
A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
1248+
A7D2; C; A7D3; # LATIN CAPITAL LETTER DOUBLE THORN
1249+
A7D4; C; A7D5; # LATIN CAPITAL LETTER DOUBLE WYNN
12471250
A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
12481251
A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
12491252
A7DA; C; A7DB; # LATIN CAPITAL LETTER LAMBDA
@@ -1616,6 +1619,31 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
16161619
16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
16171620
16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
16181621
16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
1622+
16EA0; C; 16EBB; # BERIA ERFE CAPITAL LETTER ARKAB
1623+
16EA1; C; 16EBC; # BERIA ERFE CAPITAL LETTER BASIGNA
1624+
16EA2; C; 16EBD; # BERIA ERFE CAPITAL LETTER DARBAI
1625+
16EA3; C; 16EBE; # BERIA ERFE CAPITAL LETTER EH
1626+
16EA4; C; 16EBF; # BERIA ERFE CAPITAL LETTER FITKO
1627+
16EA5; C; 16EC0; # BERIA ERFE CAPITAL LETTER GOWAY
1628+
16EA6; C; 16EC1; # BERIA ERFE CAPITAL LETTER HIRDEABO
1629+
16EA7; C; 16EC2; # BERIA ERFE CAPITAL LETTER I
1630+
16EA8; C; 16EC3; # BERIA ERFE CAPITAL LETTER DJAI
1631+
16EA9; C; 16EC4; # BERIA ERFE CAPITAL LETTER KOBO
1632+
16EAA; C; 16EC5; # BERIA ERFE CAPITAL LETTER LAKKO
1633+
16EAB; C; 16EC6; # BERIA ERFE CAPITAL LETTER MERI
1634+
16EAC; C; 16EC7; # BERIA ERFE CAPITAL LETTER NINI
1635+
16EAD; C; 16EC8; # BERIA ERFE CAPITAL LETTER GNA
1636+
16EAE; C; 16EC9; # BERIA ERFE CAPITAL LETTER NGAY
1637+
16EAF; C; 16ECA; # BERIA ERFE CAPITAL LETTER OI
1638+
16EB0; C; 16ECB; # BERIA ERFE CAPITAL LETTER PI
1639+
16EB1; C; 16ECC; # BERIA ERFE CAPITAL LETTER ERIGO
1640+
16EB2; C; 16ECD; # BERIA ERFE CAPITAL LETTER ERIGO TAMURA
1641+
16EB3; C; 16ECE; # BERIA ERFE CAPITAL LETTER SERI
1642+
16EB4; C; 16ECF; # BERIA ERFE CAPITAL LETTER SHEP
1643+
16EB5; C; 16ED0; # BERIA ERFE CAPITAL LETTER TATASOUE
1644+
16EB6; C; 16ED1; # BERIA ERFE CAPITAL LETTER UI
1645+
16EB7; C; 16ED2; # BERIA ERFE CAPITAL LETTER WASSE
1646+
16EB8; C; 16ED3; # BERIA ERFE CAPITAL LETTER AY
16191647
1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
16201648
1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
16211649
1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM

share/ucd/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
UCD_URL ?= https://www.unicode.org/Public/16.0.0/ucd/
1+
UCD_URL ?= https://www.unicode.org/Public/17.0.0/ucd/
22

33
WGET ?= wget
44

share/ucd/Scripts.txt

Lines changed: 98 additions & 44 deletions
Large diffs are not rendered by default.

share/ucd/UnicodeData.txt

Lines changed: 474 additions & 15 deletions
Large diffs are not rendered by default.

src/libre/class.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ extern const struct class utf8_Bamum;
2929
extern const struct class utf8_Bassa_Vah;
3030
extern const struct class utf8_Batak;
3131
extern const struct class utf8_Bengali;
32+
extern const struct class utf8_Beria_Erfe;
3233
extern const struct class utf8_Bhaiksuki;
3334
extern const struct class utf8_Bopomofo;
3435
extern const struct class utf8_Brahmi;
@@ -153,6 +154,7 @@ extern const struct class utf8_Saurashtra;
153154
extern const struct class utf8_Sharada;
154155
extern const struct class utf8_Shavian;
155156
extern const struct class utf8_Siddham;
157+
extern const struct class utf8_Sidetic;
156158
extern const struct class utf8_SignWriting;
157159
extern const struct class utf8_Sinhala;
158160
extern const struct class utf8_Sogdian;
@@ -167,6 +169,7 @@ extern const struct class utf8_Tagbanwa;
167169
extern const struct class utf8_Tai_Le;
168170
extern const struct class utf8_Tai_Tham;
169171
extern const struct class utf8_Tai_Viet;
172+
extern const struct class utf8_Tai_Yo;
170173
extern const struct class utf8_Takri;
171174
extern const struct class utf8_Tamil;
172175
extern const struct class utf8_Tangsa;
@@ -178,6 +181,7 @@ extern const struct class utf8_Tibetan;
178181
extern const struct class utf8_Tifinagh;
179182
extern const struct class utf8_Tirhuta;
180183
extern const struct class utf8_Todhri;
184+
extern const struct class utf8_Tolong_Siki;
181185
extern const struct class utf8_Toto;
182186
extern const struct class utf8_Tulu_Tigalari;
183187
extern const struct class utf8_Ugaritic;

src/libre/class/utf8_Arabic.c

Lines changed: 6 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libre/class/utf8_Beria_Erfe.c

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libre/class/utf8_Common.c

Lines changed: 16 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libre/class/utf8_Han.c

Lines changed: 4 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/libre/class/utf8_Inherited.c

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)