Skip to content

Commit e2bdb10

Browse files
committed
1 parent fdc5f2d commit e2bdb10

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+8382
-7633
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ This repository provides packages to use the
1212

1313
The Haskell data structures are generated programmatically from the UCD files.
1414
The latest Unicode version supported by these libraries is
15-
[`15.1.0`](https://www.unicode.org/versions/Unicode15.1.0/).
15+
[`16.0.0`](https://www.unicode.org/versions/Unicode16.0.0/).
1616

1717
### `unicode-data`
1818

ucd.sh

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# we used to generate them earlier are exactly the same as the ones we are
66
# downloading. To ensure that, verification of the checksum is necessary.
77

8-
VERSION=15.1.0
8+
VERSION=16.0.0
99

1010
# When downloading a fresh new version, comment this out
1111
VERIFY_CHECKSUM=y
@@ -16,29 +16,29 @@ UCD_URL="https://www.unicode.org/Public/$VERSION/ucd"
1616
# $ find data/$VERSION/ -type f -print0 | xargs -0 sha256sum
1717
# Format: filename:checksum
1818
UCD_FILES="\
19-
Blocks.txt:443ee0524a775bf021777c296f5b591b5611c8aef6bc922887d27b0bc13892b5 \
20-
CaseFolding.txt:4e55acfdc32825a22e87670e9056a3bf94ad7c5400065778e9e10f8314372bcf \
21-
DerivedCoreProperties.txt:f55d0db69123431a7317868725b1fcbf1eab6b265d756d1bd7f0f6d9f9ee108b \
22-
DerivedNormalizationProps.txt:8875dccee2bc1a7c1fe568a3b502a9e78c9e0495afd96b6568b4294d0ed1f7e1 \
23-
NameAliases.txt:fbf0e640bab36e165c4da5b6a98bdd963fcb4f923b5097f26f6f7f18b9678698 \
24-
PropertyValueAliases.txt:4b7411fc592c4985e5f03643aa0bddfdfd45250ff1790d358926614d20e37652 \
25-
PropList.txt:05672956317b6296bc2ec3d6cef1f6452b57ff4f2efc6dc55b0a19277d5fcfd1 \
26-
Scripts.txt:0eacb65169ae6eb1d399cd70826b3da15fff19f6f586eecf819b70c83b1d9b32 \
27-
ScriptExtensions.txt:fdfd54237a2c0452ba1060571fd1e58fd46aeecdfda7c5b5be1b716dad755cec \
28-
SpecialCasing.txt:55a477efd933a52cd27e6a9bf70265bb2d8814af31aab07767abc8eb421f27ef \
29-
UnicodeData.txt:2fc713e6a31a87c4850a37fe2caffa4218180fadb5de86b43a143ddb4581fb86 \
30-
extracted/DerivedCombiningClass.txt:b2e69512b1a96e26105f73e8af42bca6d2e40814683ba31615977f276d6734ff \
31-
extracted/DerivedName.txt:c5f39bd2049f8b03963a629a6cb9fa0371a46aebd7f057307773dfec28eea5ee \
32-
extracted/DerivedNumericValues.txt:120a010f7f95c2123ecb4d61313f2f2121abf9289f6426e992f963ec076ee811"
19+
Blocks.txt:f3907b395d410f1b97342292ca6bc83dd12eb4b205f2a0c48efdef99e517d7b0 \
20+
CaseFolding.txt:6f1f9c588eb4a5c718d9e8f93b782685e5c7fec872cf05e8e6878053599e09bb \
21+
DerivedCoreProperties.txt:39d35161f2954497f69e08bdb9e701493f476a3d30222de20028feda36c1dabd \
22+
DerivedNormalizationProps.txt:4d4c03892dea9146d674b686e495df2d55a28d071ac474041d73518f887abddc \
23+
NameAliases.txt:9953f0fcebf5ea8091c5c581e4df0e43f20d2533c84ccca7987a9bb819a896a8 \
24+
PropertyValueAliases.txt:440fd3e5460b9bfe31da67b6f923992e1989d31fe2ed91e091c4b8f8e2620bf9 \
25+
PropList.txt:53d614508e2a0b2305a8aa21cd60d993de9326cdf65993660dfcce4503548583 \
26+
Scripts.txt:9e88f0a677df47311106340be8ede2ecdacd9c1c931831218d2be6d5508e0039 \
27+
ScriptExtensions.txt:049117ce26b9769fe2749b06eef51a50a89faef4a97764dd2d81daa715980700 \
28+
SpecialCasing.txt:8d5de354eef79f2395a54c9c7dcebbaf3d30fc962d0f85611ea97aa973a0c451 \
29+
UnicodeData.txt:ff58e5823bd095166564a006e47d111130813dcf8bf234ef79fa51a870edb48f \
30+
extracted/DerivedCombiningClass.txt:52064d588c98c623b2373905e6a449eb520f900113954bcd212e94ef0810b471 \
31+
extracted/DerivedName.txt:0cc1469faa0c5518572ef93f4f457f93aa8a160ce320aad3793d85f4b435fd24 \
32+
extracted/DerivedNumericValues.txt:00b43cc5c9b86a834f82389c4537f103e652821387daa556f0bd220f6c23007e"
3333

3434
# Security files (https://www.unicode.org/Public/security/$VERSION/$file)
3535
SECURITY_URL="https://www.unicode.org/Public/security/$VERSION"
3636
# Format: filename:checksum
3737
SECURITY_FILES="\
38-
IdentifierStatus.txt:d34efea5bd5f219a1ec8a6eee728ac1efafd2dacba66a241e213457e2a9155f4 \
39-
IdentifierType.txt:7a513c6d5f57b49ec838e8d62899e9f5e336222313a302ffa2adcf4338f6c665 \
40-
confusables.txt:8289f833e4cf78fde56b2080dc0e42934ef5182c9c3f4dd1fbdf2bced69fd5ed \
41-
intentional.txt:6ce4e2a713938109091c5d55cff7099d10433a3494d989787a86e637070e9491"
38+
IdentifierStatus.txt:c6108ca140e054b55a5b0378e7ebed8b1ef0e846251f6195361bc9af8ffc61b1 \
39+
IdentifierType.txt:c7e57f71176fb3035e0c85e4d9f30b08374588b2bd16e729efbc7e49c7c9438f \
40+
confusables.txt:95bd0aad6dced5ebc63436f459c06ab21a8d107cd842fb57f5c3a1e91bca8611 \
41+
intentional.txt:6827f1f7694f747aa93e374619b4bf81ffb18e2feb0b9c982c427f7eec2266c1"
4242

4343
# Download the files
4444

ucd2haskell/exe/UCD2Haskell/Modules/UnicodeData/DerivedNames.hs

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,18 @@ genNamesModule moduleName = Fold step initial done
9090
cjkCompat = 0xf0
9191
cjkUnified = 0xf1
9292
tangut = 0xf2
93+
egyptianHieroglyph = 0xf3
94+
khitan = 0xf4
95+
nushu = 0xf5
9396
hangul = 0x80
9497

9598
encodeName name
96-
| BS.take 28 name == "CJK COMPATIBILITY IDEOGRAPH-" = ("", cjkCompat, 0, True)
97-
| BS.take 22 name == "CJK UNIFIED IDEOGRAPH-" = ("", cjkUnified, 0, True)
98-
| BS.take 17 name == "TANGUT IDEOGRAPH-" = ("", tangut, 0, True)
99+
| BS.take 28 name == "CJK COMPATIBILITY IDEOGRAPH-" = ("", cjkCompat, 0, True)
100+
| BS.take 22 name == "CJK UNIFIED IDEOGRAPH-" = ("", cjkUnified, 0, True)
101+
| BS.take 17 name == "TANGUT IDEOGRAPH-" = ("", tangut, 0, True)
102+
| BS.take 20 name == "EGYPTIAN HIEROGLYPH-" = ("", egyptianHieroglyph, 0, True)
103+
| BS.take 30 name == "KHITAN SMALL SCRIPT CHARACTER-" = ("", khitan, 0, True)
104+
| BS.take 16 name == "NUSHU CHARACTER-" = ("", nushu, 0, True)
99105
| BS.take 16 name == "HANGUL SYLLABLE " =
100106
let !name' = BS.drop 16 name; !len = BS.length name'
101107
in if len <= 12
@@ -123,6 +129,9 @@ genNamesModule moduleName = Fold step initial done
123129
, " , pattern CjkCompatibilityIdeograph"
124130
, " , pattern CjkUnifiedIdeograph"
125131
, " , pattern TangutIdeograph"
132+
, " , pattern EgyptianHieroglyph"
133+
, " , pattern KhitanSmallScript"
134+
, " , pattern NushuCharacter"
126135
, " , pattern HangulSyllable"
127136
, " ) where"
128137
, ""
@@ -159,6 +168,24 @@ genNamesModule moduleName = Fold step initial done
159168
, "pattern TangutIdeograph :: Int#"
160169
, "pattern TangutIdeograph = 0x" <> intHex tangut <> "#"
161170
, ""
171+
, "-- | Egyptian Hieroglyph. Used to test the length returned by 'name'."
172+
, "--"
173+
, "-- @since 0.5.0"
174+
, "pattern EgyptianHieroglyph :: Int#"
175+
, "pattern EgyptianHieroglyph = 0x" <> intHex egyptianHieroglyph <> "#"
176+
, ""
177+
, "-- | Khitan Small Script. Used to test the length returned by 'name'."
178+
, "--"
179+
, "-- @since 0.5.0"
180+
, "pattern KhitanSmallScript :: Int#"
181+
, "pattern KhitanSmallScript = 0x" <> intHex khitan <> "#"
182+
, ""
183+
, "-- | Nushu Character. Used to test the length returned by 'name'."
184+
, "--"
185+
, "-- @since 0.5.0"
186+
, "pattern NushuCharacter :: Int#"
187+
, "pattern NushuCharacter = 0x" <> intHex nushu <> "#"
188+
, ""
162189
, "-- | Hangul syllable. Used to test the length returned by 'name'."
163190
, "--"
164191
, "-- @since 0.3.0"
@@ -180,6 +207,15 @@ genNamesModule moduleName = Fold step initial done
180207
, "-- * If length = @'TangutIdeograph'@,"
181208
, "-- then the name is generated from the pattern “TANGUT IDEOGRAPH-*”,"
182209
, "-- where * is the hexadecimal codepoint."
210+
, "-- * If length = @'EgyptianHieroglyph'@,"
211+
, "-- then the name is generated from the pattern “EGYPTIAN HIEROGLYPH-*”,"
212+
, "-- where * is the hexadecimal codepoint."
213+
, "-- * If length = @'KhitanSmallScript'@,"
214+
, "-- then the name is generated from the pattern “KHITAN SMALL SCRIPT CHARACTER-*”,"
215+
, "-- where * is the hexadecimal codepoint."
216+
, "-- * If length = @'NushuCharacter'@,"
217+
, "-- then the name is generated from the pattern “NUSHU CHARACTER-*”,"
218+
, "-- where * is the hexadecimal codepoint."
183219
, "-- * If length ≥ @'HangulSyllable'@,"
184220
, "-- then the name is generated by prepending “HANGUL SYLLABLE ”"
185221
, "-- to the returned string."

ucd2haskell/ucd2haskell.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ executable ucd2haskell
104104
, filepath >= 1.4.100 && < 1.5
105105
, getopt-generics >= 0.13 && < 0.14
106106
, shamochu >= 0.1.0 && < 0.2
107-
, unicode-data-parser >= 0.3.0.2 && < 0.4
107+
, unicode-data-parser >= 0.3.0.3 && < 0.4
108108
, vector >= 0.13 && < 0.14
109109
else
110110
buildable: False

unicode-data-names/Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.5.0 (TBD)
4+
5+
- Updated to [Unicode 16.0.0](https://www.unicode.org/versions/Unicode16.0.0/).
6+
37
## 0.4.0 (July 2024)
48

59
- Updated to [Unicode 15.1.0](https://www.unicode.org/versions/Unicode15.1.0/).

unicode-data-names/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ There are 3 APIs:
1212
The Haskell data structures are generated programmatically from the
1313
Unicode character database (UCD) files. The latest Unicode version
1414
supported by this library is
15-
[`15.1.0`](https://www.unicode.org/versions/Unicode15.1.0/).
15+
[`16.0.0`](https://www.unicode.org/versions/Unicode16.0.0/).
1616

1717
Please see the
1818
[Haddock documentation](https://hackage.haskell.org/package/unicode-data-names)

unicode-data-names/lib/Unicode/Char/General/Names.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,18 @@ name (C# c#) = case DerivedName.name c# of
8080
where
8181
!hex = showHex c#
8282
!n = 'T':'A':'N':'G':'U':'T':' ':'I':'D':'E':'O':'G':'R':'A':'P':'H':'-':hex
83+
DerivedName.EgyptianHieroglyph -> Just n
84+
where
85+
!hex = showHex c#
86+
!n = 'E':'G':'Y':'P':'T':'I':'A':'N':' ':'H':'I':'E':'R':'O':'G':'L':'Y':'P':'H':'-':hex
87+
DerivedName.KhitanSmallScript -> Just n
88+
where
89+
!hex = showHex c#
90+
!n = 'K':'H':'I':'T':'A':'N':' ':'S':'M':'A':'L':'L':' ':'S':'C':'R':'I':'P':'T':' ':'C':'H':'A':'R':'A':'C':'T':'E':'R':'-':hex
91+
DerivedName.NushuCharacter -> Just n
92+
where
93+
!hex = showHex c#
94+
!n = 'N':'U':'S':'H':'U':' ':'C':'H':'A':'R':'A':'C':'T':'E':'R':'-':hex
8395
_
8496
| isTrue# (len# <# DerivedName.HangulSyllable) -> let !n = unpack name# [] len# in Just n
8597
| otherwise ->

unicode-data-names/lib/Unicode/Char/General/Names/ByteString.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ name (C# c#) = case DerivedName.name c# of
6262
DerivedName.TangutIdeograph -> Just n
6363
where
6464
!n = mkNameFromTemplate# "TANGUT IDEOGRAPH-"# 17# (ord# c#)
65+
DerivedName.EgyptianHieroglyph -> Just n
66+
where
67+
!n = mkNameFromTemplate# "EGYPTIAN HIEROGLYPH-"# 20# (ord# c#)
68+
DerivedName.KhitanSmallScript -> Just n
69+
where
70+
!n = mkNameFromTemplate# "KHITAN SMALL SCRIPT CHARACTER-"# 30# (ord# c#)
71+
DerivedName.NushuCharacter -> Just n
72+
where
73+
!n = mkNameFromTemplate# "NUSHU CHARACTER-"# 16# (ord# c#)
6574
_
6675
| isTrue# (len# <# DerivedName.HangulSyllable) ->
6776
let !n = unpackAddr# name# len#

unicode-data-names/lib/Unicode/Char/General/Names/Text.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ name (C# c#) = case DerivedName.name c# of
5454
DerivedName.TangutIdeograph -> Just n
5555
where
5656
!n = mkNameFromTemplate "TANGUT IDEOGRAPH-"# 17# (ord# c#)
57+
DerivedName.EgyptianHieroglyph -> Just n
58+
where
59+
!n = mkNameFromTemplate "EGYPTIAN HIEROGLYPH-"# 20# (ord# c#)
60+
DerivedName.KhitanSmallScript -> Just n
61+
where
62+
!n = mkNameFromTemplate "KHITAN SMALL SCRIPT CHARACTER-"# 30# (ord# c#)
63+
DerivedName.NushuCharacter -> Just n
64+
where
65+
!n = mkNameFromTemplate "NUSHU CHARACTER-"# 16# (ord# c#)
5766
_
5867
| isTrue# (len# <# DerivedName.HangulSyllable) ->
5968
let !n = unpackAddr# name# len#

unicode-data-names/lib/Unicode/Internal/Char/Names/Version.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ module Unicode.Internal.Char.Names.Version (unicodeVersion) where
1313
import Data.Version (Version, makeVersion)
1414

1515
-- | Version of the Unicode standard used by this package:
16-
-- [15.1.0](https://www.unicode.org/versions/Unicode15.1.0/).
16+
-- [16.0.0](https://www.unicode.org/versions/Unicode16.0.0/).
1717
--
1818
-- @since 0.3.0
1919
unicodeVersion :: Version
20-
unicodeVersion = makeVersion [15,1,0]
20+
unicodeVersion = makeVersion [16,0,0]

0 commit comments

Comments
 (0)