Skip to content

Commit 5c5013f

Browse files
authored
Update to Unicode 15.0.0 (#92)
* Update ucd.sh * Regenerate files * Update references to Unicode version * Bump packages versions * Restrict comparison to base to compatible GHC * Fix bounds of unicode-data * Add missing packages to stack.yaml
1 parent bf8bb53 commit 5c5013f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+378
-222
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ This repository provides packages to use the
1212

1313
The Haskell data structures are generated programmatically from the UCD files.
1414
The latest Unicode version supported by these libraries is
15-
[`14.0.0`](https://www.unicode.org/versions/Unicode14.0.0/).
15+
[`15.0.0`](https://www.unicode.org/versions/Unicode15.0.0/).
1616

1717
### `unicode-data`
1818

experimental/unicode-data-text/test/Unicode/Text/CaseSpec.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ does not match the version of this package.
2222
| 9.0.[1-2] | 4.15.0 | 12.1 |
2323
| 9.2.[1-4] | 4.16.0 | 14.0 |
2424
| 9.4.[1-2] | 4.17.0 | 14.0 |
25+
| 9.6.1 | 4.18.0 | 15.0 |
2526
+-------------+----------------+-----------------+
2627
-}
2728

experimental/unicode-data-text/unicode-data-text.cabal

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ library
6868
build-depends:
6969
base >= 4.7 && < 4.18,
7070
text >= 1.2.4 && < 2.1,
71-
unicode-data >= 0.3 && < 0.4
71+
unicode-data >= 0.3 && < 0.5
7272

7373
test-suite test
7474
import: default-extensions, compile-options
@@ -85,7 +85,7 @@ test-suite test
8585
unicode-data-text
8686
build-tool-depends:
8787
hspec-discover:hspec-discover >= 2.0 && < 2.11
88-
if impl(ghc >= 9.2.1)
88+
if impl(ghc >= 9.5.1)
8989
cpp-options: -DCOMPATIBLE_GHC_UNICODE
9090
default-language: Haskell2010
9191

@@ -100,6 +100,6 @@ benchmark bench
100100
tasty-bench >= 0.2.5 && < 0.4,
101101
tasty >= 1.4.1,
102102
text >= 1.2.4 && < 2.1,
103-
unicode-data >= 0.3 && < 0.4,
103+
unicode-data >= 0.3 && < 0.5,
104104
unicode-data-text
105105
ghc-options: -O2 -fdicts-strict -rtsopts

stack.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ resolver: lts-18.18
22
packages:
33
- './unicode-data'
44
- './unicode-data-names'
5+
- './unicode-data-scripts'
6+
- './unicode-data-security'
7+
- './experimental/unicode-data-text'
58
extra-deps:
69
- streamly-0.8.0
710
flags:

ucd.sh

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# we used to generate them earlier are exactly the same as the ones we are
66
# downloading. To ensure that, verification of the checksum is necessary.
77

8-
VERSION=14.0.0
8+
VERSION=15.0.0
99

1010
# When downloading a fresh new version, comment this out
1111
VERIFY_CHECKSUM=y
@@ -14,29 +14,29 @@ VERIFY_CHECKSUM=y
1414
UCD_URL="https://www.unicode.org/Public/$VERSION/ucd"
1515
# Filename:checksum
1616
UCD_FILES="\
17-
Blocks.txt:598870dddef7b34b5a972916528c456aff2765b79cd4f9647fb58ceb767e7f17 \
18-
CaseFolding.txt:a566cd48687b2cd897e02501118b2413c14ae86d318f9abbbba97feb84189f0f \
19-
DerivedCoreProperties.txt:e3eddd7d469cd1b0feed7528defad1a1cc7c6a9ceb0ae4446a6d10921ed2e7bc \
20-
DerivedNormalizationProps.txt:b2c444c20730b097787fdf50bd7d6dd3fc5256ab8084f5b35b11c8776eca674c \
21-
NameAliases.txt:14b3b677d33f95c51423dce6eef4a6a28b4b160451ecedee4b91edb6745cf4a3 \
22-
PropertyValueAliases.txt:eb755757e20b72b330b2948df3cf2ff7adb0e31bb060140dc09dafb132ace2cd \
23-
PropList.txt:6bddfdb850417a5bee6deff19290fd1b138589909afb50f5a049f343bf2c6722 \
24-
Scripts.txt:52db475c4ec445e73b0b16915448c357614946ad7062843c563e00d7535c6510 \
25-
ScriptExtensions.txt:d37eedf63ff9c48bac863d5f76862373d6cf5269fd21253d499e2430d638c01d \
26-
SpecialCasing.txt:c667b45908fd269af25fd55d2fc5bbc157fb1b77675936e25c513ce32e080334 \
27-
UnicodeData.txt:36018e68657fdcb3485f636630ffe8c8532e01c977703d2803f5b89d6c5feafb \
28-
extracted/DerivedCombiningClass.txt:12b0c3af9b600b49488d66545a3e7844ea980809627201bf9afeebe1c9f16f4e \
29-
extracted/DerivedName.txt:fef3e11514ba152f0d38a09f8018c03a825f846dbb912334c1e5c9fb29392a02 \
30-
extracted/DerivedNumericValues.txt:11075771b112e8e7ccf6ffa637c4c91eadc3ef3db0517b24e605df8fd3624239"
17+
Blocks.txt:529dc5d0f6386d52f2f56e004bbfab48ce2d587eea9d38ba546c4052491bd820 \
18+
CaseFolding.txt:cdd49e55eae3bbf1f0a3f6580c974a0263cb86a6a08daa10fbf705b4808a56f7 \
19+
DerivedCoreProperties.txt:d367290bc0867e6b484c68370530bdd1a08b6b32404601b8c7accaf83e05628d \
20+
DerivedNormalizationProps.txt:d5687a48c95c7d6e1ec59cb29c0f2e8b052018eb069a4371b7368d0561e12a29 \
21+
NameAliases.txt:3e39509e8fae3e5d50ba73759d0b97194501d14a9c63107a6372a46b38be18e8 \
22+
PropertyValueAliases.txt:13a7666843abea5c6b7eb8c057c57ab9bb2ba96cfc936e204224dd67d71cafad \
23+
PropList.txt:e05c0a2811d113dae4abd832884199a3ea8d187ee1b872d8240a788a96540bfd \
24+
Scripts.txt:cca85d830f46aece2e7c1459ef1249993dca8f2e46d51e869255be140d7ea4b0 \
25+
ScriptExtensions.txt:7e07313d9d0bee42220c476b64485995130ae30917bbcf7780b602d677d7e33f \
26+
SpecialCasing.txt:78b29c64b5840d25c11a9f31b665ee551b8a499eca6c70d770fcad7dd710f494 \
27+
UnicodeData.txt:806e9aed65037197f1ec85e12be6e8cd870fc5608b4de0fffd990f689f376a73 \
28+
extracted/DerivedCombiningClass.txt:ca54f6360cd288ad92113415bf1f77749015abe11cbd6798d21f7fa81f04205d \
29+
extracted/DerivedName.txt:f76288153e20de185a40f7ee6e0e365f3c6c80e9e3019b5aa0afc8ac2c1b15f2 \
30+
extracted/DerivedNumericValues.txt:6bd30f385f3baf3ab5d5308c111a81de87bea5f494ba0ba69e8ab45263b8c34d"
3131

3232
# Security files (https://www.unicode.org/Public/security/$VERSION/$file)
3333
SECURITY_URL="https://www.unicode.org/Public/security/$VERSION"
3434
# Filename:checksum
3535
SECURITY_FILES="\
36-
IdentifierStatus.txt:3f3f368fccdb37f350ecedc20b37fa71ab31c04e847884c77780d34283539f73 \
37-
IdentifierType.txt:45a150c23961b58d7784704af6c4daccd6517d97b6489e53d13bbdbf9e4f065f \
38-
confusables.txt:f901938af166c3afa471bd10c224b0979cd024340f290649e16b29f779d48bfe \
39-
intentional.txt:42243c12a2e20546e836576e3091a5a5db2c1fc506899b1d8b56f7b6eab77cb3"
36+
IdentifierStatus.txt:fd5c5e510914a2018e092bc51ea653bd2bfcf7daa116a346f09179a0f74704b0 \
37+
IdentifierType.txt:71e95d5811999776a39c33a9149e5bf3c3311217a36b89005c678f34f08debc0 \
38+
confusables.txt:2b10130885c3370b101c52d7baedc452ab7f0e257b86c1e52ee657ecfc29ce64 \
39+
intentional.txt:4550bcc406b5ce3b1a40ff857a3f8b703ea0c868c35f2f7c93d86bfb733215f9"
4040

4141
# Download the files
4242

unicode-data-names/Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.2.0 (September 2022)
4+
5+
- Update to [Unicode 15.0.0](https://www.unicode.org/versions/Unicode15.0.0/).
6+
37
## 0.1.0 (June 2022)
48

59
- Initial release

unicode-data-names/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ character names and aliases from the
77
The Haskell data structures are generated programmatically from the
88
Unicode character database (UCD) files. The latest Unicode version
99
supported by this library is
10-
[`14.0.0`](https://www.unicode.org/versions/Unicode14.0.0/).
10+
[`15.0.0`](https://www.unicode.org/versions/Unicode15.0.0/).
1111

1212
Please see the
1313
[Haddock documentation](https://hackage.haskell.org/package/unicode-data-names)

unicode-data-names/lib/Unicode/Char/General/Names.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
-- Stability : experimental
77
--
88
-- Unicode character names and name aliases.
9-
-- See Unicode standard 14.0.0, section 4.8.
9+
-- See Unicode standard 15.0.0, section 4.8.
1010
--
1111
-- @since 0.1.0
1212

@@ -84,7 +84,7 @@ nameAliasesWithTypes
8484
= fmap (fmap (fmap unpack))
8585
. NameAliases.nameAliasesWithTypes
8686

87-
-- Note: names are ASCII. See Unicode Standard 14.0.0, section 4.8.
87+
-- Note: names are ASCII. See Unicode Standard 15.0.0, section 4.8.
8888
{-# INLINE unpack #-}
8989
unpack :: CString -> String
9090
unpack = unsafePerformIO . peekCAString

unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/DerivedName.hs

Lines changed: 4 additions & 4 deletions
Large diffs are not rendered by default.

unicode-data-names/lib/Unicode/Internal/Char/UnicodeData/NameAliases.hs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
-- autogenerated from https://www.unicode.org/Public/14.0.0/ucd/NameAliases.txt
1+
-- autogenerated from https://www.unicode.org/Public/15.0.0/ucd/NameAliases.txt
22
-- |
33
-- Module : Unicode.Internal.Char.UnicodeData.NameAliases
44
-- Copyright : (c) 2022 Composewell Technologies and Contributors
@@ -17,7 +17,7 @@ import Data.Maybe (fromMaybe)
1717
import Foreign.C.String (CString)
1818
import GHC.Exts (Ptr(..))
1919

20-
-- | Type of name alias. See Unicode Standard 14.0.0, section 4.8.
20+
-- | Type of name alias. See Unicode Standard 15.0.0, section 4.8.
2121
--
2222
-- @since 0.1.0
2323
data NameAliasType
@@ -86,7 +86,7 @@ nameAliasesWithTypes = \case
8686
'\x0016' -> [(Control,[Ptr "SYNCHRONOUS IDLE\0"#]),(Abbreviation,[Ptr "SYN\0"#])]
8787
'\x0017' -> [(Control,[Ptr "END OF TRANSMISSION BLOCK\0"#]),(Abbreviation,[Ptr "ETB\0"#])]
8888
'\x0018' -> [(Control,[Ptr "CANCEL\0"#]),(Abbreviation,[Ptr "CAN\0"#])]
89-
'\x0019' -> [(Control,[Ptr "END OF MEDIUM\0"#]),(Abbreviation,[Ptr "EOM\0"#])]
89+
'\x0019' -> [(Control,[Ptr "END OF MEDIUM\0"#]),(Abbreviation,[Ptr "EOM\0"#,Ptr "EM\0"#])]
9090
'\x001a' -> [(Control,[Ptr "SUBSTITUTE\0"#]),(Abbreviation,[Ptr "SUB\0"#])]
9191
'\x001b' -> [(Control,[Ptr "ESCAPE\0"#]),(Abbreviation,[Ptr "ESC\0"#])]
9292
'\x001c' -> [(Control,[Ptr "INFORMATION SEPARATOR FOUR\0"#,Ptr "FILE SEPARATOR\0"#]),(Abbreviation,[Ptr "FS\0"#])]
@@ -132,6 +132,7 @@ nameAliasesWithTypes = \case
132132
'\x01a2' -> [(Correction,[Ptr "LATIN CAPITAL LETTER GHA\0"#])]
133133
'\x01a3' -> [(Correction,[Ptr "LATIN SMALL LETTER GHA\0"#])]
134134
'\x034f' -> [(Abbreviation,[Ptr "CGJ\0"#])]
135+
'\x0616' -> [(Correction,[Ptr "ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE\0"#])]
135136
'\x061c' -> [(Abbreviation,[Ptr "ALM\0"#])]
136137
'\x0709' -> [(Correction,[Ptr "SYRIAC SUBLINEAR COLON SKEWED LEFT\0"#])]
137138
'\x0cde' -> [(Correction,[Ptr "KANNADA LETTER LLLA\0"#])]
@@ -149,6 +150,7 @@ nameAliasesWithTypes = \case
149150
'\x180d' -> [(Abbreviation,[Ptr "FVS3\0"#])]
150151
'\x180e' -> [(Abbreviation,[Ptr "MVS\0"#])]
151152
'\x180f' -> [(Abbreviation,[Ptr "FVS4\0"#])]
153+
'\x1bbd' -> [(Correction,[Ptr "SUNDANESE LETTER ARCHAIC I\0"#])]
152154
'\x200b' -> [(Abbreviation,[Ptr "ZWSP\0"#])]
153155
'\x200c' -> [(Abbreviation,[Ptr "ZWNJ\0"#])]
154156
'\x200d' -> [(Abbreviation,[Ptr "ZWJ\0"#])]

0 commit comments

Comments
 (0)