Skip to content

Commit bf8bb53

Browse files
authored
Last fixes before release (#94)
* Remove allBlockRanges * When downloading files, create a separate directory for each Unicode version. This facilitate the work on various Unicode versions. * Improve docs
1 parent cd1e33f commit bf8bb53

File tree

13 files changed

+60
-45
lines changed

13 files changed

+60
-45
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ See `unicode-data`’s [guide](unicode-data/README.md#unicode-database-version-u
6666
## Licensing
6767

6868
`unicode-data*` packages are an [open source](https://github.com/composewell/unicode-data)
69-
project available under a liberal [Apache-2.0 license](LICENSE).
69+
project available under a liberal [Apache-2.0 license](unicode-data/LICENSE).
7070

7171
## Contributing
7272

experimental/unicode-data-text/lib/Unicode/Text/Case.hs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,17 @@ streamUnfold (C.Unfold step inject) = \case
5555
caseConvertStream :: C.Unfold Char Char -> T.Text -> T.Text
5656
caseConvertStream u t = TF.unstream (streamUnfold u (TF.stream t))
5757

58+
-- | Convert to full upper case using 'T.Text' fusion.
5859
{-# INLINE toUpperFusion #-}
5960
toUpperFusion :: T.Text -> T.Text
6061
toUpperFusion = caseConvertStream C.upperCaseMapping
6162

63+
-- | Convert to full lower case using 'T.Text' fusion.
6264
{-# INLINE toLowerFusion #-}
6365
toLowerFusion :: T.Text -> T.Text
6466
toLowerFusion = caseConvertStream C.lowerCaseMapping
6567

68+
-- | Convert to full case fold using 'T.Text' fusion.
6669
{-# INLINE toCaseFoldFusion #-}
6770
toCaseFoldFusion :: T.Text -> T.Text
6871
toCaseFoldFusion = caseConvertStream C.caseFoldMapping
@@ -105,6 +108,7 @@ streamUnfoldToTitle = case C.lowerCaseMapping of
105108
C.Yield c st' -> TF.Yield c (CC2 s st')
106109
{-# INLINE [0] streamUnfoldToTitle #-}
107110

111+
-- | Convert to full title case using 'T.Text' fusion.
108112
{-# INLINE toTitleFusion #-}
109113
toTitleFusion :: T.Text -> T.Text
110114
toTitleFusion = TF.unstream . streamUnfoldToTitle . TF.stream
@@ -189,18 +193,21 @@ caseConvertText ascii (C.Unfold (step :: u -> C.Step u Char) inject) (T.Text src
189193
writeMapping (step st) (dstOff + d)
190194
{-# INLINE caseConvertText #-}
191195

196+
-- | Convert to full upper case /without/ 'T.Text' fusion.
192197
{-# INLINE toUpper #-}
193198
toUpper :: T.Text -> T.Text
194199
toUpper = caseConvertText
195200
(\w -> if w - 97 <= 25 then w - 32 else w)
196201
C.upperCaseMapping
197202

203+
-- | Convert to full lower case /without/ 'T.Text' fusion.
198204
{-# INLINE toLower #-}
199205
toLower :: T.Text -> T.Text
200206
toLower = caseConvertText
201207
(\w -> if w - 65 <= 25 then w + 32 else w)
202208
C.lowerCaseMapping
203209

210+
-- | Convert to full case fold /without/ 'T.Text' fusion.
204211
{-# INLINE toCaseFold #-}
205212
toCaseFold :: T.Text -> T.Text
206213
toCaseFold = caseConvertText

experimental/unicode-data-text/unicode-data-text.cabal

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ version: 0.1.0
44
synopsis: Unicode features for “text” package
55
description:
66
@unicode-data-text@ provides Unicode features from
7-
<https://hackage.haskell.org/package/unicode-data @unicode-data@> package
8-
for the <https://hackage.haskell.org/package/text @text@> package.
7+
<https://hackage.haskell.org/package/unicode-data unicode-data> package
8+
for the <https://hackage.haskell.org/package/text text> package.
99
homepage: http://github.com/composewell/unicode-data
1010
bug-reports: https://github.com/composewell/unicode-data/issues
1111
license: Apache-2.0

ucd.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ SECURITY_FILES="\
4444
# and verify the $checksum if $VERIFY_CHECKSUM is enabled
4545
# $1 = file:checksum
4646
download_file() {
47-
local directory="data/$1"
47+
local directory="data/$VERSION/$1"
4848
local url="$2"
4949
local pair="$3"
5050
local file="$(echo "$pair" | cut -f1 -d':')"
@@ -81,7 +81,7 @@ download_files() {
8181
run_generator() {
8282
# Compile and run ucd2haskell
8383
cabal run --flag ucd2haskell ucd2haskell -- \
84-
--input ./data/ \
84+
--input "./data/$VERSION" \
8585
--output-core ./unicode-data/lib/ \
8686
--output-names ./unicode-data-names/lib/ \
8787
--output-scripts ./unicode-data-scripts/lib/ \

unicode-data/bench/Main.hs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,6 @@ main = defaultMain
134134
, bgroup "blockDefinition"
135135
[ benchRangeNF "unicode-data" (show . B.blockDefinition)
136136
]
137-
, bgroup "allBlockRanges"
138-
[ benchChars "unicode-data" (const B.allBlockRanges)
139-
]
140137
]
141138
, bgroup "Unicode.Char.General.Compat"
142139
[ bgroup' "isAlpha"

unicode-data/exe/Parser/Text.hs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ genBlocksModule moduleName = done <$> Fold.foldl' step initial
315315
, "{-# OPTIONS_HADDOCK hide #-}"
316316
, ""
317317
, "module " <> moduleName
318-
, "(Block(..), BlockDefinition(..), block, blockDefinition, allBlockRanges)"
318+
, "(Block(..), BlockDefinition(..), block, blockDefinition)"
319319
, "where"
320320
, ""
321321
, "import Data.Ix (Ix)"
@@ -344,14 +344,6 @@ genBlocksModule moduleName = done <$> Fold.foldl' step initial
344344
, "blockDefinition :: Block -> BlockDefinition"
345345
, "blockDefinition b = case b of"
346346
, mconcat (reverse defs)
347-
, "-- | All the block ranges, in ascending order."
348-
, "--"
349-
, "-- @since 0.3.1"
350-
, "{-# INLINE allBlockRanges #-}"
351-
, "allBlockRanges :: [(Int, Int)]"
352-
, "allBlockRanges ="
353-
, " " <> show ranges'
354-
, ""
355347
, "-- | Character block, if defined."
356348
, "--"
357349
, "-- @since 0.3.1"

unicode-data/lib/Unicode/Char.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ module Unicode.Char
3535
, module Unicode.Char.Identifiers
3636
, unicodeVersion
3737

38-
-- * Re-export
38+
-- * Re-export from @base@
3939
, ord
4040
, chr
4141
)

unicode-data/lib/Unicode/Char/Case.hs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99
--
1010
-- Case and case mapping related functions.
1111
--
12+
-- This module provides /full/ predicates and mappings that are /not/ compatible
13+
-- with those in "Data.Char", which rely on simple properties.
14+
-- See "Unicode.Char.Case.Compat" for a drop-in replacement of the functions in
15+
-- "Data.Char".
16+
--
17+
1218
module Unicode.Char.Case
1319
( -- * Predicates
1420
isLowerCase
@@ -48,29 +54,49 @@ import qualified Unicode.Internal.Char.SpecialCasing.LowerCaseMapping as C
4854
import qualified Unicode.Internal.Char.SpecialCasing.TitleCaseMapping as C
4955
import qualified Unicode.Internal.Char.SpecialCasing.UpperCaseMapping as C
5056

51-
-- | Returns 'True' for lower-case letters.
57+
-- | Returns 'True' for lower-case characters.
58+
--
59+
-- It uses the character property
60+
-- <https://www.unicode.org/reports/tr44/#Lowercase Lowercase>.
5261
--
5362
-- @since 0.3.0
5463
{-# INLINE isLowerCase #-}
5564
isLowerCase :: Char -> Bool
5665
isLowerCase = P.isLowercase
5766

58-
-- | Returns 'True' for lower-case letters.
67+
-- | Returns 'True' for lower-case characters.
68+
--
69+
-- It uses the character property
70+
-- <https://www.unicode.org/reports/tr44/#Lowercase Lowercase>.
5971
--
6072
-- @since 0.1.0
6173
{-# INLINE isLower #-}
6274
{-# DEPRECATED isLower "Use isLowerCase instead. Note that the behavior of this function does not match base:Data.Char.isLower. See Unicode.Char.Case.Compat for behavior compatible with base:Data.Char." #-}
6375
isLower :: Char -> Bool
6476
isLower = P.isLowercase
6577

66-
-- | Returns 'True' for upper-case letters.
78+
-- | Returns 'True' for upper-case characters.
79+
--
80+
-- It uses the character property
81+
-- <https://www.unicode.org/reports/tr44/#Uppercase Uppercase>.
82+
--
83+
-- Note: it does /not/ match title-cased letters. Those are matched using:
84+
-- @'Unicode.Char.General.generalCategory' c ==
85+
-- 'Unicode.Char.General.TitlecaseLetter'@.
6786
--
6887
-- @since 0.3.0
6988
{-# INLINE isUpperCase #-}
7089
isUpperCase :: Char -> Bool
7190
isUpperCase = P.isUppercase
7291

73-
-- | Returns 'True' for upper-case letters.
92+
-- | Returns 'True' for upper-case characters.
93+
--
94+
-- It uses the character property
95+
-- <https://www.unicode.org/reports/tr44/#Uppercase Uppercase>.
96+
--
97+
-- Note: it does /not/ match title-cased letters. Those are matched using:
98+
-- @'Unicode.Char.General.generalCategory' c ==
99+
-- 'Unicode.Char.General.TitlecaseLetter'@.
74100
--
75101
-- @since 0.1.0
76102
{-# INLINE isUpper #-}

unicode-data/lib/Unicode/Char/Case/Compat.hs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
-- Maintainer : [email protected]
66
-- Stability : experimental
77
--
8-
-- Compatibility module for case and case mapping related functions..
8+
-- Compatibility module for case and case mapping related functions.
99
--
1010
-- The functions of this module are drop-in replacement for those in "Data.Char".
1111
-- They are similar but not identical to some functions in "Unicode.Char.Case",
@@ -30,6 +30,11 @@ import qualified Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping as C
3030
-- Title case is used by a small number of letter ligatures like the
3131
-- single-character form of /Lj/.
3232
--
33+
-- It matches characters with general category 'UppercaseLetter' and
34+
-- 'TitlecaseLetter'.
35+
--
36+
-- See: 'Unicode.Char.Case.isUpperCase' for the /full upper/ case predicate.
37+
--
3338
-- prop> isUpper c == Data.Char.isUpper c
3439
--
3540
-- @since 0.3.0
@@ -41,6 +46,10 @@ isUpper c = case generalCategory c of
4146

4247
-- | Selects lower-case alphabetic Unicode characters (letters).
4348
--
49+
-- It matches characters with general category 'LowercaseLetter'.
50+
--
51+
-- See: 'Unicode.Char.Case.isLowerCase' for the /full/ lower case predicate.
52+
--
4453
-- prop> isLower c == Data.Char.isLower c
4554
--
4655
-- @since 0.3.0

unicode-data/lib/Unicode/Char/General/Blocks.hs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- |
2-
-- Module : Unicode.Char.General
2+
-- Module : Unicode.Char.General.Blocks
33
-- Copyright : (c) 2020 Composewell Technologies and Contributors
44
-- License : Apache-2.0
55
-- Maintainer : [email protected]
@@ -14,7 +14,6 @@ module Unicode.Char.General.Blocks
1414
, B.BlockDefinition(..)
1515
, block
1616
, B.blockDefinition
17-
, allBlockRanges
1817
)
1918

2019
where
@@ -27,11 +26,3 @@ import qualified Unicode.Internal.Char.Blocks as B
2726
{-# INLINE block #-}
2827
block :: Char -> Maybe B.Block
2928
block = fmap toEnum . B.block
30-
31-
-- | All the [block](https://www.unicode.org/glossary/#block) ranges,
32-
-- in ascending order.
33-
--
34-
-- @since 0.3.1
35-
{-# INLINE allBlockRanges #-}
36-
allBlockRanges :: [(Int, Int)]
37-
allBlockRanges = B.allBlockRanges

0 commit comments

Comments
 (0)