Skip to content

Commit 7a41914

Browse files
committed
Reorganize functions with the new module structure
1 parent 644b0e8 commit 7a41914

File tree

7 files changed

+140
-122
lines changed

7 files changed

+140
-122
lines changed

lib/Unicode/Char.hs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
-- |
2+
-- Module : Unicode.Char
3+
-- Copyright : (c) 2020 Composewell Technologies and Contributors
4+
-- License : Apache-2.0
5+
-- Maintainer : streamly@composewell.com
6+
-- Stability : experimental
7+
--
8+
module Unicode.Char
9+
( module Unicode.Char.Case
10+
, module Unicode.Char.General
11+
, module Unicode.Char.Normalization
12+
)
13+
where
14+
15+
import Unicode.Char.Case
16+
import Unicode.Char.General
17+
import Unicode.Char.Normalization
Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,25 @@
11
-- |
2-
-- Module : Unicode.UCD.Core
2+
-- Module : Unicode.Char.Case
33
-- Copyright : (c) 2020 Composewell Technologies and Contributors
44
-- License : Apache-2.0
55
-- Maintainer : streamly@composewell.com
66
-- Stability : experimental
77
--
8-
module Unicode.UCD.Core
9-
( isAlpha
8+
module Unicode.Char.Case
9+
( isLower
1010
, isUpper
11-
, isLower
12-
, isSpace
1311
)
1412
where
1513

1614
import qualified Unicode.Internal.Generated.DerivedCoreProperties as P
17-
import qualified Unicode.Internal.Generated.PropList as P
1815

19-
-- | Returns 'True' for alphabetic Unicode characters (lower-case, upper-case
20-
-- and title-case letters, plus letters of caseless scripts and modifiers
21-
-- letters).
16+
-- | Returns 'True' for lower-case letters.
2217
--
23-
-- prop> isAlpha == Data.Char.isAlpha
18+
-- prop> isLower == Data.Char.isLower
2419
--
25-
{-# INLINE isAlpha #-}
26-
isAlpha :: Char -> Bool
27-
isAlpha = P.isAlphabetic
20+
{-# INLINE isLower #-}
21+
isLower :: Char -> Bool
22+
isLower = P.isLowercase
2823

2924
-- | Returns 'True' for upper-case or title-case letters. Title case is used by
3025
-- a small number of letter ligatures like the single-character form of /Lj/.
@@ -34,20 +29,3 @@ isAlpha = P.isAlphabetic
3429
{-# INLINE isUpper #-}
3530
isUpper :: Char -> Bool
3631
isUpper = P.isUppercase
37-
38-
-- | Returns 'True' for lower-case letters.
39-
--
40-
-- prop> isLower == Data.Char.isLower
41-
--
42-
{-# INLINE isLower #-}
43-
isLower :: Char -> Bool
44-
isLower = P.isLowercase
45-
46-
-- | Returns 'True' for any whitespace characters, and the control
47-
-- characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.
48-
--
49-
-- prop> isSpace == Data.Char.isSpace
50-
--
51-
{-# INLINE isSpace #-}
52-
isSpace :: Char -> Bool
53-
isSpace = P.isWhite_Space
Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
-- |
2-
-- Module : Unicode.UCD.Hangul
2+
-- Module : Unicode.Char.General
33
-- Copyright : (c) 2020 Composewell Technologies and Contributors
44
-- License : Apache-2.0
55
-- Maintainer : streamly@composewell.com
66
-- Stability : experimental
7+
--
8+
module Unicode.Char.General
9+
( isAlpha
10+
, isSpace
711

8-
module Unicode.UCD.Hangul
9-
( hangulFirst
12+
-- Hangul
13+
, hangulFirst
1014
, hangulLast
1115
, isHangul
1216
, isHangulLV
@@ -34,12 +38,35 @@ import Control.Exception (assert)
3438
import Data.Char (ord)
3539
import Unicode.Internal.Division (quotRem28)
3640

37-
-- Hangul characters can be decomposed algorithmically instead of via mappings
41+
import qualified Unicode.Internal.Generated.DerivedCoreProperties as P
42+
import qualified Unicode.Internal.Generated.PropList as P
43+
44+
-- | Returns 'True' for alphabetic Unicode characters (lower-case, upper-case
45+
-- and title-case letters, plus letters of caseless scripts and modifier
46+
-- letters).
47+
--
48+
-- prop> isAlpha == Data.Char.isAlpha
49+
--
50+
{-# INLINE isAlpha #-}
51+
isAlpha :: Char -> Bool
52+
isAlpha = P.isAlphabetic
53+
54+
-- | Returns 'True' for any whitespace character, and for the control
55+
-- characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.
56+
--
57+
-- prop> isSpace == Data.Char.isSpace
58+
--
59+
{-# INLINE isSpace #-}
60+
isSpace :: Char -> Bool
61+
isSpace = P.isWhite_Space
3862

3963
-------------------------------------------------------------------------------
40-
-- General utilities used by decomposition as well as composition
64+
-- Hangul
4165
-------------------------------------------------------------------------------
4266

67+
-- General utilities used by decomposition as well as composition
68+
-- Hangul characters can be decomposed algorithmically instead of via mappings
69+
4370
-- * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
4471
-- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo
4572
-- * https://www.unicode.org/reports/tr15/tr15-18.html#Hangul
Lines changed: 77 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,70 @@
11
-- |
2-
-- Module : Unicode.UCD.Decompose
2+
-- Module : Unicode.Char.Normalization
33
-- Copyright : (c) 2020 Composewell Technologies and Contributors
44
-- License : Apache-2.0
55
-- Maintainer : [email protected]
66
-- Stability : experimental
77
--
8-
-- Unicode normalization.
9-
--
8+
module Unicode.Char.Normalization
9+
(
10+
-- Compose
11+
compose
12+
, isSecondStarter
13+
, composeStarters
14+
15+
-- Decompose
16+
, DecomposeMode(..)
17+
, isDecomposable
18+
, decompose
19+
, decomposeHangul
20+
21+
-- Combining class
22+
, isCombining
23+
, combiningClass
24+
)
25+
where
26+
27+
import Control.Exception (assert)
28+
import Data.Char (ord)
29+
import GHC.Base (unsafeChr)
30+
import Unicode.Internal.Division (quotRem21, quotRem28)
31+
import Unicode.Char.General
32+
(hangulFirst, jamoLFirst, jamoTCount, jamoTFirst, jamoVCount, jamoVFirst)
33+
34+
import qualified Unicode.Internal.Generated.UnicodeData.CombiningClass as CC
35+
import qualified Unicode.Internal.Generated.UnicodeData.Compositions as C
36+
import qualified Unicode.Internal.Generated.UnicodeData.Decomposable as D
37+
import qualified Unicode.Internal.Generated.UnicodeData.DecomposableK as K
38+
import qualified Unicode.Internal.Generated.UnicodeData.Decompositions as D
39+
import qualified Unicode.Internal.Generated.UnicodeData.DecompositionsK as K
40+
41+
-------------------------------------------------------------------------------
42+
-- Compose
43+
-------------------------------------------------------------------------------
44+
45+
-- | Compose a starter character (combining class 0) with a combining character
46+
-- (non-zero combining class). Returns the composed character if the starter
47+
-- combines with the combining character, returns 'Nothing' otherwise.
48+
{-# INLINE compose #-}
49+
compose :: Char -> Char -> Maybe Char
50+
compose = C.compose
51+
52+
-- | Compose a starter character with another starter character. Returns the
53+
-- composed character if the two starters combine, returns 'Nothing' otherwise.
54+
{-# INLINE composeStarters #-}
55+
composeStarters :: Char -> Char -> Maybe Char
56+
composeStarters = C.composeStarters
57+
58+
-- | Return 'True' if a starter character may combine with some preceding
59+
-- starter character.
60+
{-# INLINE isSecondStarter #-}
61+
isSecondStarter :: Char -> Bool
62+
isSecondStarter = C.isSecondStarter
63+
64+
-------------------------------------------------------------------------------
65+
-- Decompose
66+
-------------------------------------------------------------------------------
67+
1068
-- For more information please refer to the following sections of the [Unicode
1169
-- standard](https://www.unicode.org/versions/latest/):
1270
--
@@ -29,25 +87,6 @@
2987
-- * [Unicode® Standard Annex #15 - Unicode Normalization Forms](https://www.unicode.org/reports/tr15)
3088
-- * [Unicode® Standard Annex #44 - Unicode Character Database](https://www.unicode.org/reports/tr44/)
3189
--
32-
module Unicode.UCD.Decompose
33-
( DecomposeMode(..)
34-
, isDecomposable
35-
, decompose
36-
, decomposeHangul
37-
)
38-
where
39-
40-
import Control.Exception (assert)
41-
import Data.Char (ord)
42-
import GHC.Base (unsafeChr)
43-
import Unicode.Internal.Division (quotRem21, quotRem28)
44-
45-
import qualified Unicode.Internal.Generated.UnicodeData.Decomposable as D
46-
import qualified Unicode.Internal.Generated.UnicodeData.DecomposableK as K
47-
import qualified Unicode.Internal.Generated.UnicodeData.Decompositions as D
48-
import qualified Unicode.Internal.Generated.UnicodeData.DecompositionsK as K
49-
50-
import Unicode.UCD.Hangul
5190

5291
-------------------------------------------------------------------------------
5392
-- Non Hangul decomposition
@@ -88,3 +127,19 @@ decomposeHangul c = (l, v, t)
88127
l = unsafeChr (jamoLFirst + li)
89128
v = unsafeChr (jamoVFirst + vi)
90129
t = unsafeChr (jamoTFirst + ti)
130+
131+
-------------------------------------------------------------------------------
132+
-- Combining class
133+
-------------------------------------------------------------------------------
134+
135+
-- Determine the combining properties of characters.
136+
137+
-- | Returns the combining class of a character.
138+
{-# INLINE combiningClass #-}
139+
combiningClass :: Char -> Int
140+
combiningClass = CC.combiningClass
141+
142+
-- | Returns 'True' if a character is a combining character.
143+
{-# INLINE isCombining #-}
144+
isCombining :: Char -> Bool
145+
isCombining = CC.isCombining

lib/Unicode/UCD/CombiningClass.hs

Lines changed: 0 additions & 26 deletions
This file was deleted.

lib/Unicode/UCD/Compose.hs

Lines changed: 0 additions & 34 deletions
This file was deleted.

unicode-data.cabal

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,12 @@ library
7070
import: default-extensions, compile-options
7171
default-language: Haskell2010
7272
exposed-modules:
73-
Unicode.UCD.Core
74-
Unicode.UCD.CombiningClass
75-
Unicode.UCD.Compose
76-
Unicode.UCD.Decompose
77-
Unicode.UCD.Hangul
73+
Unicode.Char
74+
-- The module structure is derived from
75+
-- https://www.unicode.org/reports/tr44/#Property_Index_Table
76+
Unicode.Char.Normalization
77+
Unicode.Char.General
78+
Unicode.Char.Case
7879

7980
-- Internal files
8081
Unicode.Internal.Bits

0 commit comments

Comments
 (0)