|
1 | 1 | -- | |
2 | | --- Module : Unicode.UCD.Decompose |
| 2 | +-- Module : Unicode.Char.Normalization |
3 | 3 | -- Copyright : (c) 2020 Composewell Technologies and Contributors |
4 | 4 | -- License : Apache-2.0 |
5 | 5 | |
6 | 6 | -- Stability : experimental |
7 | 7 | -- |
8 | | --- Unicode normalization. |
9 | | --- |
| 8 | +module Unicode.Char.Normalization |
| 9 | + ( |
| 10 | + -- * Compose |
| 11 | + compose |
| 12 | + , isSecondStarter |
| 13 | + , composeStarters |
| 14 | + |
| 15 | + -- * Decompose |
| 16 | + , DecomposeMode(..) |
| 17 | + , isDecomposable |
| 18 | + , decompose |
| 19 | + , decomposeHangul |
| 20 | + |
| 21 | + -- * Combining class |
| 22 | + , isCombining |
| 23 | + , combiningClass |
| 24 | + ) |
| 25 | +where |
| 26 | + |
| 27 | +import Control.Exception (assert) |
| 28 | +import Data.Char (ord) |
| 29 | +import GHC.Base (unsafeChr) |
| 30 | +import Unicode.Internal.Division (quotRem21, quotRem28) |
| 31 | +import Unicode.Char.General |
| 32 | + (hangulFirst, jamoLFirst, jamoTCount, jamoTFirst, jamoVCount, jamoVFirst) |
| 33 | + |
| 34 | +import qualified Unicode.Internal.Generated.UnicodeData.CombiningClass as CC |
| 35 | +import qualified Unicode.Internal.Generated.UnicodeData.Compositions as C |
| 36 | +import qualified Unicode.Internal.Generated.UnicodeData.Decomposable as D |
| 37 | +import qualified Unicode.Internal.Generated.UnicodeData.DecomposableK as K |
| 38 | +import qualified Unicode.Internal.Generated.UnicodeData.Decompositions as D |
| 39 | +import qualified Unicode.Internal.Generated.UnicodeData.DecompositionsK as K |
| 40 | + |
| 41 | +------------------------------------------------------------------------------- |
| 42 | +-- Compose |
| 43 | +------------------------------------------------------------------------------- |
| 44 | + |
| 45 | +-- | Compose a starter (combining class 0) with a combining character (non-zero |
| 46 | +-- combining class). Returns 'Just' the composed character when the pair |
| 47 | +-- combines and 'Nothing' otherwise. Delegates to the generated @C.compose@. |
| 48 | +{-# INLINE compose #-} |
| 49 | +compose :: Char -> Char -> Maybe Char |
| 50 | +compose = C.compose |
| 51 | + |
| 52 | +-- | Compose two starter characters. Returns 'Just' the composed character when |
| 53 | +-- the pair combines and 'Nothing' otherwise. Delegates to @C.composeStarters@. |
| 54 | +{-# INLINE composeStarters #-} |
| 55 | +composeStarters :: Char -> Char -> Maybe Char |
| 56 | +composeStarters = C.composeStarters |
| 57 | + |
| 58 | +-- | Return 'True' if a starter character may combine with some preceding |
| 59 | +-- starter character. Delegates to the generated @C.isSecondStarter@. |
| 60 | +{-# INLINE isSecondStarter #-} |
| 61 | +isSecondStarter :: Char -> Bool |
| 62 | +isSecondStarter = C.isSecondStarter |
| 63 | + |
| 64 | +------------------------------------------------------------------------------- |
| 65 | +-- Decompose |
| 66 | +------------------------------------------------------------------------------- |
| 67 | + |
10 | 68 | -- For more information please refer to the following sections of the [Unicode |
11 | 69 | -- standard](https://www.unicode.org/versions/latest/): |
12 | 70 | -- |
|
29 | 87 | -- * [Unicode® Standard Annex #15 - Unicode Normalization Forms](https://www.unicode.org/reports/tr15) |
30 | 88 | -- * [Unicode® Standard Annex #44 - Unicode Character Database](https://www.unicode.org/reports/tr44/) |
31 | 89 | -- |
32 | | -module Unicode.UCD.Decompose |
33 | | - ( DecomposeMode(..) |
34 | | - , isDecomposable |
35 | | - , decompose |
36 | | - , decomposeHangul |
37 | | - ) |
38 | | -where |
39 | | - |
40 | | -import Control.Exception (assert) |
41 | | -import Data.Char (ord) |
42 | | -import GHC.Base (unsafeChr) |
43 | | -import Unicode.Internal.Division (quotRem21, quotRem28) |
44 | | - |
45 | | -import qualified Unicode.Internal.Generated.UnicodeData.Decomposable as D |
46 | | -import qualified Unicode.Internal.Generated.UnicodeData.DecomposableK as K |
47 | | -import qualified Unicode.Internal.Generated.UnicodeData.Decompositions as D |
48 | | -import qualified Unicode.Internal.Generated.UnicodeData.DecompositionsK as K |
49 | | - |
50 | | -import Unicode.UCD.Hangul |
51 | 90 |
|
52 | 91 | ------------------------------------------------------------------------------- |
53 | 92 | -- Non Hangul decomposition |
@@ -88,3 +127,19 @@ decomposeHangul c = (l, v, t) |
88 | 127 | l = unsafeChr (jamoLFirst + li) |
89 | 128 | v = unsafeChr (jamoVFirst + vi) |
90 | 129 | t = unsafeChr (jamoTFirst + ti) |
| 130 | + |
| 131 | +------------------------------------------------------------------------------- |
| 132 | +-- Combining class |
| 133 | +------------------------------------------------------------------------------- |
| 134 | + |
| 135 | +-- Determine the combining properties of characters. |
| 136 | + |
| 137 | +-- | Returns the combining class of a character (0 for a starter). |
| 138 | +{-# INLINE combiningClass #-} |
| 139 | +combiningClass :: Char -> Int |
| 140 | +combiningClass = CC.combiningClass |
| 141 | + |
| 142 | +-- | Returns 'True' if a character is combining (non-zero combining class). |
| 143 | +{-# INLINE isCombining #-} |
| 144 | +isCombining :: Char -> Bool |
| 145 | +isCombining = CC.isCombining |
0 commit comments