Skip to content

Commit 37de284

Browse files
wismilladithyaov
authored and committed
Fix isLower and isUpper
1 parent ee0c392 commit 37de284

File tree

6 files changed

+129
-147
lines changed

6 files changed

+129
-147
lines changed

README.md

Lines changed: 67 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -20,159 +20,95 @@ Machine: 8 × AMD Ryzen 5 2500U on Linux.
2020

2121
```
2222
All
23-
Unicode.Char.Case
23+
Unicode.Char.Case.Compat
2424
isLower
25-
base: OK (1.59s)
26-
25 ms ± 583 μs
27-
unicode-data: OK (2.01s)
28-
3.9 ms ± 22 μs, 0.15x
25+
base: OK (1.53s)
26+
24 ms ± 3.8 ms
27+
unicode-data: OK (2.25s)
28+
4.4 ms ± 88 μs, 0.19x
2929
isUpper
30-
base: OK (1.62s)
31-
26 ms ± 1.0 ms
32-
unicode-data: OK (2.00s)
33-
3.9 ms ± 24 μs, 0.15x
34-
Unicode.Char.Case.Compat
30+
base: OK (1.50s)
31+
24 ms ± 450 μs
32+
unicode-data: OK (2.37s)
33+
4.7 ms ± 200 μs, 0.19x
3534
toLower
36-
base: OK (1.46s)
37-
23 ms ± 512 μs
35+
base: OK (1.40s)
36+
22 ms ± 1.8 ms
3837
unicode-data: OK (1.89s)
39-
7.4 ms ± 112 μs, 0.32x
38+
7.2 ms ± 297 μs, 0.32x
4039
toTitle
41-
base: OK (1.49s)
42-
24 ms ± 399 μs
43-
unicode-data: OK (1.92s)
44-
7.5 ms ± 67 μs, 0.32x
40+
base: OK (1.25s)
41+
20 ms ± 2.0 ms
42+
unicode-data: OK (1.65s)
43+
6.4 ms ± 509 μs, 0.32x
4544
toUpper
46-
base: OK (1.46s)
47-
23 ms ± 468 μs
48-
unicode-data: OK (1.75s)
49-
6.9 ms ± 99 μs, 0.30x
45+
base: OK (1.26s)
46+
20 ms ± 2.5 ms
47+
unicode-data: OK (1.72s)
48+
6.8 ms ± 335 μs, 0.34x
5049
Unicode.Char.General
5150
generalCategory
52-
base: OK (1.95s)
53-
129 ms ± 733 μs
54-
unicode-data: OK (1.63s)
55-
108 ms ± 1.1 ms, 0.84x
56-
isAlphabetic
57-
unicode-data: OK (1.28s)
58-
312 μs ± 3.2 μs
51+
base: OK (2.02s)
52+
134 ms ± 1.6 ms
53+
unicode-data: OK (1.75s)
54+
116 ms ± 1.6 ms, 0.87x
5955
isAlphaNum
60-
base: OK (1.56s)
61-
25 ms ± 252 μs
62-
unicode-data: OK (2.35s)
63-
4.6 ms ± 31 μs, 0.19x
64-
isControl
65-
base: OK (1.57s)
66-
25 ms ± 551 μs
56+
base: OK (1.53s)
57+
24 ms ± 1.7 ms
6758
unicode-data: OK (2.16s)
68-
4.2 ms ± 33 μs, 0.17x
59+
4.2 ms ± 29 μs, 0.18x
60+
isControl
61+
base: OK (1.47s)
62+
23 ms ± 2.6 ms
63+
unicode-data: OK (2.23s)
64+
4.4 ms ± 22 μs, 0.19x
6965
isMark
70-
base: OK (1.63s)
71-
26 ms ± 689 μs
72-
unicode-data: OK (2.34s)
73-
4.6 ms ± 27 μs, 0.18x
66+
base: OK (1.47s)
67+
23 ms ± 624 μs
68+
unicode-data: OK (2.28s)
69+
4.5 ms ± 48 μs, 0.19x
7470
isPrint
75-
base: OK (1.62s)
76-
26 ms ± 788 μs
77-
unicode-data: OK (2.13s)
78-
4.2 ms ± 73 μs, 0.16x
71+
base: OK (1.53s)
72+
25 ms ± 2.4 ms
73+
unicode-data: OK (2.27s)
74+
4.4 ms ± 50 μs, 0.18x
7975
isPunctuation
80-
base: OK (1.61s)
81-
26 ms ± 170 μs
82-
unicode-data: OK (2.04s)
83-
4.0 ms ± 30 μs, 0.16x
76+
base: OK (1.51s)
77+
24 ms ± 459 μs
78+
unicode-data: OK (2.24s)
79+
4.4 ms ± 25 μs, 0.18x
8480
isSeparator
85-
base: OK (1.71s)
86-
27 ms ± 247 μs
87-
unicode-data: OK (2.20s)
88-
4.3 ms ± 25 μs, 0.16x
81+
base: OK (1.52s)
82+
24 ms ± 407 μs
83+
unicode-data: OK (2.43s)
84+
4.8 ms ± 94 μs, 0.20x
8985
isSymbol
90-
base: OK (1.68s)
91-
27 ms ± 312 μs
92-
unicode-data: OK (2.32s)
93-
4.5 ms ± 41 μs, 0.17x
94-
isWhiteSpace
95-
unicode-data: OK (1.28s)
96-
312 μs ± 3.5 μs
97-
isHangul
98-
unicode-data: OK (1.28s)
99-
312 μs ± 2.6 μs
100-
isHangulLV
101-
unicode-data: OK (1.28s)
102-
312 μs ± 2.8 μs
103-
isJamo
104-
unicode-data: OK (1.28s)
105-
312 μs ± 2.7 μs
106-
jamoLIndex
107-
unicode-data: OK (1.28s)
108-
312 μs ± 3.1 μs
109-
jamoVIndex
110-
unicode-data: OK (1.28s)
111-
312 μs ± 2.9 μs
112-
jamoTIndex
113-
unicode-data: OK (1.28s)
114-
312 μs ± 2.9 μs
86+
base: OK (1.49s)
87+
24 ms ± 863 μs
88+
unicode-data: OK (1.34s)
89+
5.2 ms ± 92 μs, 0.22x
11590
Unicode.Char.General.Compat
11691
isAlpha
117-
base: OK (1.59s)
118-
25 ms ± 446 μs
92+
base: OK (1.46s)
93+
23 ms ± 322 μs
11994
unicode-data: OK (2.14s)
120-
4.2 ms ± 25 μs, 0.17x
95+
4.1 ms ± 36 μs, 0.18x
12196
isLetter
122-
base: OK (1.72s)
123-
27 ms ± 677 μs
124-
unicode-data: OK (2.14s)
125-
4.2 ms ± 59 μs, 0.15x
97+
base: OK (1.44s)
98+
22 ms ± 640 μs
99+
unicode-data: OK (2.17s)
100+
4.3 ms ± 58 μs, 0.19x
126101
isSpace
127-
base: OK (1.48s)
128-
12 ms ± 99 μs
129-
unicode-data: OK (2.30s)
130-
4.5 ms ± 30 μs, 0.39x
131-
Unicode.Char.Identifiers
132-
isIDContinue
133-
unicode-data: OK (1.28s)
134-
312 μs ± 2.7 μs
135-
isIDStart
136-
unicode-data: OK (1.29s)
137-
312 μs ± 2.7 μs
138-
isXIDContinue
139-
unicode-data: OK (1.28s)
140-
312 μs ± 3.2 μs
141-
isXIDStart
142-
unicode-data: OK (1.28s)
143-
312 μs ± 3.2 μs
144-
isPatternSyntax
145-
unicode-data: OK (1.28s)
146-
312 μs ± 3.4 μs
147-
isPatternWhitespace
148-
unicode-data: OK (1.28s)
149-
312 μs ± 2.9 μs
150-
Unicode.Char.Normalization
151-
isCombining
152-
unicode-data: OK (1.28s)
153-
313 μs ± 5.1 μs
154-
combiningClass
155-
unicode-data: OK (1.66s)
156-
3.2 ms ± 113 μs
157-
isCombiningStarter
158-
unicode-data: OK (1.29s)
159-
312 μs ± 3.2 μs
160-
isDecomposable
161-
Canonical
162-
unicode-data: OK (1.29s)
163-
312 μs ± 3.5 μs
164-
Kompat
165-
unicode-data: OK (1.28s)
166-
312 μs ± 3.5 μs
167-
decomposeHangul
168-
unicode-data: OK (1.28s)
169-
312 μs ± 3.0 μs
102+
base: OK (1.44s)
103+
11 ms ± 1.2 ms
104+
unicode-data: OK (1.36s)
105+
5.3 ms ± 243 μs, 0.49x
170106
Unicode.Char.Numeric
171107
isNumber
172-
base: OK (1.66s)
173-
26 ms ± 404 μs
174-
unicode-data: OK (2.47s)
175-
4.8 ms ± 22 μs, 0.18x
108+
base: OK (1.52s)
109+
24 ms ± 368 μs
110+
unicode-data: OK (2.41s)
111+
4.7 ms ± 41 μs, 0.19x
176112
```
177113

178114
## Unicode database version update

bench/Main.hs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,23 @@ data Bench a = Bench
2020
main :: IO ()
2121
main = defaultMain
2222
[ bgroup "Unicode.Char.Case"
23+
[ bgroup "isLowerCase"
24+
[ benchNF "unicode-data" C.isLowerCase
25+
]
26+
, bgroup "isUpperCase"
27+
[ benchNF "unicode-data" C.isUpperCase
28+
]
29+
]
30+
, bgroup "Unicode.Char.Case.Compat"
2331
[ bgroup' "isLower"
2432
[ Bench "base" B.isLower
25-
, Bench "unicode-data" C.isLower
33+
, Bench "unicode-data" CC.isLower
2634
]
2735
, bgroup' "isUpper"
2836
[ Bench "base" B.isUpper
29-
, Bench "unicode-data" C.isUpper
37+
, Bench "unicode-data" CC.isUpper
3038
]
31-
]
32-
, bgroup "Unicode.Char.Case.Compat"
33-
[ bgroup' "toLower"
39+
, bgroup' "toLower"
3440
[ Bench "base" B.toLower
3541
, Bench "unicode-data" CC.toLower
3642
]

lib/Unicode/Char.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ where
4343

4444
import Data.Char (chr, ord)
4545
import Data.Version (Version, makeVersion)
46-
import Unicode.Char.Case
46+
import Unicode.Char.Case hiding (isLower, isUpper)
4747
import Unicode.Char.Case.Compat
4848
import Unicode.Char.General hiding (isLetter, isSpace)
4949
import Unicode.Char.General.Compat

lib/Unicode/Char/Case.hs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
-- Case and case mapping related functions.
99
--
1010
module Unicode.Char.Case
11-
( isLower
11+
( -- * Predicates
12+
isLowerCase
13+
, isLower
14+
, isUpperCase
1215
, isUpper
1316
)
1417
where
@@ -17,19 +20,30 @@ import qualified Unicode.Internal.Char.DerivedCoreProperties as P
1720

1821
-- | Returns 'True' for lower-case letters.
1922
--
20-
-- prop> isLower c == Data.Char.isLower c
23+
-- @since 0.3.0
24+
{-# INLINE isLowerCase #-}
25+
isLowerCase :: Char -> Bool
26+
isLowerCase = P.isLowercase
27+
28+
-- | Returns 'True' for lower-case letters.
2129
--
2230
-- @since 0.1.0
2331
{-# INLINE isLower #-}
32+
{-# DEPRECATED isLower "Use isLowerCase instead. Note that the behavior of this function does not match base:Data.Char.isLower. See Unicode.Char.Case.Compat for behavior compatible with base:Data.Char." #-}
2433
isLower :: Char -> Bool
2534
isLower = P.isLowercase
2635

27-
-- | Returns 'True' for upper-case or title-case letters. Title case is used by
28-
-- a small number of letter ligatures like the single-character form of /Lj/.
36+
-- | Returns 'True' for upper-case letters.
2937
--
30-
-- prop> isUpper c == Data.Char.isUpper c
38+
-- @since 0.3.0
39+
{-# INLINE isUpperCase #-}
40+
isUpperCase :: Char -> Bool
41+
isUpperCase = P.isUppercase
42+
43+
-- | Returns 'True' for upper-case letters.
3144
--
3245
-- @since 0.1.0
3346
{-# INLINE isUpper #-}
47+
{-# DEPRECATED isUpper "Use isUpperCase instead. Note that the behavior of this function does not match base:Data.Char.isUpper. See Unicode.Char.Case.Compat for behavior compatible with base:Data.Char." #-}
3448
isUpper :: Char -> Bool
3549
isUpper = P.isUppercase

lib/Unicode/Char/Case/Compat.hs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,43 @@
1212
-- therefore they are placed in a separate module in order to avoid ambiguity.
1313
--
1414
module Unicode.Char.Case.Compat
15-
( toUpper
15+
( -- * Predicates
16+
isUpper
17+
, isLower
18+
-- * Case conversion
19+
, toUpper
1620
, toLower
1721
, toTitle
1822
) where
1923

24+
import Unicode.Char.General (GeneralCategory(..), generalCategory)
2025
import qualified Unicode.Internal.Char.UnicodeData.SimpleLowerCaseMapping as C
2126
import qualified Unicode.Internal.Char.UnicodeData.SimpleTitleCaseMapping as C
2227
import qualified Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping as C
2328

29+
-- | Selects upper-case or title-case alphabetic Unicode characters (letters).
30+
-- Title case is used by a small number of letter ligatures like the
31+
-- single-character form of /Lj/.
32+
--
33+
-- prop> isUpper c == Data.Char.isUpper c
34+
--
35+
-- @since 0.3.0
36+
isUpper :: Char -> Bool
37+
isUpper c = case generalCategory c of
38+
UppercaseLetter -> True
39+
TitlecaseLetter -> True
40+
_ -> False
41+
42+
-- | Selects lower-case alphabetic Unicode characters (letters).
43+
--
44+
-- prop> isLower c == Data.Char.isLower c
45+
--
46+
-- @since 0.3.0
47+
isLower :: Char -> Bool
48+
isLower c = case generalCategory c of
49+
LowercaseLetter -> True
50+
_ -> False
51+
2452
-- | Convert a letter to the corresponding upper-case letter, if any.
2553
-- Any other character is returned unchanged.
2654
--

test/Unicode/CharSpec.hs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,9 @@ spec = do
6161
it "isSymbol" do
6262
UChar.isSymbol `shouldBeEqualTo` Char.isSymbol
6363
describe' "Case" do
64-
let it' t = before_ (pendingWith "Incompatible implementation with Data.Char")
65-
. it t
66-
it' "isLower" do
64+
it "isLower" do
6765
UChar.isLower `shouldBeEqualTo` Char.isLower
68-
it' "isUpper" do
66+
it "isUpper" do
6967
UChar.isUpper `shouldBeEqualTo` Char.isUpper
7068
it "toLower" do
7169
UChar.toLower `shouldBeEqualTo` Char.toLower

0 commit comments

Comments
 (0)