Skip to content

Commit be9ce9c

Browse files
committed
ucd2haskell: Better imports
1 parent 899c222 commit be9ce9c

File tree

21 files changed

+438
-299
lines changed

21 files changed

+438
-299
lines changed

ucd2haskell/exe/UCD2Haskell/Generator.hs

Lines changed: 275 additions & 125 deletions
Large diffs are not rendered by default.

ucd2haskell/exe/UCD2Haskell/Modules/Properties.hs

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Multiple as Props
2525
import UCD2Haskell.Common (Fold (..))
2626
import UCD2Haskell.Generator (
2727
FileRecipe (..),
28+
ShamochuCode (..),
2829
apacheLicense,
2930
genBitmapShamochu,
31+
mkImports,
3032
unlinesBB,
3133
)
3234

@@ -78,20 +80,21 @@ genCorePropertiesModule moduleName isProp = Fold step initial done
7880
Nothing -> Just xs
7981
Just ys -> Just (xs <> ys)
8082

81-
done Acc{..} = unlinesBB (header properties <> genBitmaps values properties)
83+
done Acc{..} = header imports properties <> code
84+
where
85+
ShamochuCode{..} = genBitmaps values properties
8286

83-
genBitmaps values = foldr addBitMap mempty
87+
genBitmaps values = foldMap addBitMap
8488
where
8589
addBitMap property =
86-
(:)
87-
(genBitmapShamochu
90+
genBitmapShamochu
8891
(prop2FuncNameStr property)
8992
(5 NE.:| [6, 7])
9093
-- [2,3,4,5,6]
9194
[]
92-
(IntSet.toAscList (values Map.! property)))
95+
(IntSet.toAscList (values Map.! property))
9396

94-
header exports =
97+
header imports exports = unlinesBB
9598
[ apacheLicense 2020 moduleName
9699
, "{-# OPTIONS_HADDOCK hide #-}"
97100
, "{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}"
@@ -102,11 +105,5 @@ genCorePropertiesModule moduleName isProp = Fold step initial done
102105
<> mconcat (L.intersperse "\n , " (map prop2FuncName exports))
103106
, " ) where"
104107
, ""
105-
, "import Data.Bits (Bits(..))"
106-
, "import Data.Char (ord)"
107-
, "import Data.Int (Int8)"
108-
, "import Data.Word (Word8, Word16)"
109-
, "import GHC.Exts (Ptr(..))"
110-
, "import Unicode.Internal.Bits (lookupBit, lookupWord16AsInt, lookupWord8AsInt)"
111-
, ""
108+
, mkImports imports
112109
]

ucd2haskell/exe/UCD2Haskell/Modules/Scripts.hs

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ import UCD2Haskell.Common (
3535
import UCD2Haskell.Generator (
3636
BitmapType (..),
3737
FileRecipe (..),
38+
ShamochuCode (..),
3839
apacheLicense,
3940
generateShamochuBitmaps,
41+
mkImports',
4042
toLookupBitMapName,
4143
unlinesBB,
4244
word32ToWord8s,
45+
(<+>),
4346
)
4447

4548
recipe :: PropertyValuesAliases -> FileRecipe Prop.Entry
@@ -56,6 +59,13 @@ genScriptsModule moduleName aliases = Fold step mempty done
5659
done ranges =
5760
let scripts = Set.toList
5861
(foldr addScript (Set.singleton Defaults.defaultScript) ranges)
62+
ShamochuCode{..} = if length scripts <= 0xff
63+
then mkCharScripts scripts ranges
64+
else error "Cannot encode scripts"
65+
imports' = imports <+> Map.fromList
66+
[ ( "GHC.Exts"
67+
, Set.fromList ["Addr#", "Int(..)", "nullAddr#"] )
68+
, ( "Data.Ix", Set.singleton "Ix" )]
5969
in unlinesBB
6070
[ "{-# LANGUAGE PatternSynonyms #-}"
6171
, "{-# OPTIONS_HADDOCK hide #-}"
@@ -71,13 +81,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
7181
, " , pattern ScriptCharMaskComplement )"
7282
, "where"
7383
, ""
74-
, "import Data.Char (ord)"
75-
, "import Data.Int (Int8)"
76-
, "import Data.Ix (Ix)"
77-
, "import Data.Word (Word16)"
78-
, "import GHC.Exts (Addr#, Int#, Int(..), Ptr(..), nullAddr#, andI#, iShiftL#, iShiftRL#, (+#), (-#))"
79-
, "import Unicode.Internal.Bits.Scripts (lookupWord8AsInt#, lookupWord16AsInt#)"
80-
, ""
84+
, mkImports' "Scripts" imports'
8185
, "-- | Unicode [script](https://www.unicode.org/reports/tr24/)."
8286
, "--"
8387
, "-- The constructors descriptions are the original Unicode values"
@@ -127,9 +131,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
127131
, "-- | Script of a character."
128132
, "--"
129133
, "-- @since 0.1.0"
130-
, if length scripts <= 0xff
131-
then mkCharScripts scripts ranges
132-
else error "Cannot encode scripts"
134+
, code
133135
, ""
134136
]
135137

@@ -297,7 +299,7 @@ genScriptsModule moduleName aliases = Fold step mempty done
297299
encodeBytes = foldr addByte "" . word32ToWord8s
298300
addByte n acc = BB.char7 '\\' <> BB.word8Dec n <> acc
299301

300-
mkCharScripts :: [BS.ShortByteString] -> [Prop.Entry] -> BB.Builder
302+
mkCharScripts :: [BS.ShortByteString] -> [Prop.Entry] -> ShamochuCode
301303
mkCharScripts scripts scriptsRanges =
302304
let charScripts = L.sort (foldMap (rangeToCharScripts getScript) scriptsRanges)
303305
charScripts' = reverse (fst (foldl' addMissing (mempty, '\0') charScripts))
@@ -325,28 +327,32 @@ genScriptsModule moduleName aliases = Fold step mempty done
325327
assert (fromEnum (length scripts) < 0xff)
326328
(fromIntegral . fromEnum)
327329
bitmap0To1 = "scriptPlanes0To1"
328-
in mconcat
329-
[ "{-# INLINE script #-}\n"
330-
, "script :: Char -> Int#\n"
331-
, "script c\n"
332-
, " -- Planes 0-1\n"
333-
, " | cp < 0x", showPaddedHeXB boundPlanes0To1
334-
, " = ", toLookupBitMapName bitmap0To1, " cp#\n"
335-
, mkScriptsBounds def (scripts !!) otherPlanes
336-
, " -- Default: ", BB.shortByteString Defaults.defaultScript, "\n"
337-
, " | otherwise = ", BB.intDec def, "#\n"
338-
, " where\n"
339-
, " !cp@(I# cp#) = ord c\n"
340-
, "\n"
341-
, generateShamochuBitmaps
342-
bitmap0To1
343-
True
344-
ByteMap
345-
(NE.singleton 3)
346-
[5]
347-
toWord8
348-
planes0To1
349-
]
330+
ShamochuCode{..} = generateShamochuBitmaps
331+
bitmap0To1
332+
True
333+
ByteMap
334+
(NE.singleton 3)
335+
[5]
336+
toWord8
337+
planes0To1
338+
in ShamochuCode
339+
{ code = mconcat
340+
[ "{-# INLINE script #-}\n"
341+
, "script :: Char -> Int#\n"
342+
, "script c\n"
343+
, " -- Planes 0-1\n"
344+
, " | cp < 0x", showPaddedHeXB boundPlanes0To1
345+
, " = ", toLookupBitMapName bitmap0To1, " cp#\n"
346+
, mkScriptsBounds def (scripts !!) otherPlanes
347+
, " -- Default: ", BB.shortByteString Defaults.defaultScript, "\n"
348+
, " | otherwise = ", BB.intDec def, "#\n"
349+
, " where\n"
350+
, " !cp@(I# cp#) = ord c\n"
351+
, "\n"
352+
, code
353+
]
354+
, imports = imports }
355+
350356

351357
mkScriptsBounds :: Int -> (Int -> BS.ShortByteString) -> [(Int,Char)] -> BB.Builder
352358
mkScriptsBounds def getScriptName

ucd2haskell/exe/UCD2Haskell/Modules/ScriptsExtensions.hs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,14 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
2727
import UCD2Haskell.Common (Fold (..), mkHaskellConstructor)
2828
import UCD2Haskell.Generator (
2929
FileRecipe (..),
30+
ShamochuCode (..),
3031
apacheLicense,
3132
enumMapToAddrLiteral,
3233
genEnumBitmapShamochu,
34+
mkImports',
3335
splitPlanes,
3436
unlinesBB,
37+
(<+>),
3538
)
3639

3740
recipe :: PropertyValuesAliases -> ScriptExtensions -> FileRecipe Prop.Entry
@@ -98,14 +101,7 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
98101
, "(scriptExtensions)"
99102
, "where"
100103
, ""
101-
, "import Data.Char (ord)"
102-
, "import Data.Word (Word8, Word16)"
103-
, "import Data.Int (Int8)"
104-
, "import GHC.Exts"
105-
, " ( Addr#, Int#, Int(..), Ptr(..), nullAddr#"
106-
, " , negateInt#, andI#, iShiftL#, iShiftRL#, (+#), (-#) )"
107-
, "import Unicode.Internal.Bits.Scripts (lookupWord8AsInt#, lookupWord16AsInt#)"
108-
, ""
104+
, mkImports' "Scripts" imports'
109105
, "-- | Script extensions of a character."
110106
, "--"
111107
, "-- Returns a pair:"
@@ -123,16 +119,7 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
123119
(usedExts Set.\\ singleScriptExtensionsSet)
124120
, " s -> (# negateInt# s, nullAddr# #)"
125121
, ""
126-
, genEnumBitmapShamochu
127-
"encodedScriptExtensions"
128-
True
129-
(NE.singleton 3)
130-
[5]
131-
toWord8
132-
(def, BB.intDec (fromEnum def))
133-
(def, BB.intDec (fromEnum def))
134-
planes0To3
135-
plane14
122+
, code
136123
]
137124
where
138125
-- List ordered by Haskell constructors
@@ -174,6 +161,19 @@ genScriptExtensionsModule moduleName aliases extensions = Fold step initial done
174161
"Cannot generate: genScriptExtensionsModule"
175162
(== def)
176163
scriptExtensions
164+
ShamochuCode{..} = genEnumBitmapShamochu
165+
"encodedScriptExtensions"
166+
True
167+
(NE.singleton 3)
168+
[5]
169+
toWord8
170+
(def, BB.intDec (fromEnum def))
171+
(def, BB.intDec (fromEnum def))
172+
planes0To3
173+
plane14
174+
imports' = imports <+> Map.singleton
175+
"GHC.Exts"
176+
(Set.fromList ["Addr#", "Int(..)", "nullAddr#", "negateInt#"])
177177

178178
mkDecodeScriptExtensions
179179
:: (NE.NonEmpty BS.ShortByteString -> Word8)

ucd2haskell/exe/UCD2Haskell/Modules/Security/IdentifierStatus.hs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
1616
import UCD2Haskell.Common (Fold (..))
1717
import UCD2Haskell.Generator (
1818
FileRecipe (..),
19+
ShamochuCode (..),
1920
apacheLicense,
2021
genBitmapShamochu,
22+
mkImports,
2123
unlinesBB,
2224
)
2325

@@ -46,14 +48,12 @@ genIdentifierStatusModule moduleName = Fold step mempty done
4648
, "(isAllowedInIdentifier)"
4749
, "where"
4850
, ""
49-
, "import Data.Char (ord)"
50-
, "import Data.Word (Word8)"
51-
, "import GHC.Exts (Ptr(..))"
52-
, "import Unicode.Internal.Bits (lookupBit)"
53-
, ""
54-
, genBitmapShamochu
55-
"isAllowedInIdentifier"
56-
(NE.singleton 6)
57-
[2,3,4,5,6]
58-
(reverse values)
51+
, mkImports imports
52+
, code
5953
]
54+
where
55+
ShamochuCode{..} = genBitmapShamochu
56+
"isAllowedInIdentifier"
57+
(NE.singleton 6)
58+
[2,3,4,5,6]
59+
(reverse values)

ucd2haskell/exe/UCD2Haskell/Modules/Security/IdentifierType.hs

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@ import qualified Unicode.CharacterDatabase.Parser.Properties.Single as Prop
2323
import UCD2Haskell.Common (Fold (..))
2424
import UCD2Haskell.Generator (
2525
FileRecipe (..),
26+
ShamochuCode (..),
2627
apacheLicense,
2728
genEnumBitmapShamochu,
29+
mkImports,
2830
splitPlanes,
2931
unlinesBB,
32+
(<+>),
3033
)
3134

3235
recipe :: FileRecipe Prop.Entry
@@ -152,14 +155,7 @@ genIdentifierTypeModule moduleName = Fold step mempty done
152155
, "(IdentifierType(..), identifierTypes, decodeIdentifierTypes)"
153156
, "where"
154157
, ""
155-
, "import Data.Bits (Bits(..))"
156-
, "import Data.Char (ord)"
157-
, "import Data.Int (Int8)"
158-
, "import Data.List.NonEmpty (NonEmpty)"
159-
, "import Data.Word (Word8, Word16)"
160-
, "import GHC.Exts (Ptr(..))"
161-
, "import Unicode.Internal.Bits (lookupWord8AsInt, lookupWord16AsInt)"
162-
, ""
158+
, mkImports (imports <+> Map.singleton "Data.List.NonEmpty" (Set.singleton "NonEmpty"))
163159
, "-- | Identifier type"
164160
, "--"
165161
, "-- @since 0.1.0"
@@ -203,16 +199,7 @@ genIdentifierTypeModule moduleName = Fold step mempty done
203199
, " _ -> " <> mkHaskellConstructorsList def
204200
, ""
205201
, "-- | Returns the 'IdentifierType's corresponding to a character."
206-
, genEnumBitmapShamochu
207-
"identifierTypes"
208-
False
209-
(NE.singleton 3)
210-
[5]
211-
toWord8
212-
(defIdx, BB.intDec (fromEnum defIdx))
213-
(defIdx, BB.intDec (fromEnum defIdx))
214-
planes0To3
215-
plane14
202+
, code
216203
]
217204
where
218205
toWord8 =
@@ -223,3 +210,13 @@ genIdentifierTypeModule moduleName = Fold step mempty done
223210
(== defIdx)
224211
(reverse identifiersTypes)
225212
(encoding, identifiersTypes, defIdx) = mkIdentifiersTypes acc
213+
ShamochuCode{..} = genEnumBitmapShamochu
214+
"identifierTypes"
215+
False
216+
(NE.singleton 3)
217+
[5]
218+
toWord8
219+
(defIdx, BB.intDec (fromEnum defIdx))
220+
(defIdx, BB.intDec (fromEnum defIdx))
221+
planes0To3
222+
plane14

ucd2haskell/exe/UCD2Haskell/Modules/UnicodeData/CombiningClass.hs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ import qualified Unicode.CharacterDatabase.Parser.UnicodeData as UD
2121
import UCD2Haskell.Common (Fold (..), showB)
2222
import UCD2Haskell.Generator (
2323
FileRecipe (..),
24+
ShamochuCode (..),
2425
apacheLicense,
2526
genBitmapShamochu,
27+
mkImports,
2628
unlinesBB,
2729
)
2830

@@ -60,24 +62,20 @@ genCombiningClassModule moduleName = Fold step initial done
6062
, "(combiningClass, isCombining)"
6163
, "where"
6264
, ""
63-
, "import Data.Bits (Bits(..))"
64-
, "import Data.Char (ord)"
65-
, "import Data.Int (Int8)"
66-
, "import Data.Word (Word8, Word16)"
67-
, "import GHC.Exts (Ptr(..))"
68-
, "import Unicode.Internal.Bits (lookupBit, lookupWord8AsInt, lookupWord16AsInt)"
69-
, ""
65+
, mkImports imports
7066
, "combiningClass :: Char -> Int"
7167
, "combiningClass = \\case"
7268
, unlinesBB (reverse combiningClasses)
7369
, " _ -> 0\n"
7470
, ""
75-
, genBitmapShamochu
76-
"isCombining"
77-
(NE.singleton 6)
78-
[2,3,4,5,6]
79-
(reverse combiningCodePoints)
71+
, code
8072
]
73+
where
74+
ShamochuCode{..} = genBitmapShamochu
75+
"isCombining"
76+
(NE.singleton 6)
77+
[2,3,4,5,6]
78+
(reverse combiningCodePoints)
8179

8280
genCombiningClassDef c cc = mconcat
8381
[ " "

0 commit comments

Comments
 (0)