@@ -12,13 +12,15 @@ module UCD2Haskell.Generator
1212 , moduleToFileName
1313 , dirFromFileName
1414 -- * Bitmap
15+ , BitmapType (.. )
1516 , genBitmap
1617 , genEnumBitmap
1718 , bitMapToAddrLiteral
1819 , enumMapToAddrLiteral
1920 , chunkAddrLiteral
2021 , word32ToWord8s
2122 , splitPlanes
23+ , genBitmapShamochu
2224 , genEnumBitmapShamochu
2325 , generateShamochuBitmaps
2426 , toLookupBitMapName
@@ -29,6 +31,8 @@ module UCD2Haskell.Generator
2931 , apacheLicense
3032 ) where
3133
34+ import Control.Exception (assert )
35+ import Data.Bifunctor (Bifunctor (.. ))
3236import Data.Bits (Bits (.. ))
3337import qualified Data.ByteString as B
3438import qualified Data.ByteString.Builder as BB
@@ -185,7 +189,7 @@ genBitmap funcName ordList = mconcat
185189 , showPaddedHeXB (minimum ordList)
186190 , " && cp <= 0x"
187191 , showPaddedHeXB (maximum ordList)
188- , " && lookupBit64 bitmap# cp\n "
192+ , " && lookupBit bitmap# cp\n "
189193 , " where\n " ]
190194 , rawBitmap )
191195 -- Planes 0-3 and 14
@@ -203,17 +207,21 @@ genBitmap funcName ordList = mconcat
203207 , " = False\n " ]
204208 else " "
205209 , " | cp < 0x" , showPaddedHeXB bound1
206- , " = lookupBit64 bitmap# cp\n "
210+ , " = lookupBit bitmap# cp\n "
207211 , " | cp < 0xE0000 = False\n "
208212 , " | cp < 0x" , showPaddedHeXB bound2
209- , " = lookupBit64 bitmap# (cp - 0x"
213+ , " = lookupBit bitmap# (cp - 0x"
210214 , showPaddedHeXB (0xE0000 - bound1)
211215 , " )\n "
212216 , " | otherwise = False\n "
213217 , " where\n "
214218 , " cp = ord c\n " ]
215219 , planes0To3 <> plane14 )
216220
221+ {-|
222+ >>> positionsToBitMap [0, 3, 8]
223+ [True,False,False,True,False,False,False,False,True]
224+ -}
217225positionsToBitMap :: [Int ] -> [Bool ]
218226positionsToBitMap = go 0
219227
@@ -450,6 +458,83 @@ word32ToWord8s n = (\k -> fromIntegral ((n `shiftR` k) .&. 0xff)) <$> [0,8..24]
450458-- Bitmaps: Shamochu algorithm
451459--------------------------------------------------------------------------------
452460
461+ genBitmapShamochu ::
462+ HasCallStack
463+ => String
464+ -> NE. NonEmpty Word
465+ -- ^ Chunk size stage 1
466+ -> [Word ]
467+ -- ^ Chunk size stage 2
468+ -> [Int ]
469+ -> BB. Builder
470+ genBitmapShamochu funcNameStr stage1 stage2 ordList = mconcat
471+ [ " {-# INLINE " <> funcName <> " #-}\n "
472+ , funcName, " :: Char -> Bool\n "
473+ , funcName, func
474+ , " \n "
475+ , generateShamochuBitmaps funcNameStr False BitMap stage1 stage2 id (packBits bitmap)
476+ ]
477+ where
478+ funcName = BB. string7 funcNameStr
479+ rawBitmap = positionsToBitMap ordList
480+ lookupFunc = toLookupBitMapName funcNameStr
481+ (func, bitmap) = if length rawBitmap <= 0x40000
482+ -- Only planes 0-3
483+ then
484+ ( mconcat
485+ [ " c = c >= '\\ x"
486+ , showPaddedHeXB (minimum ordList)
487+ , " ' && c <= '\\ x"
488+ , showPaddedHeXB (maximum ordList)
489+ , " ' && " , lookupFunc, " (ord c)\n " ]
490+ , rawBitmap )
491+ -- Planes 0-3 and 14
492+ else
493+ let (planes0To3, plane14) = splitPlanes " genBitmap: cannot build" not rawBitmap
494+ bound0 = pred (minimum ordList)
495+ bound1 = length planes0To3
496+ bound2 = 0xE0000 + length plane14
497+ in ( mconcat
498+ [ " c\n "
499+ , if bound0 > 0
500+ then mconcat
501+ [ " | c < '\\ x"
502+ , showPaddedHeXB bound0
503+ , " ' = False\n " ]
504+ else " "
505+ , " | c < '\\ x" , showPaddedHeXB bound1
506+ , " ' = " , lookupFunc, " (ord c)\n "
507+ , " | c < '\\ xE0000' = False\n "
508+ , " | c < '\\ x" , showPaddedHeXB bound2
509+ , " ' = " , lookupFunc, " (ord c - 0x"
510+ , showPaddedHeXB (0xE0000 - bound1)
511+ , " )\n "
512+ , " | otherwise = False\n "
513+ ]
514+ , planes0To3 <> plane14 )
515+
516+ {-|
517+ >>> packBits [True, False, False, False, False, False, False, False, False, True]
518+ [1,2]
519+ -}
520+ packBits :: [Bool ] -> [Word8 ]
521+ packBits = L. unfoldr go
522+ where
523+ go :: [Bool ] -> Maybe (Word8 , [Bool ])
524+ go [] = Nothing
525+ go xs = Just . first pack . splitAt 8 $ xs
526+
527+ pack :: [Bool ] -> Word8
528+ pack = toByte . padTo8
529+
530+ padTo8 :: [Bool ] -> [Bool ]
531+ padTo8 xs
532+ | length xs >= 8 = xs
533+ | otherwise = xs <> replicate (8 - length xs) False
534+
535+ toByte :: [Bool ] -> Word8
536+ toByte xs = sum $ map (\ i -> if xs !! i then 1 `shiftL` i else 0 ) [0 .. 7 ]
537+
453538genEnumBitmapShamochu
454539 :: forall a . (HasCallStack , Bounded a , Enum a , Eq a , Show a )
455540 => String
@@ -477,7 +562,7 @@ genEnumBitmapShamochu funcNameStr rawInt stage1 stage2 convert (defPUA, pPUA) (d
477562 , funcName, " :: Char -> Int" , rawSuffix, " \n "
478563 , funcName, func
479564 , " \n "
480- , generateShamochuBitmaps funcNameStr rawInt stage1 stage2 convert bitmap
565+ , generateShamochuBitmaps funcNameStr rawInt ByteMap stage1 stage2 convert bitmap
481566 ]
482567 where
483568 rawSuffix = if rawInt then " #" else " "
@@ -532,24 +617,44 @@ genEnumBitmapShamochu funcNameStr rawInt stage1 stage2 convert (defPUA, pPUA) (d
532617 , " " , if rawInt then " !cp@(I# cp#)" else " cp" , " = ord c\n " ]
533618 , planes0To3' <> plane14' )
534619
620+ data BitmapType = BitMap | ByteMap
535621generateShamochuBitmaps ::
536- String -> Bool -> NE. NonEmpty Word -> [Word ] -> (a -> Word8 ) -> [a ] -> BB. Builder
537- generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
538- case Shamochu. compress powersStage1 powersStage2 (Exts. fromList (convert <$> xs)) of
622+ -- | Name of the function
623+ String ->
624+ -- | Use raw 'Int#' if true
625+ Bool ->
626+ -- | Type
627+ BitmapType ->
628+ -- | Chunk sizes stage 1
629+ NE. NonEmpty Word ->
630+ -- | Chunk sizes stage 2
631+ [Word ] ->
632+ -- | Conversion function
633+ (a -> Word8 ) ->
634+ -- | Input
635+ [a ] ->
636+ BB. Builder
637+ generateShamochuBitmaps name rawInt mapType powersStage1 powersStage2 convert xs =
638+ case Shamochu. compress powersStage1 powersStage2 xs' of
539639 Shamochu. OneStage {.. } -> trace' " OneStage" stats $ mconcat
540640 [ " {-# INLINE " , toLookupBitMapName name, " #-}\n "
541- , toLookupBitMapName name, " :: Int" , rawSuffix, " -> Int " , rawSuffix , " \n "
641+ , toLookupBitMapName name, " :: Int" , rawSuffix, " -> " , outputType , " \n "
542642 , toLookupBitMapName name, " n =\n "
543643 -- Lookup:
544644 -- mask = (1 << chunk_size_log2) - 1;
545645 -- original[i] = data[offsets[i >> chunk_size_log2] + (i & mask)];
546- , mkLookup (Shamochu. dataIntSize stats) " data" 1 . mconcat $
547- [ mkLookup (Shamochu. offsets1IntSize stats) " offsets" 2 $
548- mkIndent 3 <> mkShiftR " n" (Shamochu. dataChunkSizeLog2 stats)
549- , mkAnd " n" " mask" ]
646+ , case mapType of
647+ BitMap -> mkBitLookup " data" 1 . mconcat $
648+ [ mkWordLookup (Shamochu. offsets1IntSize stats) " offsets" 2 $
649+ mkIndent 3 <> mkShiftR " n" (3 + Shamochu. dataChunkSizeLog2 stats)
650+ , mkAnd (mkShiftR' " n" 3 ) " mask" ]
651+ ByteMap -> mkWordLookup (Shamochu. dataIntSize stats) " data" 1 . mconcat $
652+ [ mkWordLookup (Shamochu. offsets1IntSize stats) " offsets" 2 $
653+ mkIndent 3 <> mkShiftR " n" (Shamochu. dataChunkSizeLog2 stats)
654+ , mkAnd " n" " mask" ]
550655 , " \n "
551656 , " where\n "
552- , " " , mkMask " mask" (Shamochu. dataChunkSizeLog2 stats)
657+ , " " , mkMaskDef " mask" (Shamochu. dataChunkSizeLog2 stats)
553658 , " !(Ptr data#) = " , dataBitMap, " \n "
554659 , " !(Ptr offsets#) = " , offsetsBitMap, " \n "
555660 , " \n "
@@ -559,7 +664,7 @@ generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
559664 4
560665 50
561666 (Shamochu. dataIntSize stats `shiftR` 3 )
562- (Exts. toList array)
667+ (pad ( Exts. toList array) )
563668 " \" #\n "
564669 , " \n "
565670 , offsetsBitMap, " :: Ptr " , offsetType, " \n "
@@ -579,30 +684,39 @@ generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
579684 offsetType = " Word" <> BB. wordDec (Shamochu. offsets1IntSize stats)
580685 Shamochu. TwoStages {.. } -> trace' " TwoStages" stats $ mconcat
581686 [ " {-# INLINE " , toLookupBitMapName name, " #-}\n "
582- , toLookupBitMapName name, " :: Int" , rawSuffix, " -> Int " , rawSuffix , " \n "
687+ , toLookupBitMapName name, " :: Int" , rawSuffix, " -> " , outputType , " \n "
583688 , toLookupBitMapName name, " n =\n "
584689 -- Lookup:
585690 -- mask_data = (1 << data_chunk_size_log2) - 1
586691 -- mask_offsets = (1 << offsets_chunk_size_log2) - 1
587692 -- data[
588693 -- offsets1[
589- -- offsets2[ks >> (data_chunk_size_log2 + offsets_chunk_size_log2)] +
590- -- ((ks >> data_chunk_size_log2) & mask_offsets)
694+ -- offsets2[i >> (data_chunk_size_log2 + offsets_chunk_size_log2)] +
695+ -- ((i >> data_chunk_size_log2) & mask_offsets)
591696 -- ] +
592- -- (ks & mask_data)
697+ -- (i & mask_data)
593698 -- ];
594- , mkLookup (Shamochu. dataIntSize stats) " data" 1 . mconcat $
595- [ mkLookup (Shamochu. offsets1IntSize stats) " offsets1" 2 . mconcat $
596- [ mkLookup (Shamochu. offsets2IntSize stats) " offsets2" 3 $
597- mkIndent 4 <>
598- mkShiftR " n" (Shamochu. dataChunkSizeLog2 stats + Shamochu. offsets1ChunkSizeLog2 stats)
599- , mkAnd (" (" <> mkShiftR " n" (Shamochu. dataChunkSizeLog2 stats) <> " )" ) " maskOffsets"
600- ]
601- , mkAnd " n" " maskData" ]
699+ , case mapType of
700+ BitMap -> mkBitLookup " data" 1 . mconcat $
701+ [ mkWordLookup (Shamochu. offsets1IntSize stats) " offsets1" 2 . mconcat $
702+ [ mkWordLookup (Shamochu. offsets2IntSize stats) " offsets2" 3 $
703+ mkIndent 4 <>
704+ mkShiftR " n" (3 + Shamochu. dataChunkSizeLog2 stats + Shamochu. offsets1ChunkSizeLog2 stats)
705+ , mkAnd (mkShiftR' " n" (3 + Shamochu. dataChunkSizeLog2 stats)) " maskOffsets"
706+ ]
707+ , mkAnd (mkShiftR' " n" 3 ) " maskData" ]
708+ ByteMap -> mkWordLookup (Shamochu. dataIntSize stats) " data" 1 . mconcat $
709+ [ mkWordLookup (Shamochu. offsets1IntSize stats) " offsets1" 2 . mconcat $
710+ [ mkWordLookup (Shamochu. offsets2IntSize stats) " offsets2" 3 $
711+ mkIndent 4 <>
712+ mkShiftR " n" (Shamochu. dataChunkSizeLog2 stats + Shamochu. offsets1ChunkSizeLog2 stats)
713+ , mkAnd (mkShiftR' " n" (Shamochu. dataChunkSizeLog2 stats)) " maskOffsets"
714+ ]
715+ , mkAnd " n" " maskData" ]
602716 , " \n "
603717 , " where\n "
604- , " " , mkMask " maskData" (Shamochu. dataChunkSizeLog2 stats)
605- , " " , mkMask " maskOffsets" (Shamochu. offsets1ChunkSizeLog2 stats)
718+ , " " , mkMaskDef " maskData" (Shamochu. dataChunkSizeLog2 stats)
719+ , " " , mkMaskDef " maskOffsets" (Shamochu. offsets1ChunkSizeLog2 stats)
606720 , " !(Ptr data#) = " , dataBitMap, " \n "
607721 , " !(Ptr offsets1#) = " , offsets1BitMap, " \n "
608722 , " !(Ptr offsets2#) = " , offsets2BitMap, " \n "
@@ -613,7 +727,7 @@ generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
613727 4
614728 50
615729 (Shamochu. dataIntSize stats `shiftR` 3 )
616- (Exts. toList dataArray)
730+ (pad ( Exts. toList dataArray) )
617731 " \" #\n "
618732 , " \n "
619733 , offsets1BitMap, " :: Ptr " , offset1Type, " \n "
@@ -644,6 +758,20 @@ generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
644758 offset1Type = " Word" <> BB. wordDec (Shamochu. offsets1IntSize stats)
645759 offset2Type = " Word" <> BB. wordDec (Shamochu. offsets2IntSize stats)
646760 where
761+ xs' = Exts. fromList (convert <$> xs)
762+ maxWordBitSizeLog2 = 3
763+ maxWordByteSize = assert
764+ (all (>= maxWordBitSizeLog2) powersStage1) -- Chunks should not cut words
765+ (2 ^ maxWordBitSizeLog2 `div` 8 )
766+ outputType = case mapType of
767+ BitMap -> " Bool"
768+ ByteMap -> if rawInt then " Int#" else " Int"
769+ pad ys = case mapType of
770+ -- Ensure lookupBit read full words at the edge
771+ BitMap -> case rem (length ys) maxWordByteSize of
772+ 0 -> ys
773+ k -> ys <> replicate (maxWordByteSize - k) 0
774+ ByteMap -> ys
647775 rawSuffix = if rawInt then " #" else " "
648776 trace' stages stats = trace $ mconcat
649777 [ " * " , name, " : Shamochu: " , stages, " ; savings: "
@@ -653,25 +781,31 @@ generateShamochuBitmaps name rawInt powersStage1 powersStage2 convert xs =
653781 nameBB = BB. string7 name
654782 mkIndent :: Word -> BB. Builder
655783 mkIndent count = foldMap (const " " ) [1 .. count]
656- mkLookup dataSize addrName indent index = mconcat
784+ mkBitLookup addrName indent index = mconcat
657785 [ mkIndent indent
658- , " lookupWord" , BB. wordDec dataSize, " AsInt" , rawSuffix, " " , addrName, " # (\n "
786+ , " lookupBit" , rawSuffix, " " , addrName, " # (\n "
787+ , index, " \n "
788+ , mkIndent indent
789+ , " ) (n "
790+ -- x % 2^n = x & (2^n - 1)
791+ , if rawInt then " `andI#` 7#)" else " .&. 7)" ]
792+ mkWordLookup dataSize addrName indent index = mconcat
793+ [ mkIndent indent
794+ , " lookupWord" , BB. wordDec dataSize, " AsInt" , rawSuffix
795+ , " " , addrName, " # (\n "
659796 , index, " \n "
660797 , mkIndent indent, " )" ]
661- mkMask mask count = if rawInt
798+ mkMaskDef mask count = if rawInt
662799 then mconcat [mask, " = (1# `iShiftL#` " , BB. wordDec count, " #) -# 1#\n " ]
663800 else mconcat [mask, " = (1 `shiftL` " , BB. wordDec count, " ) - 1\n " ]
801+ mkMask n mask = if rawInt
802+ then mconcat [" (" , n, " `andI#` " , mask, " )" ]
803+ else mconcat [" (" , n, " .&. " , mask, " )" ]
804+ mkAnd n mask = (if rawInt then " +# " else " + " ) <> mkMask n mask
664805 mkShiftR n count = if rawInt
665806 then mconcat [n, " `iShiftRL#` " , BB. wordDec count, " #" ]
666807 else mconcat [n, " `shiftR` " , BB. wordDec count]
667- mkAnd n mask = if rawInt
668- then mconcat [" +# (" , n, " `andI#` " , mask, " )" ]
669- else mconcat [" + (" , n, " .&. " , mask, " )" ]
670-
671- toTitle :: String -> String
672- toTitle = \ case
673- c: cs -> toUpper c : cs
674- cs -> cs
808+ mkShiftR' n count = " (" <> mkShiftR n count <> " )"
675809
676810toLookupBitMapName :: String -> BB. Builder
677811toLookupBitMapName name = " lookup" <> BB. string7 (toTitle name) <> " BitMap"
@@ -680,6 +814,11 @@ toLookupBitMapName name = "lookup" <> BB.string7 (toTitle name) <> "BitMap"
680814-- Helpers
681815--------------------------------------------------------------------------------
682816
817+ toTitle :: String -> String
818+ toTitle = \ case
819+ c: cs -> toUpper c : cs
820+ cs -> cs
821+
683822unlinesBB :: [BB. Builder ] -> BB. Builder
684823unlinesBB = (<> " \n " ) . mconcat . L. intersperse " \n "
685824
0 commit comments