@@ -30,24 +30,30 @@ import qualified Data.Bits
3030
3131-- | Encode a Haskell Char to a list of Word8 values, in UTF8 format.
3232utf8Encode :: Char -> [Word8 ]
33- utf8Encode = map fromIntegral . go . ord
33+ utf8Encode = uncurry (:) . utf8Encode'
34+
35+ utf8Encode' :: Char -> (Word8 , [Word8 ])
36+ utf8Encode' c = case go (ord c) of
37+ (x, xs) -> (fromIntegral x, map fromIntegral xs)
3438 where
3539 go oc
36- | oc <= 0x7f = [oc]
40+ | oc <= 0x7f = ( oc
41+ , [
42+ ])
3743
38- | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6 )
39- , 0x80 + oc Data.Bits. .&. 0x3f
40- ]
44+ | oc <= 0x7ff = ( 0xc0 + (oc `Data.Bits.shiftR` 6 )
45+ , [ 0x80 + oc Data.Bits. .&. 0x3f
46+ ])
4147
42- | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12 )
43- , 0x80 + ((oc `Data.Bits.shiftR` 6 ) Data.Bits. .&. 0x3f )
48+ | oc <= 0xffff = ( 0xe0 + (oc `Data.Bits.shiftR` 12 )
49+ , [ 0x80 + ((oc `Data.Bits.shiftR` 6 ) Data.Bits. .&. 0x3f )
4450 , 0x80 + oc Data.Bits. .&. 0x3f
45- ]
46- | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18 )
47- , 0x80 + ((oc `Data.Bits.shiftR` 12 ) Data.Bits. .&. 0x3f )
51+ ])
52+ | otherwise = ( 0xf0 + (oc `Data.Bits.shiftR` 18 )
53+ , [ 0x80 + ((oc `Data.Bits.shiftR` 12 ) Data.Bits. .&. 0x3f )
4854 , 0x80 + ((oc `Data.Bits.shiftR` 6 ) Data.Bits. .&. 0x3f )
4955 , 0x80 + oc Data.Bits. .&. 0x3f
50- ]
56+ ])
5157
5258#endif
5359
@@ -72,8 +78,8 @@ alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
7278alexGetByte (p,c,(b: bs),s) = Just (b,(p,c,bs,s))
7379alexGetByte (_,_,[] ,[] ) = Nothing
7480alexGetByte (p,_,[] ,(c: s)) = let p' = alexMove p c
75- (b : bs) = utf8Encode c
76- in p' `seq` Just (b, (p', c, bs, s))
81+ in case utf8Encode' c of
82+ (b, bs) -> p' `seq` Just (b, (p', c, bs, s))
7783#endif
7884
7985#if defined(ALEX_POSN_BYTESTRING) || defined(ALEX_MONAD_BYTESTRING)
@@ -334,9 +340,8 @@ alexScanTokens str = go ('\n',[],str)
334340alexGetByte :: AlexInput -> Maybe (Byte ,AlexInput )
335341alexGetByte (c,(b: bs),s) = Just (b,(c,bs,s))
336342alexGetByte (_,[] ,[] ) = Nothing
337- alexGetByte (_,[] ,(c: s)) = case utf8Encode c of
338- (b: bs) -> Just (b, (c, bs, s))
339- [] -> Nothing
343+ alexGetByte (_,[] ,(c: s)) = case utf8Encode' c of
344+ (b, bs) -> Just (b, (c, bs, s))
340345#endif
341346
342347
0 commit comments