|
1 | | -{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} |
| 1 | +{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} |
2 | 2 | {-# LANGUAGE MagicHash, UnboxedTuples, PatternGuards #-} |
3 | 3 | {-# OPTIONS_GHC -fno-warn-unused-imports #-} |
4 | 4 | {-# LANGUAGE Trustworthy #-} |
@@ -469,6 +469,7 @@ import Data.ByteString.Builder.Prim.ASCII |
469 | 469 | import Foreign |
470 | 470 | import Foreign.C.Types |
471 | 471 | import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) |
| 472 | +import GHC.Int (Int (..)) |
472 | 473 | import GHC.Word (Word8 (..)) |
473 | 474 | import GHC.Exts |
474 | 475 | import GHC.IO |
@@ -672,50 +673,87 @@ primMapLazyByteStringBounded w = |
672 | 673 | -- |
673 | 674 | -- @since 0.11.0.0 |
674 | 675 | cstring :: Addr# -> Builder |
675 | | -cstring = |
676 | | - \addr0 -> builder $ step addr0 |
677 | | - where |
678 | | - step :: Addr# -> BuildStep r -> BuildStep r |
679 | | - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) |
680 | | - | W8# ch == 0 = k br |
681 | | - | op0 == ope = |
682 | | - return $ bufferFull 1 op0 (step addr k) |
683 | | - | otherwise = do |
684 | | - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of |
685 | | - s' -> (# s', () #) |
686 | | - let br' = BufferRange (op0 `plusPtr` 1) ope |
687 | | - step (addr `plusAddr#` 1#) k br' |
688 | | - where |
689 | | - !ch = indexWord8OffAddr# addr 0# |
| 676 | +cstring = \addr0 -> builder $ \k br -> do |
| 677 | +#if __GLASGOW_HASKELL__ >= 811 |
| 678 | + let len = cstringLength# addr0 |
| 679 | +#else |
| 680 | + (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0) |
| 681 | +#endif |
| 682 | + cstring_step addr0 len k br |
| 683 | +{-# INLINE cstring #-} |
| 684 | + |
| 685 | +cstring_step :: Addr# -> Int# -> BuildStep r -> BuildStep r |
| 686 | +cstring_step !addr !len !k br@(BufferRange op0 ope) |
| 687 | + -- String is empty, process the continuation |
| 688 | + | (I# len) == 0 = k br |
| 689 | + -- Buffer is full, allocate some more... We ask for just one more |
| 690 | + -- byte, but the builder allocation strategy will in practice give |
| 691 | + -- us more space, which we'll consume in a single step. |
| 692 | + | op0 == ope = |
| 693 | + return $ bufferFull 1 op0 (cstring_step addr len k) |
| 694 | + -- Copy as much of the string as fits into the available buffer space. |
| 695 | + -- If the string is long enough, we may have asked for less than its |
| 696 | + -- full length, filling the buffer with the rest will go into the next |
| 697 | + -- builder step. |
| 698 | + | otherwise = do |
| 699 | + let !avail@(I# avail#) = min (I# len) (ope `minusPtr` op0) |
| 700 | + br' = BufferRange (op0 `plusPtr` avail) ope |
| 701 | + addr' = addr `plusAddr#` avail# |
| 702 | + len' = len -# avail# |
| 703 | + S.memcpy op0 (Ptr addr) avail |
| 704 | + cstring_step addr' len' k br' |
690 | 705 |
|
691 | 706 | -- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'. |
692 | 707 | -- Null characters can be encoded as @0xc0 0x80@. |
693 | 708 | -- |
694 | 709 | -- @since 0.11.0.0 |
695 | 710 | cstringUtf8 :: Addr# -> Builder |
696 | | -cstringUtf8 = |
697 | | - \addr0 -> builder $ step addr0 |
698 | | - where |
699 | | - step :: Addr# -> BuildStep r -> BuildStep r |
700 | | - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) |
701 | | - | W8# ch == 0 = k br |
702 | | - | op0 == ope = |
703 | | - return $ bufferFull 1 op0 (step addr k) |
704 | | - -- NULL is encoded as 0xc0 0x80 |
705 | | - | W8# ch == 0xc0 |
706 | | - , W8# (indexWord8OffAddr# addr 1#) == 0x80 = do |
707 | | - let !(W8# nullByte#) = 0 |
708 | | - IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of |
709 | | - s' -> (# s', () #) |
710 | | - let br' = BufferRange (op0 `plusPtr` 1) ope |
711 | | - step (addr `plusAddr#` 2#) k br' |
712 | | - | otherwise = do |
713 | | - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of |
714 | | - s' -> (# s', () #) |
715 | | - let br' = BufferRange (op0 `plusPtr` 1) ope |
716 | | - step (addr `plusAddr#` 1#) k br' |
717 | | - where |
718 | | - !ch = indexWord8OffAddr# addr 0# |
| 711 | +cstringUtf8 = \addr0 -> builder $ \k br -> do |
| 712 | +#if __GLASGOW_HASKELL__ >= 811 |
| 713 | + let len = cstringLength# addr0 |
| 714 | +#else |
| 715 | + (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0) |
| 716 | +#endif |
| 717 | + nullAt <- S.c_strstr (Ptr addr0) (Ptr "\xc0\x80"#) |
| 718 | + cstringUtf8_step addr0 len nullAt k br |
| 719 | +{-# INLINE cstringUtf8 #-} |
| 720 | + |
| 721 | +cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r |
| 722 | +cstringUtf8_step !addr !len !nullAt !k br@(BufferRange op0@(Ptr op0#) ope) |
| 723 | + -- String is empty, process the continuation |
| 724 | + | (I# len) == 0 = k br |
| 725 | + -- Contains no encoded nulls, use simpler 'cstring' code |
| 726 | + | nullPtr == nullAt = |
| 727 | + cstring_step addr len k br |
| 728 | + -- Buffer is full, allocate some more... We ask for just one more |
| 729 | + -- byte, but the builder allocation strategy will in practice give |
| 730 | + -- us more space, which we'll consume in a single step. |
| 731 | + | op0 == ope = |
| 732 | + return $ bufferFull 1 op0 (cstringUtf8_step addr len nullAt k) |
| 733 | + -- We're at the encoded null-byte, append a '\0' to the buffer and |
| 734 | + -- continue with the rest of the input string, after locating the |
| 735 | + -- next encoded null-byte, if any. |
| 736 | + | (Ptr addr) == nullAt = do |
| 737 | + let !(W8# nullByte#) = 0 |
| 738 | + IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of |
| 739 | + s' -> (# s', () #) |
| 740 | + let br' = BufferRange (op0 `plusPtr` 1) ope |
| 741 | + addr' = addr `plusAddr#` 2# |
| 742 | + len' = len -# 2# |
| 743 | + nullAt' <- S.c_strstr (Ptr addr') (Ptr "\xc0\x80"#) |
| 744 | + cstringUtf8_step addr' len' nullAt' k br' |
| 745 | + -- Copy as much of the null-free portion of the string as fits into the |
| 746 | + -- available buffer space. If the string is long enough, we may have asked |
| 747 | + -- for less than its full length, filling the buffer with the rest will go |
| 748 | + -- into the next builder step. |
| 749 | + | otherwise = do |
| 750 | + let !nullFree = nullAt `minusPtr` (Ptr addr) |
| 751 | + !avail@(I# avail#) = min nullFree (ope `minusPtr` op0) |
| 752 | + br' = BufferRange (op0 `plusPtr` avail) ope |
| 753 | + addr' = addr `plusAddr#` avail# |
| 754 | + len' = len -# avail# |
| 755 | + S.memcpy op0 (Ptr addr) avail |
| 756 | + cstringUtf8_step addr' len' nullAt k br' |
719 | 757 |
|
720 | 758 | ------------------------------------------------------------------------------ |
721 | 759 | -- Char8 encoding |
|
0 commit comments