Skip to content

Commit 634c565

Browse files
committed
Employ lexicographical comparison for compare
1 parent ce9916f commit 634c565

File tree

3 files changed

+31
-28
lines changed

3 files changed

+31
-28
lines changed

src/Data/Text.hs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -411,17 +411,11 @@ textDataType = mkDataType "Data.Text.Text" [packConstr]
411411

412412
-- | /O(n)/ Compare two 'Text' values lexicographically.
413413
compareText :: Text -> Text -> Ordering
414-
compareText ta@(Text _arrA _offA lenA) tb@(Text _arrB _offB lenB)
415-
| lenA == 0 && lenB == 0 = EQ
416-
| otherwise = go 0 0
417-
where
418-
go !i !j
419-
| i >= lenA || j >= lenB = compare lenA lenB
420-
| a < b = LT
421-
| a > b = GT
422-
| otherwise = go (i+di) (j+dj)
423-
where Iter a di = iter ta i
424-
Iter b dj = iter tb j
414+
compareText (Text arrA offA lenA) (Text arrB offB lenB) =
415+
A.compare arrA offA arrB offB (min lenA lenB) <> compare lenA lenB
416+
-- This is not a mistake: on contrary to UTF-16 (https://github.com/haskell/text/pull/208),
417+
-- lexicographic ordering of UTF-8 encoded strings matches lexicographic ordering
418+
-- of underlying bytearrays, no decoding is needed.
425419

426420
-- -----------------------------------------------------------------------------
427421
-- * Conversion to/from 'Text'

src/Data/Text/Array.hs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ module Data.Text.Array
3333
, copyI
3434
, empty
3535
, equal
36+
, compare
3637
, run
3738
, run2
3839
, toList
@@ -56,7 +57,8 @@ import Foreign.C.Types (CInt(..))
5657
import GHC.Exts hiding (toList)
5758
import GHC.ST (ST(..), runST)
5859
import GHC.Word (Word8(..))
59-
import Prelude hiding (length, read)
60+
import qualified Prelude
61+
import Prelude hiding (length, read, compare)
6062

6163
-- | Immutable array type.
6264
data Array = ByteArray ByteArray#
@@ -250,13 +252,23 @@ copyI count@(I# count#) (MutableByteArray dst#) dstOff@(I# dstOff#) (ByteArray s
250252

251253
-- | Compare portions of two arrays for equality. No bounds checking
252254
-- is performed.
253-
equal :: Array -- ^ First
255+
equal :: Array -> Int -> Array -> Int -> Int -> Bool
256+
equal src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count == 0
257+
{-# INLINE equal #-}
258+
259+
-- | Compare portions of two arrays. No bounds checking is performed.
260+
compare :: Array -> Int -> Array -> Int -> Int -> Ordering
261+
compare src1 off1 src2 off2 count = compareInternal src1 off1 src2 off2 count `Prelude.compare` 0
262+
{-# INLINE compare #-}
263+
264+
compareInternal
265+
:: Array -- ^ First
254266
-> Int -- ^ Offset into first
255267
-> Array -- ^ Second
256268
-> Int -- ^ Offset into second
257269
-> Int -- ^ Count
258-
-> Bool
259-
equal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i == 0
270+
-> Int
271+
compareInternal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i
260272
where
261273
#if MIN_VERSION_base(4,11,0)
262274
i = I# (compareByteArrays# src1# off1# src2# off2# count#)
@@ -266,4 +278,4 @@ equal (ByteArray src1#) (I# off1#) (ByteArray src2#) (I# off2#) (I# count#) = i
266278
foreign import ccall unsafe "_hs_text_memcmp" memcmp
267279
:: ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> IO CInt
268280
#endif
269-
{-# INLINE equal #-}
281+
{-# INLINE compareInternal #-}

src/Data/Text/Lazy.hs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ import Data.Monoid (Monoid(..))
215215
import Data.Semigroup (Semigroup(..))
216216
import Data.String (IsString(..))
217217
import qualified Data.Text as T
218+
import qualified Data.Text.Array as A
218219
import qualified Data.Text.Internal as T
219220
import qualified Data.Text.Internal.Fusion.Common as S
220221
import qualified Data.Text.Unsafe as T
@@ -286,18 +287,14 @@ compareText :: Text -> Text -> Ordering
286287
compareText Empty Empty = EQ
287288
compareText Empty _ = LT
288289
compareText _ Empty = GT
289-
compareText (Chunk a0 as) (Chunk b0 bs) = outer a0 b0
290-
where
291-
outer ta@(T.Text arrA offA lenA) tb@(T.Text arrB offB lenB) = go 0 0
292-
where
293-
go !i !j
294-
| i >= lenA = compareText as (chunk (T.Text arrB (offB+j) (lenB-j)) bs)
295-
| j >= lenB = compareText (chunk (T.Text arrA (offA+i) (lenA-i)) as) bs
296-
| a < b = LT
297-
| a > b = GT
298-
| otherwise = go (i+di) (j+dj)
299-
where T.Iter a di = T.iter ta i
300-
T.Iter b dj = T.iter tb j
290+
compareText (Chunk (T.Text arrA offA lenA) as) (Chunk (T.Text arrB offB lenB) bs) =
291+
A.compare arrA offA arrB offB (min lenA lenB) <> case lenA `compare` lenB of
292+
LT -> compareText as (Chunk (T.Text arrB (offB + lenA) (lenB - lenA)) bs)
293+
EQ -> compareText as bs
294+
GT -> compareText (Chunk (T.Text arrA (offA + lenB) (lenA - lenB)) as) bs
295+
-- This is not a mistake: on contrary to UTF-16 (https://github.com/haskell/text/pull/208),
296+
-- lexicographic ordering of UTF-8 encoded strings matches lexicographic ordering
297+
-- of underlying bytearrays, no decoding is needed.
301298

302299
instance Show Text where
303300
showsPrec p ps r = showsPrec p (unpack ps) r

0 commit comments

Comments
 (0)