Skip to content

Commit 3883438

Browse files
alexkalderimisAlex Kalderimis
andauthored
Add groupBy and group for vectors (#427)
This adds `groupBy` and `group` for generic vectors, returning a list of slices of the input vector. Specialized versions of groupBy and group are added for: - Data.Vector - Data.Vector.Unboxed - Data.Vector.Storable - Data.Vector.Primitive These functions are all `O(n)`, and do not fuse, but because they return slices of the input, they do not copy. Co-authored-by: Alex Kalderimis <[email protected]>
1 parent dae9d17 commit 3883438

File tree

7 files changed

+209
-5
lines changed

7 files changed

+209
-5
lines changed

vector/changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
[#382](https://github.com/haskell/vector/pull/382)
4545
* Remove redundant `Storable` constraints on to/from `ForeignPtr` conversions
4646
* Add `unsafeCast` to `Primitive` vectors
47+
* Add `groupBy` and `group` for `Data.Vector.Generic` and the specialized
48+
version in `Data.Vector`, `Data.Vector.Unboxed`, `Data.Vector.Storable` and
49+
`Data.Vector.Primitive`.
4750

4851
# Changes in version 0.12.3.1
4952

vector/src/Data/Vector.hs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ module Data.Vector (
119119
takeWhile, dropWhile,
120120

121121
-- ** Partitioning
122-
partition, unstablePartition, partitionWith, span, break,
122+
partition, unstablePartition, partitionWith, span, break, groupBy, group,
123123

124124
-- ** Searching
125125
elem, notElem, find, findIndex, findIndices, elemIndex, elemIndices,
@@ -1410,6 +1410,46 @@ break :: (a -> Bool) -> Vector a -> (Vector a, Vector a)
14101410
{-# INLINE break #-}
14111411
break = G.break
14121412

1413+
-- | /O(n)/ Split a vector into a list of slices, using a predicate function.
1414+
--
1415+
-- The concatenation of this list of slices is equal to the argument vector,
1416+
-- and each slice contains only equal elements, as determined by the equality
1417+
-- predicate function.
1418+
--
1419+
-- Does not fuse.
1420+
--
1421+
-- >>> import qualified Data.Vector as V
1422+
-- >>> import Data.Char (isUpper)
1423+
-- >>> V.groupBy (\a b -> isUpper a == isUpper b) (V.fromList "Mississippi River")
1424+
-- ["M","ississippi ","R","iver"]
1425+
--
1426+
-- See also 'Data.List.groupBy', 'group'.
1427+
--
1428+
-- @since 0.13.0.1
1429+
groupBy :: (a -> a -> Bool) -> Vector a -> [Vector a]
1430+
{-# INLINE groupBy #-}
1431+
groupBy = G.groupBy
1432+
1433+
-- | /O(n)/ Split a vector into a list of slices of the input vector.
1434+
--
1435+
-- The concatenation of this list of slices is equal to the argument vector,
1436+
-- and each slice contains only equal elements.
1437+
--
1438+
-- Does not fuse.
1439+
--
1440+
-- This is the equivalent of 'groupBy (==)'.
1441+
--
1442+
-- >>> import qualified Data.Vector as V
1443+
-- >>> V.group (V.fromList "Mississippi")
1444+
-- ["M","i","ss","i","ss","i","pp","i"]
1445+
--
1446+
-- See also 'Data.List.group'.
1447+
--
1448+
-- @since 0.13.0.1
1449+
group :: Eq a => Vector a -> [Vector a]
1450+
{-# INLINE group #-}
1451+
group = G.groupBy (==)
1452+
14131453
-- Searching
14141454
-- ---------
14151455

vector/src/Data/Vector/Generic.hs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ module Data.Vector.Generic (
107107
takeWhile, dropWhile,
108108

109109
-- ** Partitioning
110-
partition, partitionWith, unstablePartition, span, break,
110+
partition, partitionWith, unstablePartition, span, break, groupBy, group,
111111

112112
-- ** Searching
113113
elem, notElem, find, findIndex, findIndexR, findIndices, elemIndex, elemIndices,
@@ -1552,6 +1552,47 @@ break f xs = case findIndex f xs of
15521552
Just i -> (unsafeSlice 0 i xs, unsafeSlice i (length xs - i) xs)
15531553
Nothing -> (xs, empty)
15541554

1555+
-- | /O(n)/ Split a vector into a list of slices.
1556+
--
1557+
-- The concatenation of this list of slices is equal to the argument vector,
1558+
-- and each slice contains only equal elements, as determined by the equality
1559+
-- predicate function.
1560+
--
1561+
-- >>> import qualified Data.Vector as V
1562+
-- >>> import Data.Char (isUpper)
1563+
-- >>> V.groupBy (\a b -> isUpper a == isUpper b) (V.fromList "Mississippi River")
1564+
-- ["M","ississippi ","R","iver"]
1565+
--
1566+
-- See also 'Data.List.groupBy'.
1567+
--
1568+
-- @since 0.13.0.1
1569+
{-# INLINE groupBy #-}
1570+
groupBy :: (Vector v a) => (a -> a -> Bool) -> v a -> [v a]
1571+
groupBy _ v | null v = []
1572+
groupBy f v =
1573+
let h = unsafeHead v
1574+
tl = unsafeTail v
1575+
in case findIndex (not . f h) tl of
1576+
Nothing -> [v]
1577+
Just n -> unsafeTake (n + 1) v : groupBy f (unsafeDrop (n + 1) v)
1578+
1579+
-- | /O(n)/ Split a vector into a list of slices.
1580+
--
1581+
-- The concatenation of this list of slices is equal to the argument vector,
1582+
-- and each slice contains only equal elements.
1583+
--
1584+
-- This is the equivalent of 'groupBy (==)'.
1585+
--
1586+
-- >>> import qualified Data.Vector as V
1587+
-- >>> V.group (V.fromList "Mississippi")
1588+
-- ["M","i","ss","i","ss","i","pp","i"]
1589+
--
1590+
-- See also 'Data.List.group'.
1591+
--
1592+
-- @since 0.13.0.1
1593+
group :: (Vector v a , Eq a) => v a -> [v a]
1594+
{-# INLINE group #-}
1595+
group = groupBy (==)
15551596

15561597
-- Searching
15571598
-- ---------

vector/src/Data/Vector/Primitive.hs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ module Data.Vector.Primitive (
104104
takeWhile, dropWhile,
105105

106106
-- ** Partitioning
107-
partition, unstablePartition, partitionWith, span, break,
107+
partition, unstablePartition, partitionWith, span, break, groupBy, group,
108108

109109
-- ** Searching
110110
elem, notElem, find, findIndex, findIndices, elemIndex, elemIndices,
@@ -1169,6 +1169,46 @@ break :: Prim a => (a -> Bool) -> Vector a -> (Vector a, Vector a)
11691169
{-# INLINE break #-}
11701170
break = G.break
11711171

1172+
-- | /O(n)/ Split a vector into a list of slices, using a predicate function.
1173+
--
1174+
-- The concatenation of this list of slices is equal to the argument vector,
1175+
-- and each slice contains only equal elements, as determined by the equality
1176+
-- predicate function.
1177+
--
1178+
-- Does not fuse.
1179+
--
1180+
-- >>> import qualified Data.Vector.Primitive as VP
1181+
-- >>> import Data.Char (isUpper)
1182+
-- >>> VP.groupBy (\a b -> isUpper a == isUpper b) (VP.fromList "Mississippi River")
1183+
-- ["M","ississippi ","R","iver"]
1184+
--
1185+
-- See also 'Data.List.groupBy', 'group'.
1186+
--
1187+
-- @since 0.13.0.1
1188+
groupBy :: Prim a => (a -> a -> Bool) -> Vector a -> [Vector a]
1189+
{-# INLINE groupBy #-}
1190+
groupBy = G.groupBy
1191+
1192+
-- | /O(n)/ Split a vector into a list of slices of the input vector.
1193+
--
1194+
-- The concatenation of this list of slices is equal to the argument vector,
1195+
-- and each slice contains only equal elements.
1196+
--
1197+
-- Does not fuse.
1198+
--
1199+
-- This is the equivalent of 'groupBy (==)'.
1200+
--
1201+
-- >>> import qualified Data.Vector.Primitive as VP
1202+
-- >>> VP.group (VP.fromList "Mississippi")
1203+
-- ["M","i","ss","i","ss","i","pp","i"]
1204+
--
1205+
-- See also 'Data.List.group'.
1206+
--
1207+
-- @since 0.13.0.1
1208+
group :: (Prim a, Eq a) => Vector a -> [Vector a]
1209+
{-# INLINE group #-}
1210+
group = G.groupBy (==)
1211+
11721212
-- Searching
11731213
-- ---------
11741214

vector/src/Data/Vector/Storable.hs

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ module Data.Vector.Storable (
101101
takeWhile, dropWhile,
102102

103103
-- ** Partitioning
104-
partition, unstablePartition, partitionWith, span, break,
104+
partition, unstablePartition, partitionWith, span, break, groupBy, group,
105105

106106
-- ** Searching
107107
elem, notElem, find, findIndex, findIndices, elemIndex, elemIndices,
@@ -1190,6 +1190,46 @@ break :: Storable a => (a -> Bool) -> Vector a -> (Vector a, Vector a)
11901190
{-# INLINE break #-}
11911191
break = G.break
11921192

1193+
-- | /O(n)/ Split a vector into a list of slices, using a predicate function.
1194+
--
1195+
-- The concatenation of this list of slices is equal to the argument vector,
1196+
-- and each slice contains only equal elements, as determined by the equality
1197+
-- predicate function.
1198+
--
1199+
-- Does not fuse.
1200+
--
1201+
-- >>> import qualified Data.Vector.Storable as VS
1202+
-- >>> import Data.Char (isUpper)
1203+
-- >>> VS.groupBy (\a b -> isUpper a == isUpper b) (VS.fromList "Mississippi River")
1204+
-- ["M","ississippi ","R","iver"]
1205+
--
1206+
-- See also 'Data.List.groupBy', 'group'.
1207+
--
1208+
-- @since 0.13.0.1
1209+
groupBy :: Storable a => (a -> a -> Bool) -> Vector a -> [Vector a]
1210+
{-# INLINE groupBy #-}
1211+
groupBy = G.groupBy
1212+
1213+
-- | /O(n)/ Split a vector into a list of slices of the input vector.
1214+
--
1215+
-- The concatenation of this list of slices is equal to the argument vector,
1216+
-- and each slice contains only equal elements.
1217+
--
1218+
-- Does not fuse.
1219+
--
1220+
-- This is the equivalent of 'groupBy (==)'.
1221+
--
1222+
-- >>> import qualified Data.Vector.Storable as VS
1223+
-- >>> VS.group (VS.fromList "Mississippi")
1224+
-- ["M","i","ss","i","ss","i","pp","i"]
1225+
--
1226+
-- See also 'Data.List.group'.
1227+
--
1228+
-- @since 0.13.0.1
1229+
group :: (Storable a, Eq a) => Vector a -> [Vector a]
1230+
{-# INLINE group #-}
1231+
group = G.groupBy (==)
1232+
11931233
-- Searching
11941234
-- ---------
11951235

vector/src/Data/Vector/Unboxed.hs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ module Data.Vector.Unboxed (
146146
takeWhile, dropWhile,
147147

148148
-- ** Partitioning
149-
partition, unstablePartition, partitionWith, span, break,
149+
partition, unstablePartition, partitionWith, span, break, groupBy, group,
150150

151151
-- ** Searching
152152
elem, notElem, find, findIndex, findIndices, elemIndex, elemIndices,
@@ -1182,6 +1182,44 @@ break :: Unbox a => (a -> Bool) -> Vector a -> (Vector a, Vector a)
11821182
{-# INLINE break #-}
11831183
break = G.break
11841184

1185+
-- | /O(n)/ Split a vector into a list of slices, using a predicate function.
1186+
--
1187+
-- The concatenation of this list of slices is equal to the argument vector,
1188+
-- and each slice contains only equal elements, as determined by the equality
1189+
-- predicate function.
1190+
--
1191+
-- Does not fuse.
1192+
--
1193+
-- >>> import qualified Data.Vector.Unboxed as VU
1194+
-- >>> import Data.Char (isUpper)
1195+
-- >>> VU.groupBy (\a b -> isUpper a == isUpper b) (VU.fromList "Mississippi River")
1196+
-- ["M","ississippi ","R","iver"]
1197+
--
1198+
-- See also 'Data.List.groupBy', 'group'.
1199+
--
1200+
-- @since 0.13.0.1
1201+
groupBy :: Unbox a => (a -> a -> Bool) -> Vector a -> [Vector a]
1202+
groupBy = G.groupBy
1203+
1204+
-- | /O(n)/ Split a vector into a list of slices of the input vector.
1205+
--
1206+
-- The concatenation of this list of slices is equal to the argument vector,
1207+
-- and each slice contains only equal elements.
1208+
--
1209+
-- Does not fuse.
1210+
--
1211+
-- This is the equivalent of 'groupBy (==)'.
1212+
--
1213+
-- >>> import qualified Data.Vector.Unboxed as VU
1214+
-- >>> VU.group (VU.fromList "Mississippi")
1215+
-- ["M","i","ss","i","ss","i","pp","i"]
1216+
--
1217+
-- See also 'Data.List.group'.
1218+
--
1219+
-- @since 0.13.0.1
1220+
group :: (Unbox a, Eq a) => Vector a -> [Vector a]
1221+
group = G.groupBy (==)
1222+
11851223
-- Searching
11861224
-- ---------
11871225

vector/tests/Tests/Vector/Property.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ testPolymorphicFunctions _ = $(testProperties [
169169
'prop_partition, {- 'prop_unstablePartition, -}
170170
'prop_partitionWith,
171171
'prop_span, 'prop_break,
172+
'prop_groupBy,
172173

173174
-- Searching
174175
'prop_elem, 'prop_notElem,
@@ -334,6 +335,7 @@ testPolymorphicFunctions _ = $(testProperties [
334335
= V.partitionWith `eq` partitionWith
335336
prop_span :: P ((a -> Bool) -> v a -> (v a, v a)) = V.span `eq` span
336337
prop_break :: P ((a -> Bool) -> v a -> (v a, v a)) = V.break `eq` break
338+
prop_groupBy :: P ((a -> a -> Bool) -> v a -> [v a]) = V.groupBy `eq` groupBy
337339

338340
prop_elem :: P (a -> v a -> Bool) = V.elem `eq` elem
339341
prop_notElem :: P (a -> v a -> Bool) = V.notElem `eq` notElem

0 commit comments

Comments
 (0)