Skip to content

Commit 74ed087

Browse files
jdrake and ns-circle-ci authored
LEXIO-37883 Cogroup by multiple fields (#7)
* cogroup
* Version bumped to 0.8.0

Co-authored-by: ns-circle-ci <devops-team+circleci@narrativescience.com>
1 parent 8ee27c3 commit 74ed087

File tree

4 files changed

+51
-7
lines changed

4 files changed

+51
-7
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pysaql"
3-
version = "0.7.0"
3+
version = "0.8.0"
44
description = "Python SAQL query builder"
55
authors = ["Jonathan Drake <jon.drake@salesforce.com>"]
66
license = "BSD-3-Clause"

pysaql/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Python SAQL query builder"""
22

3-
__version__ = "0.7.0"
3+
__version__ = "0.8.0"

pysaql/stream.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -335,15 +335,17 @@ class CogroupStatement(StreamStatement):
335335
def __init__(
336336
self,
337337
stream: Stream,
338-
streams: Sequence[Tuple[Stream, Scalar]],
338+
streams: Sequence[Tuple[Stream, Union[Scalar, Sequence[Scalar], str]]],
339339
join_type: JoinType = JoinType.inner,
340340
) -> None:
341341
"""Initializer
342342
343343
Args:
344344
stream: Stream containing this statement
345345
streams: List of tuples that each define the stream to combine and the
346-
common field that will be used to combine results
346+
common field(s) that will be used to combine results. If there are no
347+
specific fields to group by, pass "all" as the second item in the stream
348+
tuple.
347349
join_type: Type of join that determines how records are included in the
348350
combined stream
349351
@@ -361,7 +363,18 @@ def __str__(self) -> str:
361363
streams = []
362364
for i, item in enumerate(self.streams):
363365
stream, field_ = item
364-
s = f"{stream.ref} by {field_}"
366+
if isinstance(field_, Scalar):
367+
groups = stringify(field_)
368+
elif field_ == "all":
369+
groups = "all"
370+
elif isinstance(field_, Sequence):
371+
groups = stringify_list(field_)
372+
else:
373+
raise ValueError(
374+
f"Cogroup field type not supported. Provided: {field_}"
375+
)
376+
377+
s = f"{stream.ref} by {groups}"
365378
if i == 0 and self.join_type != JoinType.inner:
366379
s += f" {self.join_type}"
367380

@@ -432,8 +445,9 @@ def cogroup(
432445
"""Combine data from two or more data streams into a single data stream
433446
434447
Args:
435-
streams: Each item is a tuple of the stream to combine and the common field
436-
that will be used to combine results
448+
streams: Each item is a tuple of the stream to combine and the common field(s)
449+
that will be used to combine results. If there are no specific fields to
450+
group by, pass "all" as the second item in the stream tuple.
437451
join_type: Type of join that determines how records are included in the
438452
combined stream. Defaults to JoinType.inner.
439453

tests/unit/test_stream.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,36 @@ def test_cogroup():
4646
]
4747

4848

49+
def test_cogroup__all():
50+
"""Should cogroup by all"""
51+
52+
q0 = load("q0_dataset")
53+
q1 = load("q1_dataset")
54+
55+
c0 = cogroup((q0, "all"), (q1, "all"))
56+
57+
assert str(c0).split("\n") == [
58+
"""q0 = load "q0_dataset";""",
59+
"""q1 = load "q1_dataset";""",
60+
"""q2 = cogroup q0 by all, q1 by all;""",
61+
]
62+
63+
64+
def test_cogroup__multiple():
65+
"""Should cogroup by multiple fields"""
66+
67+
q0 = load("q0_dataset")
68+
q1 = load("q1_dataset")
69+
70+
c0 = cogroup((q0, [field("a"), field("b")]), (q1, [field("a"), field("b")]))
71+
72+
assert str(c0).split("\n") == [
73+
"""q0 = load "q0_dataset";""",
74+
"""q1 = load "q1_dataset";""",
75+
"""q2 = cogroup q0 by ('a', 'b'), q1 by ('a', 'b');""",
76+
]
77+
78+
4979
def test_foreach__invalid():
5080
"""Should raise when no fields provided"""
5181
stream = Stream()

0 commit comments

Comments
 (0)