Skip to content

Commit e7cee98

Browse files
KamilPiechowiak and Manul from Pathway
authored and committed
allow to set instance in diff operator (#7634)
GitOrigin-RevId: 9c968d8a6b7a816efbd271a5b60d7ca4e81e71f2
1 parent 0dfeb04 commit e7cee98

File tree

3 files changed

+65
-2
lines changed

3 files changed

+65
-2
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
88
### Added
99
- `pw.io.kafka.read` now supports reading entries starting from a specified timestamp.
1010

11+
### Changed
12+
- `pw.Table.diff` now supports setting the `instance` parameter, which allows computing differences for multiple groups.
13+
1114
## [0.15.3] - 2024-11-07
1215

1316
### Added

python/pathway/stdlib/ordered/diff.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ def diff(
1111
self: pw.Table,
1212
timestamp: pw.ColumnReference,
1313
*values: pw.ColumnReference,
14+
instance: pw.ColumnReference | None = None,
1415
) -> pw.Table:
1516
"""
1617
Compute the difference between the values in the ``values`` columns and the previous values
@@ -22,6 +23,9 @@ def diff(
2223
The column reference to the ``timestamp`` column on which the order is computed.
2324
- *values (pw.ColumnReference[int | float | datetime]):
2425
Variable-length argument representing the column references to the ``values`` columns.
26+
- instance (pw.ColumnReference):
27+
Can be used to group the values. The difference is only computed between rows with
28+
the same ``instance`` value.
2529
2630
Returns:
2731
``Table``: A new table where each column is replaced with a new column containing
@@ -31,7 +35,7 @@ def diff(
3135
ValueError: If the columns are not ColumnReference.
3236
3337
Note:
34-
- The value of the "first" value (the row with the lower value \
38+
- The result for the "first" row (the row with the lowest value \
3539
in the ``timestamp`` column) is ``None``.
3640
3741
Example:
@@ -55,6 +59,27 @@ def diff(
5559
4 | 7 | 3
5660
5 | 11 | 4
5761
6 | 16 | 5
62+
63+
>>> table = pw.debug.table_from_markdown(
64+
... '''
65+
... timestamp | instance | values
66+
... 1 | 0 | 1
67+
... 2 | 1 | 2
68+
... 3 | 1 | 4
69+
... 3 | 0 | 7
70+
... 6 | 1 | 11
71+
... 6 | 0 | 16
72+
... '''
73+
... )
74+
>>> table += table.diff(pw.this.timestamp, pw.this.values, instance=pw.this.instance)
75+
>>> pw.debug.compute_and_print(table, include_id=False)
76+
timestamp | instance | values | diff_values
77+
1 | 0 | 1 |
78+
2 | 1 | 2 |
79+
3 | 0 | 7 | 6
80+
3 | 1 | 4 | 2
81+
6 | 0 | 16 | 9
82+
6 | 1 | 11 | 7
5883
"""
5984

6085
if isinstance(timestamp, pw.ColumnReference):
@@ -69,7 +94,7 @@ def diff(
6994
"statistical.diff(): Invalid column reference for the parameter timestamp."
7095
)
7196

72-
ordered_table = self.sort(key=timestamp)
97+
ordered_table = self.sort(key=timestamp, instance=instance)
7398

7499
for value in values:
75100
if isinstance(value, pw.ColumnReference):

python/pathway/tests/ordered/test_diff.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,38 @@ def test_diff_multiple_columns():
7373
)
7474

7575
assert_table_equality_wo_index(res, expected)
76+
77+
78+
def test_diff_instance():
79+
t = T(
80+
"""
81+
| t | i | v
82+
1 | 1 | 0 | 1
83+
2 | 2 | 1 | 2
84+
3 | 3 | 1 | 4
85+
4 | 3 | 0 | 7
86+
5 | 5 | 1 | 11
87+
6 | 5 | 0 | 16
88+
7 | 7 | 0 | 22
89+
8 | 8 | 1 | 29
90+
9 | 9 | 0 | 37
91+
"""
92+
)
93+
res = t.diff(t.t, t.v, instance=t.i)
94+
95+
expected = T(
96+
"""
97+
| diff_v
98+
1 |
99+
2 |
100+
3 | 2
101+
4 | 6
102+
5 | 7
103+
6 | 9
104+
7 | 6
105+
8 | 18
106+
9 | 15
107+
"""
108+
)
109+
110+
assert_table_equality_wo_index(res, expected)

0 commit comments

Comments
 (0)