Skip to content

Commit ab1de2c

Browse files
authored
Enhance LastValueAccumulator logic and add SQL logic tests for last_value function (#13980)
- Updated LastValueAccumulator to include a requirement-satisfaction check before updating the last value.
- Added SQL logic tests to verify the behavior of the last_value function with merge batches and to ensure correct aggregation in various scenarios.
1 parent 33437f7 commit ab1de2c

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

datafusion/functions-aggregate/src/first_last.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,7 @@ impl Accumulator for LastValueAccumulator {
651651
// Either there is no existing value, or there is a newer (latest)
652652
// version in the new data:
653653
if !self.is_set
654+
|| self.requirement_satisfied
654655
|| compare_rows(&self.orderings, last_ordering, &sort_options)?.is_lt()
655656
{
656657
// Update with last value in the state. Note that we should exclude the

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6137,3 +6137,69 @@ SELECT v1 FROM t1 WHERE ((count(v1) % 1) << 1) > 0;
61376137

61386138
statement ok
61396139
DROP TABLE t1;
6140+
6141+
# Test the last_value function with merge_batch
6142+
query II
6143+
with A as (
6144+
select 1 as id, 10 as foo
6145+
UNION ALL
6146+
select 1, 10
6147+
UNION ALL
6148+
select 1, 10
6149+
UNION ALL
6150+
select 1, 10
6151+
UNION ALL
6152+
select 1, 10
6153+
-- The row order is non-deterministic, so every row uses the same value
6154+
) select last_value(a.foo), sum(distinct 1) from A a group by a.id;
6155+
----
6156+
10 1
6157+
6158+
# The plan has an AggregateExec in FinalPartitioned mode (fed by a Partial one), so `merge_batch` is used.
6159+
# If the plan changes, verify that `merge_batch` is still used so that this test keeps its coverage.
6160+
query TT
6161+
explain with A as (
6162+
select 1 as id, 2 as foo
6163+
UNION ALL
6164+
select 1, 4
6165+
UNION ALL
6166+
select 1, 5
6167+
UNION ALL
6168+
select 1, 3
6169+
UNION ALL
6170+
select 1, 2
6171+
) select last_value(a.foo order by a.foo), sum(distinct 1) from A a group by a.id;
6172+
----
6173+
logical_plan
6174+
01)Projection: last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST], sum(DISTINCT Int64(1))
6175+
02)--Aggregate: groupBy=[[a.id]], aggr=[[last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST], sum(DISTINCT Int64(1))]]
6176+
03)----SubqueryAlias: a
6177+
04)------SubqueryAlias: a
6178+
05)--------Union
6179+
06)----------Projection: Int64(1) AS id, Int64(2) AS foo
6180+
07)------------EmptyRelation
6181+
08)----------Projection: Int64(1) AS id, Int64(4) AS foo
6182+
09)------------EmptyRelation
6183+
10)----------Projection: Int64(1) AS id, Int64(5) AS foo
6184+
11)------------EmptyRelation
6185+
12)----------Projection: Int64(1) AS id, Int64(3) AS foo
6186+
13)------------EmptyRelation
6187+
14)----------Projection: Int64(1) AS id, Int64(2) AS foo
6188+
15)------------EmptyRelation
6189+
physical_plan
6190+
01)ProjectionExec: expr=[last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST]@1 as last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST], sum(DISTINCT Int64(1))@2 as sum(DISTINCT Int64(1))]
6191+
02)--AggregateExec: mode=FinalPartitioned, gby=[id@0 as id], aggr=[last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST], sum(DISTINCT Int64(1))], ordering_mode=Sorted
6192+
03)----CoalesceBatchesExec: target_batch_size=8192
6193+
04)------RepartitionExec: partitioning=Hash([id@0], 4), input_partitions=5
6194+
05)--------AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[last_value(a.foo) ORDER BY [a.foo ASC NULLS LAST], sum(DISTINCT Int64(1))], ordering_mode=Sorted
6195+
06)----------UnionExec
6196+
07)------------ProjectionExec: expr=[1 as id, 2 as foo]
6197+
08)--------------PlaceholderRowExec
6198+
09)------------ProjectionExec: expr=[1 as id, 4 as foo]
6199+
10)--------------PlaceholderRowExec
6200+
11)------------ProjectionExec: expr=[1 as id, 5 as foo]
6201+
12)--------------PlaceholderRowExec
6202+
13)------------ProjectionExec: expr=[1 as id, 3 as foo]
6203+
14)--------------PlaceholderRowExec
6204+
15)------------ProjectionExec: expr=[1 as id, 2 as foo]
6205+
16)--------------PlaceholderRowExec

0 commit comments

Comments
 (0)