From e0104c5cf9ce28d82a0499fd2896f2947156ea84 Mon Sep 17 00:00:00 2001 From: YongChul Kwon Date: Mon, 13 Oct 2025 16:27:27 -0700 Subject: [PATCH 1/4] feat: aggregate rel compatibility options --- proto/substrait/algebra.proto | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 3c15f7931..eb2564ced 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -346,6 +346,8 @@ message AggregateRel { // `Grouping.expression_references`. repeated Expression grouping_expressions = 5; + Compatibility compatibility = 6; + substrait.extensions.AdvancedExtension advanced_extension = 10; message Grouping { @@ -366,6 +368,14 @@ message AggregateRel { // Helps to support SUM() FILTER(WHERE...) syntax without masking opportunities for optimization Expression filter = 2; } + + // Various modes of operations of AggregateRel to capture different behaviors across systems. + message Compatibility { + // If true, the AggregateRel must not yield a row on empty input + // when specified with non-zero groupings even when groupings includes + // empty grouping sets. + bool groupings_yield_no_rows_on_empty_input = 1; + } } // ConsistentPartitionWindowRel provides the ability to perform calculations across sets of rows From 4a2a7351c13d0ac679a8892bd825692dcd9c81bb Mon Sep 17 00:00:00 2001 From: YongChul Kwon Date: Mon, 17 Nov 2025 12:11:43 -0800 Subject: [PATCH 2/4] feat: add AggregateRel compatibility --- proto/substrait/algebra.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index eb2564ced..d823f0ea1 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -372,7 +372,7 @@ message AggregateRel { // Various modes of operations of AggregateRel to capture different behaviors across systems. 
message Compatibility { // If true, the AggregateRel must not yield a row on empty input - // when specified with non-zero groupings even when groupings includes + // when specified with non-empty groupings field even when groupings includes // empty grouping sets. bool groupings_yield_no_rows_on_empty_input = 1; } From f8f32d7d5239826e287cfe0a1a6980b255812de4 Mon Sep 17 00:00:00 2001 From: YongChul Kwon Date: Tue, 9 Dec 2025 15:14:28 -0800 Subject: [PATCH 3/4] Adding compatibility documentation. Simplify field name --- proto/substrait/algebra.proto | 6 ++---- site/docs/relations/logical_relations.md | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 9d2647bfd..0639cbcaa 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -375,10 +375,8 @@ message AggregateRel { // Various modes of operations of AggregateRel to capture different behaviors across systems. message Compatibility { - // If true, the AggregateRel must not yield a row on empty input - // when specified with non-empty groupings field even when groupings includes - // empty grouping sets. - bool groupings_yield_no_rows_on_empty_input = 1; + // If true, the AggregateRel must not yield a row on empty input. + bool yield_no_rows_on_empty_input = 1; } } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index a6b0990aa..68b5b2e31 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -407,6 +407,25 @@ If at least one grouping expression is present, the aggregation is allowed to no | Per Grouping Set | A list of expression grouping that the aggregation measured should be calculated for. | Optional. | | Measures | A list of one or more aggregate expressions along with an optional filter. | Optional, required if no grouping sets. 
| +### Aggregate Compatibility + +The aggregate operation is one of the most complex operations in the spec. Although implementations mostly agree on behaviors, there may be gaps in corner cases. Those behavioral differences are captured in compatibility. + +NOTE: The compatibility is meant to address gaps in the core implementation of aggregation such as grouping sets. For custom aggregations, consider using aggregate extension functions. If you want to introduce a new compatibility mode, reach out Substrait PMC to discuss. + +#### yield_no_rows_on_empty_input + +AggregateRel **MUST NOT** produce a row on empty input even if the `groupings` is empty or `groupings` include an empty grounping set. + +**Default:** ***false***. Both empty `groupings` or any empty grounping sets in `groupings` yield a row on empty input. +**Compatibility for**: Microsoft SQL server family, Oracle. + +**Example:** +```sql +-- The following two SQL statements yield a single row with value 0 in systems that DO NOT require this compatibility. +SELECT COUNT(*) FROM T -- [(0)] when T is empty. +SELECT COUNT(*) FROM T GROUP BY GROUPING SETS (()) -- [] when T is empty in systems requiring this compatibility. +``` === "AggregateRel Message" From 028b6d30dd36799dce82a158acdcd5052ff4e203 Mon Sep 17 00:00:00 2001 From: YongChul Kwon Date: Tue, 9 Dec 2025 22:34:41 -0800 Subject: [PATCH 4/4] Change bool to enum. Updated documentation accordingly. --- proto/substrait/algebra.proto | 18 ++++++++++++++++-- site/docs/relations/logical_relations.md | 10 ++++++---- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 0639cbcaa..800e14682 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -375,8 +375,22 @@ message AggregateRel { // Various modes of operations of AggregateRel to capture different behaviors across systems.
message Compatibility { - // If true, the AggregateRel must not yield a row on empty input. - bool yield_no_rows_on_empty_input = 1; + // Defines the behavior of AggregateRel when there is an empty grouping set in the `groupings` + // and the input is empty. An empty grouping set is an aggregation over the entire input and some + // systems implement different behaviors when the input is empty. + enum EmptyGroupingSetOnEmptyInput { + // Unspecified; treated as `EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_ROWS`, which is the default behavior. + EMPTY_GROUPING_SET_ON_EMPTY_INPUT_UNSPECIFIED = 0; + // If there is an empty grouping set in the `groupings`, the AggregateRel yields a single row + // for the empty grouping set on empty input (i.e., explicit grouping over the entire input). + // For example, AggregateRel[(), COUNT] yields one record with value 0 when the input is empty. + EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_ROWS = 1; + // The AggregateRel yields no row for the empty grouping set on empty input (i.e., groups are formed only from input rows, so an empty input produces no groups). + // For example, AggregateRel[(), COUNT] yields no record when the input is empty. + EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_NO_ROWS = 2; + } + + EmptyGroupingSetOnEmptyInput empty_grouping_set_on_empty_input = 1; } } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 68b5b2e31..be2393c9c 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -413,12 +413,14 @@ The aggregate operation is one of the most complex operations in the spec. Altho NOTE: The compatibility is meant to address gaps in the core implementation of aggregation such as grouping sets. For custom aggregations, consider using aggregate extension functions. If you want to introduce a new compatibility mode, reach out Substrait PMC to discuss.
-#### yield_no_rows_on_empty_input +#### Empty Grouping Set on Empty Input -AggregateRel **MUST NOT** produce a row on empty input even if the `groupings` is empty or `groupings` include an empty grounping set. +This compatibility mode defines how the AggregateRel behaves with an empty grouping set on empty input. The default is `EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_ROWS`. -**Default:** ***false***. Both empty `groupings` or any empty grounping sets in `groupings` yield a row on empty input. -**Compatibility for**: Microsoft SQL server family, Oracle. +| Mode | Behavior | Example Systems | +| -------------------------------------------------|-------------------------------|-----------------| +| EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_ROWS | A row for empty grouping set | PostgreSQL | +| EMPTY_GROUPING_SET_ON_EMPTY_INPUT_YIELDS_NO_ROWS | No row for empty grouping set | Microsoft SQL Server family, Oracle | **Example:** ```sql