Skip to content

Commit 1d1f353

Browse files
authored
refactor: Consolidate single group by column code into sub modules (#13392)
* sort out codes of single column group by. * sort out codes. * move row to suitable place, and improve comments. * fix doc.
1 parent 2d86725 commit 1d1f353

File tree

11 files changed

+66
-23
lines changed

11 files changed

+66
-23
lines changed

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,27 @@
2020
use arrow::record_batch::RecordBatch;
2121
use arrow_array::{downcast_primitive, ArrayRef};
2222
use arrow_schema::{DataType, SchemaRef};
23-
use bytes_view::GroupValuesBytesView;
2423
use datafusion_common::Result;
2524

26-
pub(crate) mod primitive;
2725
use datafusion_expr::EmitTo;
28-
use primitive::GroupValuesPrimitive;
2926

30-
mod multi_column;
27+
pub(crate) mod multi_group_by;
28+
3129
mod row;
32-
use multi_column::GroupValuesColumn;
30+
mod single_group_by;
31+
use datafusion_physical_expr::binary_map::OutputType;
32+
use multi_group_by::GroupValuesColumn;
3333
use row::GroupValuesRows;
3434

35-
mod bytes;
36-
mod bytes_view;
37-
use bytes::GroupValuesByes;
38-
use datafusion_physical_expr::binary_map::OutputType;
35+
pub(crate) use single_group_by::primitive::HashValue;
3936

40-
use crate::aggregates::order::GroupOrdering;
37+
use crate::aggregates::{
38+
group_values::single_group_by::{
39+
bytes::GroupValuesByes, bytes_view::GroupValuesBytesView,
40+
primitive::GroupValuesPrimitive,
41+
},
42+
order::GroupOrdering,
43+
};
4144

4245
mod null_builder;
4346

@@ -77,7 +80,7 @@ mod null_builder;
7780
/// Each distinct group in a hash aggregation is identified by a unique group id
7881
/// (usize) which is assigned by instances of this trait. Group ids are
7982
/// continuous without gaps, starting from 0.
80-
pub trait GroupValues: Send {
83+
pub(crate) trait GroupValues: Send {
8184
/// Calculates the group id for each input row of `cols`, assigning new
8285
/// group ids as necessary.
8386
///
@@ -106,7 +109,21 @@ pub trait GroupValues: Send {
106109
}
107110

108111
/// Return a specialized implementation of [`GroupValues`] for the given schema.
109-
pub fn new_group_values(
112+
///
113+
/// The [`GroupValues`] implementation is chosen as follows:
114+
///
115+
/// - If grouping by a single column whose type has a specialized
116+
/// [`GroupValues`] implementation, that implementation
117+
/// will be chosen.
118+
///
119+
/// - If grouping by multiple columns, and every column type has a specialized
120+
/// [`GroupColumn`] implementation, [`GroupValuesColumn`] will be chosen.
121+
///
122+
/// - Otherwise, the general implementation [`GroupValuesRows`] will be chosen.
123+
///
124+
/// [`GroupColumn`]: crate::aggregates::group_values::multi_group_by::GroupColumn
125+
///
126+
pub(crate) fn new_group_values(
110127
schema: SchemaRef,
111128
group_ordering: &GroupOrdering,
112129
) -> Result<Box<dyn GroupValues>> {
@@ -147,7 +164,7 @@ pub fn new_group_values(
147164
}
148165
}
149166

150-
if multi_column::supported_schema(schema.as_ref()) {
167+
if multi_group_by::supported_schema(schema.as_ref()) {
151168
if matches!(group_ordering, GroupOrdering::None) {
152169
Ok(Box::new(GroupValuesColumn::<false>::try_new(schema)?))
153170
} else {

datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes.rs renamed to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn};
18+
use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn};
1919
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
2020
use arrow::array::{AsArray, BufferBuilder, GenericBinaryArray, GenericStringArray};
2121
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
@@ -403,7 +403,7 @@ where
403403
mod tests {
404404
use std::sync::Arc;
405405

406-
use crate::aggregates::group_values::multi_column::bytes::ByteGroupValueBuilder;
406+
use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder;
407407
use arrow_array::{ArrayRef, StringArray};
408408
use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
409409
use datafusion_physical_expr::binary_map::OutputType;

datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes_view.rs renamed to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn};
18+
use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn};
1919
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
2020
use arrow::array::{make_view, AsArray, ByteView};
2121
use arrow::buffer::ScalarBuffer;
@@ -544,7 +544,7 @@ impl<B: ByteViewType> GroupColumn for ByteViewGroupValueBuilder<B> {
544544
mod tests {
545545
use std::sync::Arc;
546546

547-
use crate::aggregates::group_values::multi_column::bytes_view::ByteViewGroupValueBuilder;
547+
use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder;
548548
use arrow::array::AsArray;
549549
use arrow::datatypes::StringViewType;
550550
use arrow_array::{ArrayRef, StringViewArray};

datafusion/physical-plan/src/aggregates/group_values/multi_column/mod.rs renamed to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! `GroupValues` implementations for grouping by multiple columns
19+
1820
mod bytes;
1921
mod bytes_view;
2022
mod primitive;
2123

2224
use std::mem::{self, size_of};
2325

24-
use crate::aggregates::group_values::multi_column::{
26+
use crate::aggregates::group_values::multi_group_by::{
2527
bytes::ByteGroupValueBuilder, bytes_view::ByteViewGroupValueBuilder,
2628
primitive::PrimitiveGroupValueBuilder,
2729
};
@@ -1138,7 +1140,9 @@ mod tests {
11381140
use datafusion_common::utils::proxy::RawTableAllocExt;
11391141
use datafusion_expr::EmitTo;
11401142

1141-
use crate::aggregates::group_values::{multi_column::GroupValuesColumn, GroupValues};
1143+
use crate::aggregates::group_values::{
1144+
multi_group_by::GroupValuesColumn, GroupValues,
1145+
};
11421146

11431147
use super::GroupIndexView;
11441148

datafusion/physical-plan/src/aggregates/group_values/multi_column/primitive.rs renamed to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn};
18+
use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn};
1919
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
2020
use arrow::buffer::ScalarBuffer;
2121
use arrow_array::cast::AsArray;
@@ -208,7 +208,7 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
208208
mod tests {
209209
use std::sync::Arc;
210210

211-
use crate::aggregates::group_values::multi_column::primitive::PrimitiveGroupValueBuilder;
211+
use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder;
212212
use arrow::datatypes::Int64Type;
213213
use arrow_array::{ArrayRef, Int64Array};
214214
use arrow_buffer::{BooleanBufferBuilder, NullBuffer};

datafusion/physical-plan/src/aggregates/group_values/bytes.rs renamed to datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs

File renamed without changes.

datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs renamed to datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes_view.rs

File renamed without changes.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! `GroupValues` implementations for grouping by a single column
19+
20+
pub(crate) mod bytes;
21+
pub(crate) mod bytes_view;
22+
pub(crate) mod primitive;

datafusion/physical-plan/src/aggregates/group_values/primitive.rs renamed to datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs

File renamed without changes.

datafusion/physical-plan/src/aggregates/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ use crate::execution_plan::CardinalityEffect;
5252
use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
5353
use itertools::Itertools;
5454

55-
pub mod group_values;
55+
pub(crate) mod group_values;
5656
mod no_grouping;
5757
pub mod order;
5858
mod row_hash;

0 commit comments

Comments
 (0)