@@ -32,11 +32,11 @@ use arrow::datatypes::GenericBinaryType;
3232use arrow:: datatypes:: GenericStringType ;
3333use datafusion_common:: utils:: proxy:: VecAllocExt ;
3434
35+ use crate :: aggregates:: group_values:: null_builder:: MaybeNullBufferBuilder ;
36+ use datafusion_physical_expr_common:: binary_map:: { OutputType , INITIAL_BUFFER_CAPACITY } ;
3537use std:: sync:: Arc ;
3638use std:: vec;
3739
38- use datafusion_physical_expr_common:: binary_map:: { OutputType , INITIAL_BUFFER_CAPACITY } ;
39-
4040/// Trait for storing a single column of group values in [`GroupValuesColumn`]
4141///
4242/// Implementations of this trait store an in-progress collection of group values
@@ -47,6 +47,8 @@ use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAP
4747pub trait GroupColumn : Send + Sync {
4848 /// Returns equal if the row stored in this builder at `lhs_row` is equal to
4949 /// the row in `array` at `rhs_row`
50+ ///
51+ /// Note that this comparison returns true if both elements are NULL
5052 fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool ;
5153 /// Appends the row at `row` in `array` to this builder
5254 fn append_val ( & mut self , array : & ArrayRef , row : usize ) ;
@@ -61,61 +63,96 @@ pub trait GroupColumn: Send + Sync {
6163 fn take_n ( & mut self , n : usize ) -> ArrayRef ;
6264}
6365
64- /// An implementation of [`GroupColumn`] for primitive types.
65- pub struct PrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
66+ /// An implementation of [`GroupColumn`] for primitive values which are known to have no nulls
67+ #[ derive( Debug ) ]
68+ pub struct NonNullPrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
6669 group_values : Vec < T :: Native > ,
67- nulls : Vec < bool > ,
68- /// whether the array contains at least one null, for fast non-null path
69- has_null : bool ,
70- /// Can the input array contain nulls?
71- nullable : bool ,
7270}
7371
74- impl < T > PrimitiveGroupValueBuilder < T >
72+ impl < T > NonNullPrimitiveGroupValueBuilder < T >
7573where
7674 T : ArrowPrimitiveType ,
7775{
78- pub fn new ( nullable : bool ) -> Self {
76+ pub fn new ( ) -> Self {
7977 Self {
8078 group_values : vec ! [ ] ,
81- nulls : vec ! [ ] ,
82- has_null : false ,
83- nullable,
8479 }
8580 }
8681}
8782
88- impl < T : ArrowPrimitiveType > GroupColumn for PrimitiveGroupValueBuilder < T > {
83+ impl < T : ArrowPrimitiveType > GroupColumn for NonNullPrimitiveGroupValueBuilder < T > {
8984 fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool {
90- // non-null fast path
91- // both non-null
92- if !self . nullable {
93- return self . group_values [ lhs_row]
94- == array. as_primitive :: < T > ( ) . value ( rhs_row) ;
95- }
85+ // know input has no nulls
86+ self . group_values [ lhs_row] == array. as_primitive :: < T > ( ) . value ( rhs_row)
87+ }
9688
97- // lhs is non-null
98- if self . nulls [ lhs_row] {
99- if array. is_null ( rhs_row) {
100- return false ;
101- }
89+ fn append_val ( & mut self , array : & ArrayRef , row : usize ) {
90+ // input can't possibly have nulls, so don't worry about them
91+ self . group_values . push ( array. as_primitive :: < T > ( ) . value ( row) )
92+ }
93+
94+ fn len ( & self ) -> usize {
95+ self . group_values . len ( )
96+ }
97+
98+ fn size ( & self ) -> usize {
99+ self . group_values . allocated_size ( )
100+ }
101+
102+ fn build ( self : Box < Self > ) -> ArrayRef {
103+ let Self { group_values } = * self ;
102104
103- return self . group_values [ lhs_row]
104- == array. as_primitive :: < T > ( ) . value ( rhs_row) ;
105+ let nulls = None ;
106+
107+ Arc :: new ( PrimitiveArray :: < T > :: new (
108+ ScalarBuffer :: from ( group_values) ,
109+ nulls,
110+ ) )
111+ }
112+
113+ fn take_n ( & mut self , n : usize ) -> ArrayRef {
114+ let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
115+ let first_n_nulls = None ;
116+
117+ Arc :: new ( PrimitiveArray :: < T > :: new (
118+ ScalarBuffer :: from ( first_n) ,
119+ first_n_nulls,
120+ ) )
121+ }
122+ }
123+
124+ /// An implementation of [`GroupColumn`] for primitive values which may have nulls
125+ #[ derive( Debug ) ]
126+ pub struct PrimitiveGroupValueBuilder < T : ArrowPrimitiveType > {
127+ group_values : Vec < T :: Native > ,
128+ nulls : MaybeNullBufferBuilder ,
129+ }
130+
131+ impl < T > PrimitiveGroupValueBuilder < T >
132+ where
133+ T : ArrowPrimitiveType ,
134+ {
135+ pub fn new ( ) -> Self {
136+ Self {
137+ group_values : vec ! [ ] ,
138+ nulls : MaybeNullBufferBuilder :: new ( ) ,
105139 }
140+ }
141+ }
106142
107- array. is_null ( rhs_row)
143+ impl < T : ArrowPrimitiveType > GroupColumn for PrimitiveGroupValueBuilder < T > {
144+ fn equal_to ( & self , lhs_row : usize , array : & ArrayRef , rhs_row : usize ) -> bool {
145+ self . nulls . is_null ( lhs_row) == array. is_null ( rhs_row)
146+ && self . group_values [ lhs_row] == array. as_primitive :: < T > ( ) . value ( rhs_row)
108147 }
109148
110149 fn append_val ( & mut self , array : & ArrayRef , row : usize ) {
111- if self . nullable && array. is_null ( row) {
150+ if array. is_null ( row) {
151+ self . nulls . append ( true ) ;
112152 self . group_values . push ( T :: default_value ( ) ) ;
113- self . nulls . push ( false ) ;
114- self . has_null = true ;
115153 } else {
116- let elem = array. as_primitive :: < T > ( ) . value ( row) ;
117- self . group_values . push ( elem) ;
118- self . nulls . push ( true ) ;
154+ self . nulls . append ( false ) ;
155+ self . group_values . push ( array. as_primitive :: < T > ( ) . value ( row) ) ;
119156 }
120157 }
121158
@@ -128,32 +165,27 @@ impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
128165 }
129166
130167 fn build ( self : Box < Self > ) -> ArrayRef {
131- if self . has_null {
132- Arc :: new ( PrimitiveArray :: < T > :: new (
133- ScalarBuffer :: from ( self . group_values ) ,
134- Some ( NullBuffer :: from ( self . nulls ) ) ,
135- ) )
136- } else {
137- Arc :: new ( PrimitiveArray :: < T > :: new (
138- ScalarBuffer :: from ( self . group_values ) ,
139- None ,
140- ) )
141- }
168+ let Self {
169+ group_values ,
170+ nulls ,
171+ } = * self ;
172+
173+ let nulls = nulls . build ( ) ;
174+
175+ Arc :: new ( PrimitiveArray :: < T > :: new (
176+ ScalarBuffer :: from ( group_values ) ,
177+ nulls ,
178+ ) )
142179 }
143180
144181 fn take_n ( & mut self , n : usize ) -> ArrayRef {
145- if self . has_null {
146- let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
147- let first_n_nulls = self . nulls . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
148- Arc :: new ( PrimitiveArray :: < T > :: new (
149- ScalarBuffer :: from ( first_n) ,
150- Some ( NullBuffer :: from ( first_n_nulls) ) ,
151- ) )
152- } else {
153- let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
154- self . nulls . truncate ( self . nulls . len ( ) - n) ;
155- Arc :: new ( PrimitiveArray :: < T > :: new ( ScalarBuffer :: from ( first_n) , None ) )
156- }
182+ let first_n = self . group_values . drain ( 0 ..n) . collect :: < Vec < _ > > ( ) ;
183+ let first_n_nulls = self . nulls . take_n ( n) ;
184+
185+ Arc :: new ( PrimitiveArray :: < T > :: new (
186+ ScalarBuffer :: from ( first_n) ,
187+ first_n_nulls,
188+ ) )
157189 }
158190}
159191
0 commit comments