@@ -242,11 +242,9 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
242242///
243243/// For calculating the accurate medians of groups, we need to store all values
244244/// of groups before final evaluation.
245- /// And values in each group will be stored in a `Vec<T>`, so the total group values
245+ /// So values in each group will be stored in a `Vec<T>`, so the total group values
246246/// will be actually organized as a `Vec<Vec<T>>`.
247247///
248- /// In partial aggregation stage, the `values`
249- ///
250248#[ derive( Debug ) ]
251249struct MedianGroupsAccumulator < T : ArrowNumericType + Send > {
252250 data_type : DataType ,
@@ -273,7 +271,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
273271 assert_eq ! ( values. len( ) , 1 , "single argument to update_batch" ) ;
274272 let values = values[ 0 ] . as_primitive :: < T > ( ) ;
275273
276- // increment counts, update sums
274+ // Push the `not nulls + not filtered` row into its group
277275 self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
278276 accumulate (
279277 group_indices,
@@ -297,30 +295,43 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for MedianGroupsAccumulator<T
297295 ) -> Result < ( ) > {
298296 assert_eq ! ( values. len( ) , 1 , "one argument to merge_batch" ) ;
299297
300- // The merged values should be organized like as a `non-nullable ListArray` like:
298+ // The merged values should be organized like as a `ListArray` which is nullable,
299+ // but `values` in it is `non-nullable`(`values` with nulls usually generated
300+ // from `convert_to_state`).
301+ //
302+ // Following is the possible and impossible input `values`:
301303 //
304+ // # Possible values
302305 // ```text
303306 // group 0: [1, 2, 3]
304- // group 1: [4, 5]
307+ // group 1: null (list array is nullable)
305308 // group 2: [6, 7, 8]
306309 // ...
307310 // group n: [...]
308311 // ```
309312 //
313+ // # Impossible values
314+ // ```text
315+ // group x: [1, 2, null] (values in list array is non-nullable)
316+ // ```
317+ //
310318 let input_group_values = values[ 0 ] . as_list :: < i32 > ( ) ;
311- assert ! ( input_group_values. null_count( ) == 0 ) ;
312319
313320 // Ensure group values big enough
314321 self . group_values . resize ( total_num_groups, Vec :: new ( ) ) ;
315322
316323 // Extend values to related groups
324+ // TODO: avoid using iterator of the `ListArray`, this will lead to
325+ // many calls of `slice` of its `values` array, and `slice` is not
326+ // so efficient.
317327 group_indices
318328 . iter ( )
319329 . zip ( input_group_values. iter ( ) )
320330 . for_each ( |( & group_index, values_opt) | {
321- let values = values_opt. unwrap ( ) ;
322- let values = values. as_primitive :: < T > ( ) ;
323- self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
331+ if let Some ( values) = values_opt {
332+ let values = values. as_primitive :: < T > ( ) ;
333+ self . group_values [ group_index] . extend ( values. values ( ) . iter ( ) ) ;
334+ }
324335 } ) ;
325336
326337 Ok ( ( ) )
0 commit comments