@@ -452,35 +452,37 @@ struct GroupedStatisticImpl : public GroupedAggregator {
// Init helper taking VarianceOptions: forwards ddof, skip_nulls and min_count
// from `options` to the (ctx, args, stat_type, ddof, skip_nulls, ...) overload.
// In the added (+) line, `biased` is passed as a fixed `false`; this overload
// does not read a bias setting from VarianceOptions.
452452 Status InitInternal (ExecContext* ctx, const KernelInitArgs& args,
453453 StatisticType stat_type, const VarianceOptions& options) {
454454 return InitInternal (ctx, args, stat_type, options.ddof , options.skip_nulls ,
455- options.min_count );
455+ /* biased= */ false , options.min_count );
456456 }
457457
458458 // Init helper for hash_skew and hash_kurtosis
// Overload taking SkewOptions: ddof is always passed as 0, while skip_nulls
// and min_count come from `options`. In the added (+) line, `options.biased`
// is forwarded as well, so the caller's bias choice reaches the stored state.
459459 Status InitInternal (ExecContext* ctx, const KernelInitArgs& args,
460460 StatisticType stat_type, const SkewOptions& options) {
461461 return InitInternal (ctx, args, stat_type, /* ddof=*/ 0 , options.skip_nulls ,
462- options.min_count );
462+ options.biased , options. min_count );
463463 }
464464
// Resolves the decimal scale at compile time and forwards to the overload that
// takes it explicitly:
//   - when is_decimal_type<Type> holds, the scale is read from the type of
//     args.inputs[0] (checked_cast to DecimalType);
//   - otherwise a scale of 0 is passed.
// The added (+) lines insert the new `biased` parameter between `skip_nulls`
// and `min_count` and thread it through both branches unchanged.
465465 Status InitInternal (ExecContext* ctx, const KernelInitArgs& args,
466- StatisticType stat_type, int ddof, bool skip_nulls,
466+ StatisticType stat_type, int ddof, bool skip_nulls, bool biased,
467467 uint32_t min_count) {
468468 if constexpr (is_decimal_type<Type>::value) {
469469 int32_t decimal_scale =
470470 checked_cast<const DecimalType&>(*args.inputs [0 ].type ).scale ();
471- return InitInternal (ctx, stat_type, decimal_scale, ddof, skip_nulls, min_count);
471+ return InitInternal (ctx, stat_type, decimal_scale, ddof, skip_nulls, biased,
472+ min_count);
472473 } else {
473- return InitInternal (ctx, stat_type, /* decimal_scale=*/ 0 , ddof, skip_nulls,
474+ return InitInternal (ctx, stat_type, /* decimal_scale=*/ 0 , ddof, skip_nulls, biased,
474475 min_count);
475476 }
476477 }
477478
478479 Status InitInternal (ExecContext* ctx, StatisticType stat_type, int32_t decimal_scale,
479- int ddof, bool skip_nulls, uint32_t min_count) {
480+ int ddof, bool skip_nulls, bool biased, uint32_t min_count) {
480481 stat_type_ = stat_type;
481482 moments_level_ = moments_level_for_statistic (stat_type_);
482483 decimal_scale_ = decimal_scale;
483484 skip_nulls_ = skip_nulls;
485+ biased_ = biased;
484486 min_count_ = min_count;
485487 ddof_ = ddof;
486488 ctx_ = ctx;
@@ -539,7 +541,7 @@ struct GroupedStatisticImpl : public GroupedAggregator {
539541 Status ConsumeGeneric (const ExecSpan& batch) {
540542 GroupedStatisticImpl<Type> state;
541543 RETURN_NOT_OK (state.InitInternal (ctx_, stat_type_, decimal_scale_, ddof_, skip_nulls_,
542- min_count_));
544+ biased_, min_count_));
543545 RETURN_NOT_OK (state.Resize (num_groups_));
544546 int64_t * counts = state.counts_ .mutable_data ();
545547 double * means = state.means_ .mutable_data ();
@@ -612,7 +614,7 @@ struct GroupedStatisticImpl : public GroupedAggregator {
612614 var_std.resize (num_groups_);
613615 GroupedStatisticImpl<Type> state;
614616 RETURN_NOT_OK (state.InitInternal (ctx_, stat_type_, decimal_scale_, ddof_,
615- skip_nulls_, min_count_));
617+ skip_nulls_, biased_, min_count_));
616618 RETURN_NOT_OK (state.Resize (num_groups_));
617619 int64_t * other_counts = state.counts_ .mutable_data ();
618620 double * other_means = state.means_ .mutable_data ();
@@ -739,7 +741,9 @@ struct GroupedStatisticImpl : public GroupedAggregator {
739741 const double * m3s = m3s_data ();
740742 const double * m4s = m4s_data ();
741743 for (int64_t i = 0 ; i < num_groups_; ++i) {
742- if (counts[i] > ddof_ && counts[i] >= min_count_) {
744+ if (counts[i] > ddof_ && counts[i] >= min_count_ &&
745+ (stat_type_ != StatisticType::Skew || biased_ || counts[i] > 2 ) &&
746+ (stat_type_ != StatisticType::Kurtosis || biased_ || counts[i] > 3 )) {
743747 const auto moments = Moments (counts[i], means[i], m2s[i], m3s[i], m4s[i]);
744748 switch (stat_type_) {
745749 case StatisticType::Var:
@@ -749,10 +753,10 @@ struct GroupedStatisticImpl : public GroupedAggregator {
749753 results[i] = moments.Stddev (ddof_);
750754 break ;
751755 case StatisticType::Skew:
752- results[i] = moments.Skew ();
756+ results[i] = moments.Skew (biased_ );
753757 break ;
754758 case StatisticType::Kurtosis:
755- results[i] = moments.Kurtosis ();
759+ results[i] = moments.Kurtosis (biased_ );
756760 break ;
757761 default :
758762 return Status::NotImplemented (" Statistic type " ,
@@ -809,6 +813,7 @@ struct GroupedStatisticImpl : public GroupedAggregator {
809813 int moments_level_;
810814 int32_t decimal_scale_;
811815 bool skip_nulls_;
// Added field: assigned from the `biased` argument of InitInternal and passed
// to moments.Skew(...) / moments.Kurtosis(...). It has no default initializer
// in this declaration.
816+ bool biased_;
812817 uint32_t min_count_;
813818 int ddof_;
814819 int64_t num_groups_ = 0 ;
0 commit comments