2222 min as min_ ,
2323 object_construct ,
2424 sum as sum_ ,
25+ count_distinct ,
2526)
2627from snowflake .snowpark .modin .plugin ._internal .aggregation_utils import (
2728 get_pandas_aggr_func_name ,
@@ -768,7 +769,7 @@ def prepare_pivot_aggregation_for_handling_missing_and_null_values(
768769 bar | 0.0 | Nan | 0.0 | Nan
769770 foo | 1.0 | 1.0 | 0.0 | 1.0
770771
771- To match pandas behavior, we do an upfront group-by aggregation for count and sum to get the correct
772+ To match pandas behavior, we do an upfront group-by aggregation for count, nunique and sum to get the correct
772773 values for all null values via snowflake query:
773774
774775 select a, b, coalesce(sum(C), 0) as sum_c, count(C) as cnt_c from df_small_data group by a, b;
@@ -792,16 +793,21 @@ def prepare_pivot_aggregation_for_handling_missing_and_null_values(
792793 Snowpark dataframe that has done a pre-pivot aggregation needed for matching pandas pivot behavior as
793794 described earlier.
794795 """
795- if snowpark_aggr_func in [sum_ , count ]:
796- agg_expr = (
797- coalesce (sum_ (aggr_snowflake_quoted_identifier ), pandas_lit (0 )).as_ (
796+ if snowpark_aggr_func in [sum_ , count , count_distinct ]:
797+ if snowpark_aggr_func == sum_ :
798+ agg_expr = coalesce (
799+ sum_ (aggr_snowflake_quoted_identifier ), pandas_lit (0 )
800+ ).as_ (aggr_snowflake_quoted_identifier )
801+ elif snowpark_aggr_func == count :
802+ agg_expr = count (aggr_snowflake_quoted_identifier ).as_ (
798803 aggr_snowflake_quoted_identifier
799804 )
800- if snowpark_aggr_func == sum_
801- else count (aggr_snowflake_quoted_identifier ).as_ (
805+ elif snowpark_aggr_func == count_distinct :
806+ agg_expr = count_distinct (aggr_snowflake_quoted_identifier ).as_ (
802807 aggr_snowflake_quoted_identifier
803808 )
804- )
809+ else :
810+ raise NotImplementedError ("Aggregate function not supported for pivot" )
805811 pre_pivot_ordered_dataframe = pivot_ordered_dataframe .group_by (
806812 grouping_snowflake_quoted_identifiers , agg_expr
807813 )
0 commit comments