@@ -2088,19 +2088,7 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
20882088 // compute product of distinct values of grouping columns
20892089 for (ColStatistics cs : colStats ) {
20902090 if (cs != null ) {
2091- long ndv = cs .getCountDistint ();
2092-
2093- if (ndv == 0L ) {
2094- // Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible
2095- // However, there is a special exception for "constant NULL" columns. They are intentionally generated
2096- // with NDV values of 0 and numNulls == numRows, while their actual NDV is 1
2097- if (cs .getNumNulls () >= parentStats .getNumRows ()) {
2098- ndv = 1L ;
2099- }
2100- } else if (cs .getNumNulls () > 0L ) {
2101- ndv = StatsUtils .safeAdd (ndv , 1L );
2102- }
2103- ndvValues .add (ndv );
2091+ ndvValues .add (getGroupingColumnNdv (cs , parentStats ));
21042092 } else {
21052093 if (parentStats .getColumnStatsState ().equals (Statistics .State .COMPLETE )) {
21062094 // the column must be an aggregate column inserted by GBY. We
@@ -2119,4 +2107,20 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
21192107
21202108 return ndvValues ;
21212109 }
2110+
2111+ private static long getGroupingColumnNdv (ColStatistics cs , Statistics parentStats ) { // Returns the NDV (number of distinct values) that extractNDVGroupingColumns records for one grouping column.
2112+ long ndv = cs .getCountDistint ();
2113+ // cs is dereferenced without a null check here; the visible caller only invokes this method when cs != null.
2114+ if (ndv == 0L ) {
2115+ // Typically, ndv == 0 means "NDV unknown": no safe GROUP BY adjustment is possible, so 0 is returned unchanged.
2116+ // The one exception is a "constant NULL" column. Such columns are intentionally generated
2117+ // with an NDV of 0 and numNulls == numRows, while their actual NDV is 1 (the check below uses >=, not ==).
2118+ if (cs .getNumNulls () >= parentStats .getNumRows ()) {
2119+ ndv = 1L ;
2120+ }
2121+ } else if (cs .getNumNulls () > 0L ) { // NDV is known and the column contains NULLs: count one additional value (presumably NULL forms its own group -- TODO confirm)
2122+ ndv = StatsUtils .safeAdd (ndv , 1L ); // presumably an overflow-safe addition -- verify against StatsUtils.safeAdd
2123+ }
2124+ return ndv ;
2125+ }
21222126}
0 commit comments