Skip to content

Commit 1285297

Browse files
committed
HIVE-29368: SonarQube feedback + one more test
1 parent 8b361de commit 1285297

File tree

2 files changed

+29
-13
lines changed

2 files changed

+29
-13
lines changed

ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2088,19 +2088,7 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
20882088
// compute product of distinct values of grouping columns
20892089
for (ColStatistics cs : colStats) {
20902090
if (cs != null) {
2091-
long ndv = cs.getCountDistint();
2092-
2093-
if (ndv == 0L) {
2094-
// Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible
2095-
// However, there is a special exception for "constant NULL" columns. They are intentionally generated
2096-
// with NDV values of 0 and numNulls == numRows, while their actual NDV is 1
2097-
if (cs.getNumNulls() >= parentStats.getNumRows()) {
2098-
ndv = 1L;
2099-
}
2100-
} else if (cs.getNumNulls() > 0L) {
2101-
ndv = StatsUtils.safeAdd(ndv, 1L);
2102-
}
2103-
ndvValues.add(ndv);
2091+
ndvValues.add(getGroupingColumnNdv(cs, parentStats));
21042092
} else {
21052093
if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
21062094
// the column must be an aggregate column inserted by GBY. We
@@ -2119,4 +2107,20 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
21192107

21202108
return ndvValues;
21212109
}
2110+
2111+
private static long getGroupingColumnNdv(ColStatistics cs, Statistics parentStats) {
2112+
long ndv = cs.getCountDistint();
2113+
2114+
if (ndv == 0L) {
2115+
// Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible
2116+
// However, there is a special exception for "constant NULL" columns. They are intentionally generated
2117+
// with NDV values of 0 and numNulls == numRows, while their actual NDV is 1
2118+
if (cs.getNumNulls() >= parentStats.getNumRows()) {
2119+
ndv = 1L;
2120+
}
2121+
} else if (cs.getNumNulls() > 0L) {
2122+
ndv = StatsUtils.safeAdd(ndv, 1L);
2123+
}
2124+
return ndv;
2125+
}
21222126
}

ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,4 +321,16 @@ void testComputeNDVGroupingColumnsPartialStats() {
321321
assertEquals(0, ndv, "Partial stats (ndv=0, numNulls<numRows) should return 0, not inflate to 1");
322322
}
323323

324+
@Test
325+
void testComputeNDVGroupingColumnsAllNulls() {
326+
// When ndv=0 and numNulls >= numRows, it's a "constant NULL" column, so NDV should be 1
327+
ColStatistics cs = createColStats("all_nulls_col", 0, 1000);
328+
Statistics parentStats = createParentStats(1000);
329+
List<ColStatistics> colStats = Collections.singletonList(cs);
330+
331+
long ndv = StatsUtils.computeNDVGroupingColumns(colStats, parentStats, false);
332+
333+
assertEquals(1, ndv, "All-null column (ndv=0, numNulls==numRows) should have NDV inflated to 1");
334+
}
335+
324336
}

0 commit comments

Comments
 (0)