Skip to content

Commit bd86e3c

Browse files
committed
HIVE-29368: only increment ndv by one in extractNDVGroupingColumns() if it is "known"
1 parent f0022f7 commit bd86e3c

File tree

2 files changed

+14
-13
lines changed

2 files changed

+14
-13
lines changed

ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2087,7 +2087,8 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
20872087
for (ColStatistics cs : colStats) {
20882088
if (cs != null) {
20892089
long ndv = cs.getCountDistint();
2090-
if (cs.getNumNulls() > 0) {
2090+
// Only increment ndv value if it is "known"
2091+
if (ndv > 0 && cs.getNumNulls() > 0) {
20912092
ndv = StatsUtils.safeAdd(ndv, 1);
20922093
}
20932094
ndvValues.add(ndv);

ql/src/test/results/clientpositive/llap/subquery_notin.q.out

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,13 +1536,13 @@ STAGE PLANS:
15361536
minReductionHashAggr: 0.99
15371537
mode: hash
15381538
outputColumnNames: _col0
1539-
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
1539+
Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
15401540
Reduce Output Operator
15411541
key expressions: _col0 (type: string)
15421542
null sort order: z
15431543
sort order: +
15441544
Map-reduce partition columns: _col0 (type: string)
1545-
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
1545+
Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
15461546
Execution mode: vectorized, llap
15471547
LLAP IO: all inputs
15481548
Reducer 2
@@ -1555,11 +1555,11 @@ STAGE PLANS:
15551555
0 _col0 (type: string)
15561556
1 _col0 (type: string)
15571557
outputColumnNames: _col0, _col2
1558-
Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE
1558+
Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE
15591559
Reduce Output Operator
15601560
null sort order:
15611561
sort order:
1562-
Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE
1562+
Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE
15631563
value expressions: _col0 (type: string), _col2 (type: boolean)
15641564
Reducer 3
15651565
Execution mode: llap
@@ -1571,21 +1571,21 @@ STAGE PLANS:
15711571
0
15721572
1
15731573
outputColumnNames: _col0, _col2, _col3, _col4
1574-
Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE
1574+
Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE
15751575
Select Operator
15761576
expressions: _col0 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean)
15771577
outputColumnNames: _col0, _col1, _col2, _col4
1578-
Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE
1578+
Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE
15791579
Filter Operator
15801580
predicate: ((_col1 = 0L) or (_col4 is null and (_col2 >= _col1) and _col0 is not null)) (type: boolean)
1581-
Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE
1581+
Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE
15821582
Select Operator
15831583
expressions: _col0 (type: string)
15841584
outputColumnNames: _col0
1585-
Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE
1585+
Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE
15861586
File Output Operator
15871587
compressed: false
1588-
Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE
1588+
Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE
15891589
table:
15901590
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
15911591
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1610,17 +1610,17 @@ STAGE PLANS:
16101610
keys: KEY._col0 (type: string)
16111611
mode: mergepartial
16121612
outputColumnNames: _col0
1613-
Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
1613+
Statistics: Num rows: 41 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
16141614
Select Operator
16151615
expressions: _col0 (type: string), true (type: boolean)
16161616
outputColumnNames: _col0, _col1
1617-
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
1617+
Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE
16181618
Reduce Output Operator
16191619
key expressions: _col0 (type: string)
16201620
null sort order: z
16211621
sort order: +
16221622
Map-reduce partition columns: _col0 (type: string)
1623-
Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE
1623+
Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE
16241624
value expressions: _col1 (type: boolean)
16251625

16261626
Stage: Stage-0

0 commit comments

Comments
 (0)