Skip to content

Commit 199c441

Browse files
committed
HIVE-29368: regenerated impacted test results + added an explanation comment
1 parent 633951c commit 199c441

File tree

8 files changed

+54
-46
lines changed

8 files changed

+54
-46
lines changed

ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,15 @@ public void add(ColStatistics stat) {
4141
if (stat.getAvgColLen() > result.getAvgColLen()) {
4242
result.setAvgColLen(stat.getAvgColLen());
4343
}
44+
45+
// NDVs can only be accurately combined if full information about columns, query branches and
46+
// their relationships is available. Without that info, there is only one "truly conservative"
47+
// value of NDV which is 0, which means that the NDV is unknown. It forces the optimizer
48+
// to make the most conservative decisions possible, which is the exact goal of
49+
// PessimisticStatCombiner. It does inflate statistics in multiple cases, but at the same time it
50+
// also ensures that the query execution does not "blow up" due to too optimistic stats estimates.
4451
result.setCountDistint(0L);
52+
4553
if (stat.getNumNulls() > result.getNumNulls()) {
4654
result.setNumNulls(stat.getNumNulls());
4755
}

ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -492,13 +492,13 @@ STAGE PLANS:
492492
minReductionHashAggr: 0.99
493493
mode: hash
494494
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
495-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
495+
Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE
496496
Reduce Output Operator
497497
key expressions: _col0 (type: string), _col1 (type: string)
498498
null sort order: zz
499499
sort order: ++
500500
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
501-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
501+
Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE
502502
value expressions: _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct<count:bigint,sum:double,input:int>), _col9 (type: bigint), _col10 (type: binary)
503503
Reducer 3
504504
Execution mode: vectorized, llap
@@ -508,14 +508,14 @@ STAGE PLANS:
508508
keys: KEY._col0 (type: string), KEY._col1 (type: string)
509509
mode: mergepartial
510510
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
511-
Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
511+
Statistics: Num rows: 79 Data size: 40764 Basic stats: COMPLETE Column stats: COMPLETE
512512
Select Operator
513513
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string)
514514
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
515-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
515+
Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE
516516
File Output Operator
517517
compressed: false
518-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
518+
Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE
519519
table:
520520
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
521521
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

ql/src/test/results/clientpositive/llap/innerjoin1.q.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,14 +142,14 @@ STAGE PLANS:
142142
0 _col2 (type: string)
143143
1 _col2 (type: string)
144144
outputColumnNames: _col0, _col1, _col3, _col4
145-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
145+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
146146
Select Operator
147147
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
148148
outputColumnNames: _col0, _col1, _col2, _col3
149-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
149+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
150150
File Output Operator
151151
compressed: false
152-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
152+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
153153
table:
154154
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
155155
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -249,14 +249,14 @@ STAGE PLANS:
249249
0 _col2 (type: string)
250250
1 _col2 (type: string)
251251
outputColumnNames: _col0, _col1, _col3, _col4
252-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
252+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
253253
Select Operator
254254
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
255255
outputColumnNames: _col0, _col1, _col2, _col3
256-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
256+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
257257
File Output Operator
258258
compressed: false
259-
Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
259+
Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE
260260
table:
261261
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
262262
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,15 @@ STAGE PLANS:
9696
minReductionHashAggr: 0.99
9797
mode: hash
9898
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
99-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
99+
Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE
100100
Reduce Output Operator
101101
bucketingVersion: 2
102102
key expressions: _col0 (type: string), _col1 (type: string)
103103
null sort order: zz
104104
numBuckets: -1
105105
sort order: ++
106106
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
107-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
107+
Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE
108108
tag: -1
109109
value expressions: _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct<count:bigint,sum:double,input:int>), _col9 (type: bigint), _col10 (type: binary)
110110
auto parallelism: true
@@ -199,18 +199,18 @@ STAGE PLANS:
199199
keys: KEY._col0 (type: string), KEY._col1 (type: string)
200200
mode: mergepartial
201201
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
202-
Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
202+
Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE
203203
Select Operator
204204
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string)
205205
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
206-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
206+
Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
207207
File Output Operator
208208
bucketingVersion: 2
209209
compressed: false
210210
GlobalTableId: 0
211211
#### A masked pattern was here ####
212212
NumFilesPerFileSink: 1
213-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
213+
Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
214214
#### A masked pattern was here ####
215215
table:
216216
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -461,15 +461,15 @@ STAGE PLANS:
461461
minReductionHashAggr: 0.99
462462
mode: hash
463463
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
464-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
464+
Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE
465465
Reduce Output Operator
466466
bucketingVersion: 2
467467
key expressions: _col0 (type: string), _col1 (type: string)
468468
null sort order: zz
469469
numBuckets: -1
470470
sort order: ++
471471
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
472-
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE
472+
Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE
473473
tag: -1
474474
value expressions: _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct<count:bigint,sum:double,input:int>), _col9 (type: bigint), _col10 (type: binary)
475475
auto parallelism: true
@@ -564,18 +564,18 @@ STAGE PLANS:
564564
keys: KEY._col0 (type: string), KEY._col1 (type: string)
565565
mode: mergepartial
566566
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
567-
Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
567+
Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE
568568
Select Operator
569569
expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string)
570570
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
571-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
571+
Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
572572
File Output Operator
573573
bucketingVersion: 2
574574
compressed: false
575575
GlobalTableId: 0
576576
#### A masked pattern was here ####
577577
NumFilesPerFileSink: 1
578-
Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE
578+
Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE
579579
#### A masked pattern was here ####
580580
table:
581581
input format: org.apache.hadoop.mapred.SequenceFileInputFormat

0 commit comments

Comments
 (0)