Skip to content

Commit 4218877

Browse files
authored
HIVE-28637: Fix the issue of datasize becoming negative due to overflow during addition (yijiuqi, reviewed by Seonggon Namgung, Shohei Okumiya)
1 parent 3483bc3 commit 4218877

File tree

3 files changed: +416 -4 lines changed

ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -220,14 +220,14 @@ public Statistics clone() {
220 220   }
221 221
222 222   public void addBasicStats(Statistics stats) {
223     -   dataSize += stats.dataSize;
224     -   numRows += stats.numRows;
    223 +   dataSize = StatsUtils.safeAdd(dataSize, stats.dataSize);
    224 +   numRows = StatsUtils.safeAdd(numRows, stats.numRows);
225 225     basicStatsState = inferColumnStatsState(basicStatsState, stats.basicStatsState);
226 226   }
227 227
228 228   @Deprecated
229 229   public void addToDataSize(long rds) {
230     -   dataSize += rds;
    230 +   dataSize = StatsUtils.safeAdd(dataSize, rds);
231 231   }
232 232
233 233   public void setColumnStats(Map<String, ColStatistics> colStats) {
@@ -255,7 +255,7 @@ public void addToColumnStats(List<ColStatistics> colStats) {
255 255   if (columnStats.containsKey(key) && columnStats.get(key) != null) {
256 256     updatedCS = columnStats.get(key);
257 257     updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen()));
258     -   updatedCS.setNumNulls(updatedCS.getNumNulls() + cs.getNumNulls());
    258 +   updatedCS.setNumNulls(StatsUtils.safeAdd(updatedCS.getNumNulls(), cs.getNumNulls()));
259 259     updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint()));
260 260     columnStats.put(key, updatedCS);
261 261   } else {
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
      1 + create table explain_multiple_ptf_big_table
      2 + (
      3 + key1 string,
      4 + value_str1 string,
      5 + key3 string
      6 + );
      7 + create table explain_multiple_ptf_big_table2
      8 + (
      9 + key21 string,
     10 + value_str21 string,
     11 + key23 string
     12 + );
     13 + alter table explain_multiple_ptf_big_table
     14 + update statistics set('numRows' = '4611686036854775807',
     15 + 'rawDataSize' = '922337203685477500');
     16 + alter table explain_multiple_ptf_big_table2
     17 + update statistics set('numRows' = '4611686036854775807',
     18 + 'rawDataSize' = '9223372036854775800');
     19 + explain
     20 + select *,
     21 + row_number() over (partition by key order by key2 desc) rn,
     22 + row_number() over (partition by key2 order by key desc) rn2,
     23 + max(value_str) over (partition by key2 order by key desc) max1,
     24 + max(value_str) over (partition by key order by key2 desc) max3,
     25 + min(value_str) over (partition by key2 order by key desc) min1,
     26 + min(value_str) over (partition by key order by key2 desc) min3,
     27 + last_value(value_str) over (partition by key) lv,
     28 + first_value(value_str) over (partition by key2) fv,
     29 + max(value_str) over (partition by key) fv21
     30 + from (select key1 key, value_str1 value_str, key3 key2
     31 + from explain_multiple_ptf_big_table
     32 + union all
     33 + select key21 key, value_str21 value_str, key23 key2
     34 + from explain_multiple_ptf_big_table2) a1;
     35 +
0 commit comments

Comments
 (0)