Skip to content

Commit c346ce9

Browse files
Merge pull request ClickHouse#88224 from ClickHouse/backport/25.8/86560
Backport ClickHouse#86560 to 25.8: Fix stack overflow in quantileDD merge
2 parents 3830a70 + 0b375d7 commit c346ce9

File tree

7 files changed

+192
-80
lines changed

7 files changed

+192
-80
lines changed

src/AggregateFunctions/DDSketch.h

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,49 @@
11
#pragma once
22

3-
#include <memory> // for std::unique_ptr
4-
#include <cmath>
5-
#include <stdexcept>
63
#include <limits>
7-
#include <iostream>
4+
#include <memory> // for std::unique_ptr
85
#include <base/types.h>
96

10-
#include <IO/ReadBuffer.h>
11-
#include <IO/WriteBuffer.h>
12-
7+
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
138
#include <AggregateFunctions/DDSketch/Mapping.h>
149
#include <AggregateFunctions/DDSketch/Store.h>
15-
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
10+
#include <IO/ReadBuffer.h>
11+
#include <IO/ReadHelpers.h>
12+
#include <IO/WriteBuffer.h>
13+
#include <IO/WriteHelpers.h>
14+
#include <Common/Exception.h>
1615

1716
namespace DB
1817
{
1918

2019
namespace ErrorCodes
2120
{
22-
extern const int BAD_ARGUMENTS;
23-
extern const int INCORRECT_DATA;
21+
extern const int BAD_ARGUMENTS;
22+
extern const int INCORRECT_DATA;
2423
}
2524

2625
class DDSketchDenseLogarithmic
2726
{
2827
public:
2928
explicit DDSketchDenseLogarithmic(Float64 relative_accuracy = 0.01)
30-
: mapping(std::make_unique<DDSketchLogarithmicMapping>(relative_accuracy)),
31-
store(std::make_unique<DDSketchDenseStore>()),
32-
negative_store(std::make_unique<DDSketchDenseStore>()),
33-
zero_count(0.0),
34-
count(0.0)
29+
: mapping(std::make_unique<DDSketchLogarithmicMapping>(relative_accuracy))
30+
, store(std::make_unique<DDSketchDenseStore>())
31+
, negative_store(std::make_unique<DDSketchDenseStore>())
32+
, zero_count(0.0)
33+
, count(0.0)
3534
{
3635
}
3736

38-
DDSketchDenseLogarithmic(std::unique_ptr<DDSketchLogarithmicMapping> mapping_,
39-
std::unique_ptr<DDSketchDenseStore> store_,
40-
std::unique_ptr<DDSketchDenseStore> negative_store_,
41-
Float64 zero_count_)
42-
: mapping(std::move(mapping_)),
43-
store(std::move(store_)),
44-
negative_store(std::move(negative_store_)),
45-
zero_count(zero_count_),
46-
count(store->count + negative_store->count + zero_count_)
37+
DDSketchDenseLogarithmic(
38+
std::unique_ptr<DDSketchLogarithmicMapping> mapping_,
39+
std::unique_ptr<DDSketchDenseStore> store_,
40+
std::unique_ptr<DDSketchDenseStore> negative_store_,
41+
Float64 zero_count_)
42+
: mapping(std::move(mapping_))
43+
, store(std::move(store_))
44+
, negative_store(std::move(negative_store_))
45+
, zero_count(zero_count_)
46+
, count(store->count + negative_store->count + zero_count_)
4747
{
4848
}
4949

@@ -97,7 +97,11 @@ class DDSketchDenseLogarithmic
9797
return quantile_value;
9898
}
9999

100-
void copy(const DDSketchDenseLogarithmic& other)
100+
Float64 getGamma() const { return mapping->getGamma(); }
101+
102+
Float64 getCount() const { return count; }
103+
104+
void copy(const DDSketchDenseLogarithmic & other)
101105
{
102106
Float64 rel_acc = (other.mapping->getGamma() - 1) / (other.mapping->getGamma() + 1);
103107
mapping = std::make_unique<DDSketchLogarithmicMapping>(rel_acc);
@@ -109,9 +113,9 @@ class DDSketchDenseLogarithmic
109113
count = other.count;
110114
}
111115

112-
void merge(const DDSketchDenseLogarithmic& other)
116+
void merge(const DDSketchDenseLogarithmic & other)
113117
{
114-
if (mapping->getGamma() != other.mapping->getGamma())
118+
if (*mapping != *other.mapping)
115119
{
116120
// modify the one with higher precision to match the one with lower precision
117121
if (mapping->getGamma() > other.mapping->getGamma())
@@ -147,7 +151,7 @@ class DDSketchDenseLogarithmic
147151

148152
/// NOLINTBEGIN(readability-static-accessed-through-instance)
149153

150-
void serialize(WriteBuffer& buf) const
154+
void serialize(WriteBuffer & buf) const
151155
{
152156
// Write the mapping
153157
writeBinary(enc.FlagIndexMappingBaseLogarithmic.byte, buf);
@@ -165,7 +169,7 @@ class DDSketchDenseLogarithmic
165169
writeBinary(zero_count, buf);
166170
}
167171

168-
void deserialize(ReadBuffer& buf)
172+
void deserialize(ReadBuffer & buf)
169173
{
170174
// Read the mapping
171175
UInt8 flag = 0;
@@ -219,7 +223,7 @@ class DDSketchDenseLogarithmic
219223
auto new_positive_store = std::make_unique<DDSketchDenseStore>();
220224
auto new_negative_store = std::make_unique<DDSketchDenseStore>();
221225

222-
auto remap_store = [this, &new_mapping](DDSketchDenseStore& old_store, std::unique_ptr<DDSketchDenseStore>& target_store)
226+
auto remap_store = [this, &new_mapping](DDSketchDenseStore & old_store, std::unique_ptr<DDSketchDenseStore> & target_store)
223227
{
224228
for (int i = 0; i < old_store.length(); ++i)
225229
{

src/AggregateFunctions/DDSketch/DDSketchEncoding.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#pragma once
22

3-
#include <vector>
4-
#include <stdexcept>
3+
#include <base/types.h>
54

65
/**
76
* An encoded DDSketch comprises multiple contiguous blocks (sequences of bytes).
@@ -36,7 +35,10 @@ class DDSketchEncoding
3635
{
3736
public:
3837
UInt8 byte;
39-
Flag(UInt8 t, UInt8 s) : byte(t | s) { }
38+
Flag(UInt8 t, UInt8 s)
39+
: byte(t | s)
40+
{
41+
}
4042
[[maybe_unused]] UInt8 Type() const { return byte & flagTypeMask; }
4143
[[maybe_unused]] UInt8 SubFlag() const { return byte & subFlagMask; }
4244
};

src/AggregateFunctions/DDSketch/Mapping.h

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,29 @@
11
#pragma once
22

3-
#include <base/types.h>
43
#include <cmath>
5-
#include <stdexcept>
64
#include <limits>
5+
#include <base/types.h>
76

87
#include <IO/ReadBuffer.h>
8+
#include <IO/ReadHelpers.h>
99
#include <IO/WriteBuffer.h>
10+
#include <IO/WriteHelpers.h>
11+
#include <Common/Exception.h>
1012

1113
namespace DB
1214
{
1315

1416
namespace ErrorCodes
1517
{
16-
extern const int BAD_ARGUMENTS;
18+
extern const int BAD_ARGUMENTS;
1719
}
1820

1921
class DDSketchLogarithmicMapping
2022
{
2123
public:
2224
explicit DDSketchLogarithmicMapping(Float64 relative_accuracy_, Float64 offset_ = 0.0)
23-
: relative_accuracy(relative_accuracy_), offset(offset_)
25+
: relative_accuracy(relative_accuracy_)
26+
, offset(offset_)
2427
{
2528
if (relative_accuracy <= 0 || relative_accuracy >= 1)
2629
{
@@ -44,48 +47,40 @@ class DDSketchLogarithmicMapping
4447
return static_cast<int>(logGamma(value) + offset);
4548
}
4649

47-
Float64 value(int key) const
48-
{
49-
return lowerBound(key) * (1 + relative_accuracy);
50-
}
50+
Float64 value(int key) const { return lowerBound(key) * (1 + relative_accuracy); }
5151

52-
Float64 logGamma(Float64 value) const
53-
{
54-
return std::log(value) * multiplier;
55-
}
52+
Float64 logGamma(Float64 value) const { return std::log(value) * multiplier; }
5653

57-
Float64 powGamma(Float64 value) const
58-
{
59-
return std::exp(value / multiplier);
60-
}
54+
Float64 powGamma(Float64 value) const { return std::exp(value / multiplier); }
6155

62-
Float64 lowerBound(int index) const
63-
{
64-
return powGamma(static_cast<Float64>(index) - offset);
65-
}
56+
Float64 lowerBound(int index) const { return powGamma(static_cast<Float64>(index) - offset); }
6657

67-
Float64 getGamma() const
68-
{
69-
return gamma;
70-
}
58+
Float64 getGamma() const { return gamma; }
7159

72-
Float64 getMinPossible() const
73-
{
74-
return min_possible;
75-
}
60+
Float64 getMinPossible() const { return min_possible; }
7661

77-
[[maybe_unused]] Float64 getMaxPossible() const
62+
[[maybe_unused]] Float64 getMaxPossible() const { return max_possible; }
63+
64+
bool operator==(const DDSketchLogarithmicMapping & other) const
7865
{
79-
return max_possible;
66+
if (gamma == other.gamma)
67+
{
68+
return true;
69+
}
70+
71+
auto [min, max] = std::minmax(gamma, other.gamma);
72+
const Float64 difference = max - min;
73+
const Float64 acceptable = (std::nextafter(min, max) - min) * min;
74+
return difference <= acceptable;
8075
}
8176

82-
void serialize(WriteBuffer& buf) const
77+
void serialize(WriteBuffer & buf) const
8378
{
8479
writeBinary(gamma, buf);
8580
writeBinary(offset, buf);
8681
}
8782

88-
void deserialize(ReadBuffer& buf)
83+
void deserialize(ReadBuffer & buf)
8984
{
9085
readBinary(gamma, buf);
9186
readBinary(offset, buf);

src/AggregateFunctions/DDSketch/Store.h

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
#pragma once
22

3-
#include <base/types.h>
4-
#include <vector>
53
#include <cmath>
64
#include <limits>
5+
#include <vector>
6+
#include <base/types.h>
77

8+
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
89
#include <IO/ReadBuffer.h>
10+
#include <IO/ReadHelpers.h>
911
#include <IO/WriteBuffer.h>
10-
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
12+
#include <IO/WriteHelpers.h>
1113

1214

1315
// We start with 128 bins and grow the number of bins by 128
@@ -18,6 +20,11 @@ constexpr UInt32 CHUNK_SIZE = 128;
1820
namespace DB
1921
{
2022

23+
namespace ErrorCodes
24+
{
25+
extern const int INCORRECT_DATA;
26+
}
27+
2128
class DDSketchDenseStore
2229
{
2330
public:
@@ -27,9 +34,12 @@ class DDSketchDenseStore
2734
int offset = 0;
2835
std::vector<Float64> bins;
2936

30-
explicit DDSketchDenseStore(UInt32 chunk_size_ = CHUNK_SIZE) : chunk_size(chunk_size_) {}
37+
explicit DDSketchDenseStore(UInt32 chunk_size_ = CHUNK_SIZE)
38+
: chunk_size(chunk_size_)
39+
{
40+
}
3141

32-
void copy(DDSketchDenseStore* other)
42+
void copy(DDSketchDenseStore * other)
3343
{
3444
bins = other->bins;
3545
count = other->count;
@@ -38,10 +48,7 @@ class DDSketchDenseStore
3848
offset = other->offset;
3949
}
4050

41-
int length() const
42-
{
43-
return static_cast<int>(bins.size());
44-
}
51+
int length() const { return static_cast<int>(bins.size()); }
4552

4653
void add(int key, Float64 weight)
4754
{
@@ -64,9 +71,10 @@ class DDSketchDenseStore
6471
return max_key;
6572
}
6673

67-
void merge(DDSketchDenseStore* other)
74+
void merge(DDSketchDenseStore * other)
6875
{
69-
if (other->count == 0) return;
76+
if (other->count == 0)
77+
return;
7078

7179
if (count == 0)
7280
{
@@ -89,9 +97,8 @@ class DDSketchDenseStore
8997

9098
/// NOLINTBEGIN(readability-static-accessed-through-instance)
9199

92-
void serialize(WriteBuffer& buf) const
100+
void serialize(WriteBuffer & buf) const
93101
{
94-
95102
// Calculate the size of the dense and sparse encodings to choose the smallest one
96103
UInt64 num_bins = 0;
97104
UInt64 num_non_empty_bins = 0;
@@ -144,8 +151,10 @@ class DDSketchDenseStore
144151
}
145152
}
146153

147-
void deserialize(ReadBuffer& buf)
154+
void deserialize(ReadBuffer & buf)
148155
{
156+
count = 0;
157+
149158
UInt8 encoding_mode;
150159
readBinary(encoding_mode, buf);
151160
if (encoding_mode == enc.BinEncodingContiguousCounts)
@@ -165,7 +174,7 @@ class DDSketchDenseStore
165174
start_key += index_delta;
166175
}
167176
}
168-
else
177+
else if (encoding_mode == enc.BinEncodingIndexDeltasAndCounts)
169178
{
170179
UInt64 num_non_empty_bins;
171180
readVarUInt(num_non_empty_bins, buf);
@@ -180,6 +189,10 @@ class DDSketchDenseStore
180189
add(previous_index, bin_count);
181190
}
182191
}
192+
else
193+
{
194+
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for encoding mode");
195+
}
183196
}
184197

185198
/// NOLINTEND(readability-static-accessed-through-instance)

0 commit comments

Comments
 (0)