Skip to content

Commit e3050b4

Browse files
Merge pull request ClickHouse#90501 from ClickHouse/backport/25.8/90031
Backport ClickHouse#90031 to 25.8: Do size checks when deserializing data from aggregation states and other sources
2 parents e35cc54 + 0986e3e commit e3050b4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+435
-347
lines changed

src/AggregateFunctions/AggregateFunctionGroupArray.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <AggregateFunctions/AggregateFunctionFactory.h>
22
#include <AggregateFunctions/Helpers.h>
33
#include <AggregateFunctions/FactoryHelpers.h>
4+
#include <AggregateFunctions/KeyHolderHelpers.h>
45
#include <Interpreters/Context.h>
56
#include <Core/ServerSettings.h>
67

@@ -464,7 +465,10 @@ struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
464465
return node;
465466
}
466467

467-
void insertInto(IColumn & column) { std::ignore = column.deserializeAndInsertAggregationStateValueFromArena(data()); }
468+
void insertInto(IColumn & column)
469+
{
470+
deserializeAndInsert<false>({data(), size}, column);
471+
}
468472
};
469473

470474
template <typename Node, bool has_sampler>

src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <AggregateFunctions/FactoryHelpers.h>
2222
#include <AggregateFunctions/Helpers.h>
2323
#include <AggregateFunctions/IAggregateFunction.h>
24+
#include <AggregateFunctions/KeyHolderHelpers.h>
2425

2526
#include <memory>
2627

@@ -341,10 +342,7 @@ class AggregateFunctionGroupArrayIntersectGeneric final
341342

342343
for (auto & elem : set)
343344
{
344-
if constexpr (is_plain_column)
345-
data_to.insertData(elem.getValue().data, elem.getValue().size);
346-
else
347-
std::ignore = data_to.deserializeAndInsertAggregationStateValueFromArena(elem.getValue().data);
345+
deserializeAndInsert<is_plain_column>(elem.getValue(), data_to);
348346
}
349347
}
350348
};

src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,10 @@ struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDi
138138
Set::LookupResult it;
139139
bool inserted;
140140
history.emplace(ArenaKeyHolder{value, *arena}, it, inserted);
141-
const char * pos = it->getValue().data;
141+
ReadBufferFromString in({it->getValue().data, it->getValue().size});
142+
/// Multiple columns are serialized one by one
142143
for (auto & column : argument_columns)
143-
pos = column->deserializeAndInsertAggregationStateValueFromArena(pos);
144+
column->deserializeAndInsertAggregationStateValueFromArena(in);
144145
}
145146
}
146147
}

src/AggregateFunctions/KeyHolderHelpers.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
#pragma once
22

3-
#include <Common/HashTable/HashTableKeyHolder.h>
43
#include <Columns/IColumn.h>
4+
#include <Common/HashTable/HashTableKeyHolder.h>
5+
#include <IO/ReadBufferFromString.h>
56

67
namespace DB
78
{
89
struct Settings;
910

11+
namespace ErrorCodes
12+
{
13+
extern const int INCORRECT_DATA;
14+
}
15+
1016
template <bool is_plain_column = false>
1117
static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
1218
{
@@ -29,7 +35,14 @@ static void deserializeAndInsert(StringRef str, IColumn & data_to)
2935
if constexpr (is_plain_column)
3036
data_to.insertData(str.data, str.size);
3137
else
32-
std::ignore = data_to.deserializeAndInsertAggregationStateValueFromArena(str.data);
38+
{
39+
ReadBufferFromString in({str.data, str.size});
40+
data_to.deserializeAndInsertAggregationStateValueFromArena(in);
41+
if (!in.eof())
42+
{
43+
throw Exception(ErrorCodes::INCORRECT_DATA, "Extra bytes ({}) found after deserializing aggregation state", in.available());
44+
}
45+
}
3346
}
3447

3548
}

src/Columns/ColumnAggregateFunction.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,7 @@ StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & are
609609
return out.complete();
610610
}
611611

612-
const char * ColumnAggregateFunction::deserializeAndInsertFromArena(const char * src_arena)
612+
void ColumnAggregateFunction::deserializeAndInsertFromArena(ReadBuffer & in)
613613
{
614614
ensureOwnership();
615615

@@ -618,21 +618,10 @@ const char * ColumnAggregateFunction::deserializeAndInsertFromArena(const char *
618618
*/
619619
Arena & dst_arena = createOrGetArena();
620620
pushBackAndCreateState(data, dst_arena, func.get());
621-
622-
/** We will read from src_arena.
623-
* There is no limit for reading - it is assumed, that we can read all that we need after src_arena pointer.
624-
* Buf ReadBufferFromMemory requires some bound. We will use arbitrary big enough number, that will not overflow pointer.
625-
* NOTE Technically, this is not compatible with C++ standard,
626-
* as we cannot legally compare pointers after last element + 1 of some valid memory region.
627-
* Probably this will not work under UBSan.
628-
*/
629-
ReadBufferFromMemory read_buffer(src_arena, std::numeric_limits<char *>::max() - src_arena - 1);
630-
func->deserialize(data.back(), read_buffer, version, &dst_arena);
631-
632-
return read_buffer.position();
621+
func->deserialize(data.back(), in, version, &dst_arena);
633622
}
634623

635-
const char * ColumnAggregateFunction::skipSerializedInArena(const char *) const
624+
void ColumnAggregateFunction::skipSerializedInArena(ReadBuffer &) const
636625
{
637626
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method skipSerializedInArena is not supported for {}", getName());
638627
}

src/Columns/ColumnAggregateFunction.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,9 @@ class ColumnAggregateFunction final : public COWHelper<IColumnHelper<ColumnAggre
173173

174174
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
175175

176-
const char * deserializeAndInsertFromArena(const char * src_arena) override;
176+
void deserializeAndInsertFromArena(ReadBuffer & in) override;
177177

178-
const char * skipSerializedInArena(const char *) const override;
178+
void skipSerializedInArena(ReadBuffer & in) const override;
179179

180180
void updateHashWithValue(size_t n, SipHash & hash) const override;
181181

src/Columns/ColumnArray.cpp

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <Common/WeakHash.h>
1818
#include <Common/HashTable/Hash.h>
1919
#include <cstring> // memcpy
20+
#include <IO/ReadHelpers.h>
2021

2122

2223
namespace DB
@@ -296,39 +297,35 @@ std::optional<size_t> ColumnArray::getSerializedValueSize(size_t n) const
296297
}
297298

298299

299-
const char * ColumnArray::deserializeAndInsertFromArena(const char * pos)
300+
void ColumnArray::deserializeAndInsertFromArena(ReadBuffer & in)
300301
{
301-
size_t array_size = unalignedLoad<size_t>(pos);
302-
pos += sizeof(array_size);
302+
size_t array_size;
303+
readBinaryLittleEndian<size_t>(array_size, in);
303304

304305
for (size_t i = 0; i < array_size; ++i)
305-
pos = getData().deserializeAndInsertFromArena(pos);
306+
getData().deserializeAndInsertFromArena(in);
306307

307308
getOffsets().push_back(getOffsets().back() + array_size);
308-
return pos;
309309
}
310310

311-
const char * ColumnArray::deserializeAndInsertAggregationStateValueFromArena(const char * pos)
311+
void ColumnArray::deserializeAndInsertAggregationStateValueFromArena(ReadBuffer & in)
312312
{
313-
size_t array_size = unalignedLoad<size_t>(pos);
314-
pos += sizeof(array_size);
313+
size_t array_size;
314+
readBinaryLittleEndian<size_t>(array_size, in);
315315

316316
for (size_t i = 0; i < array_size; ++i)
317-
pos = getData().deserializeAndInsertAggregationStateValueFromArena(pos);
317+
getData().deserializeAndInsertAggregationStateValueFromArena(in);
318318

319319
getOffsets().push_back(getOffsets().back() + array_size);
320-
return pos;
321320
}
322321

323-
const char * ColumnArray::skipSerializedInArena(const char * pos) const
322+
void ColumnArray::skipSerializedInArena(ReadBuffer & in) const
324323
{
325-
size_t array_size = unalignedLoad<size_t>(pos);
326-
pos += sizeof(array_size);
324+
size_t array_size;
325+
readBinaryLittleEndian<size_t>(array_size, in);
327326

328327
for (size_t i = 0; i < array_size; ++i)
329-
pos = getData().skipSerializedInArena(pos);
330-
331-
return pos;
328+
getData().skipSerializedInArena(in);
332329
}
333330

334331
void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const

src/Columns/ColumnArray.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ class ColumnArray final : public COWHelper<IColumnHelper<ColumnArray>, ColumnArr
8282
StringRef serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
8383
char * serializeValueIntoMemory(size_t, char * memory) const override;
8484
std::optional<size_t> getSerializedValueSize(size_t n) const override;
85-
const char * deserializeAndInsertFromArena(const char * pos) override;
86-
const char * deserializeAndInsertAggregationStateValueFromArena(const char * pos) override;
87-
const char * skipSerializedInArena(const char * pos) const override;
85+
void deserializeAndInsertFromArena(ReadBuffer & in) override;
86+
void deserializeAndInsertAggregationStateValueFromArena(ReadBuffer & in) override;
87+
void skipSerializedInArena(ReadBuffer & in) const override;
8888
void updateHashWithValue(size_t n, SipHash & hash) const override;
8989
WeakHash32 getWeakHash32() const override;
9090
void updateHashFast(SipHash & hash) const override;

src/Columns/ColumnBLOB.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ class ColumnBLOB : public COWHelper<IColumnHelper<ColumnBLOB>, ColumnBLOB>
163163
void popBack(size_t) override { throwInapplicable(); }
164164
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwInapplicable(); }
165165
char * serializeValueIntoMemory(size_t, char *) const override { throwInapplicable(); }
166-
const char * deserializeAndInsertFromArena(const char *) override { throwInapplicable(); }
167-
const char * skipSerializedInArena(const char *) const override { throwInapplicable(); }
166+
void deserializeAndInsertFromArena(ReadBuffer &) override { throwInapplicable(); }
167+
void skipSerializedInArena(ReadBuffer &) const override { throwInapplicable(); }
168168
void updateHashWithValue(size_t, SipHash &) const override { throwInapplicable(); }
169169
WeakHash32 getWeakHash32() const override { throwInapplicable(); }
170170
void updateHashFast(SipHash &) const override { throwInapplicable(); }

src/Columns/ColumnCompressed.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ class ColumnCompressed : public COWHelper<IColumnHelper<ColumnCompressed>, Colum
9999
void popBack(size_t) override { throwMustBeDecompressed(); }
100100
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
101101
char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeDecompressed(); }
102-
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
103-
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
102+
void deserializeAndInsertFromArena(ReadBuffer &) override { throwMustBeDecompressed(); }
103+
void skipSerializedInArena(ReadBuffer &) const override { throwMustBeDecompressed(); }
104104
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
105105
WeakHash32 getWeakHash32() const override { throwMustBeDecompressed(); }
106106
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }

0 commit comments

Comments
 (0)