Skip to content

Commit 38ddeb0

Browse files
Backport ClickHouse#87049 to 25.8: Fix compatibility of some aggregate function states with String argument
1 parent 6008911 commit 38ddeb0

28 files changed

+435
-16
lines changed

src/AggregateFunctions/AggregateFunctionGroupArray.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -456,15 +456,15 @@ struct GroupArrayNodeGeneral : public GroupArrayNodeBase<GroupArrayNodeGeneral>
456456
static Node * allocate(const IColumn & column, size_t row_num, Arena * arena)
457457
{
458458
const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node));
459-
StringRef value = column.serializeValueIntoArena(row_num, *arena, begin);
459+
StringRef value = column.serializeAggregationStateValueIntoArena(row_num, *arena, begin);
460460

461461
Node * node = reinterpret_cast<Node *>(const_cast<char *>(begin));
462462
node->size = value.size;
463463

464464
return node;
465465
}
466466

467-
void insertInto(IColumn & column) { std::ignore = column.deserializeAndInsertFromArena(data()); }
467+
void insertInto(IColumn & column) { std::ignore = column.deserializeAndInsertAggregationStateValueFromArena(data()); }
468468
};
469469

470470
template <typename Node, bool has_sampler>

src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,7 @@ class AggregateFunctionGroupArrayIntersectGeneric final
235235
else
236236
{
237237
const char * begin = nullptr;
238-
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
238+
StringRef serialized = data_column->serializeAggregationStateValueIntoArena(offset + i, *arena, begin);
239239
chassert(serialized.data != nullptr);
240240
set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted);
241241
}
@@ -255,7 +255,7 @@ class AggregateFunctionGroupArrayIntersectGeneric final
255255
else
256256
{
257257
const char * begin = nullptr;
258-
StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin);
258+
StringRef serialized = data_column->serializeAggregationStateValueIntoArena(offset + i, *arena, begin);
259259
chassert(serialized.data != nullptr);
260260
it = set.find(serialized);
261261

@@ -344,7 +344,7 @@ class AggregateFunctionGroupArrayIntersectGeneric final
344344
if constexpr (is_plain_column)
345345
data_to.insertData(elem.getValue().data, elem.getValue().size);
346346
else
347-
std::ignore = data_to.deserializeAndInsertFromArena(elem.getValue().data);
347+
std::ignore = data_to.deserializeAndInsertAggregationStateValueFromArena(elem.getValue().data);
348348
}
349349
}
350350
};

src/AggregateFunctions/AggregateFunctionTopK.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -310,7 +310,7 @@ class AggregateFunctionTopKGeneric final
310310
else
311311
{
312312
const char * begin = nullptr;
313-
StringRef str_serialized = columns[0]->serializeValueIntoArena(row_num, *arena, begin);
313+
StringRef str_serialized = columns[0]->serializeAggregationStateValueIntoArena(row_num, *arena, begin);
314314
if constexpr (is_weighted)
315315
set.insert(str_serialized, columns[1]->getUInt(row_num));
316316
else

src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDi
116116
StringRef value(begin, 0);
117117
for (size_t i = 0; i < columns_num; ++i)
118118
{
119-
auto cur_ref = columns[i]->serializeValueIntoArena(row_num, *arena, begin);
119+
auto cur_ref = columns[i]->serializeAggregationStateValueIntoArena(row_num, *arena, begin);
120120
value.data = cur_ref.data - value.size;
121121
value.size += cur_ref.size;
122122
}
@@ -140,7 +140,7 @@ struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDi
140140
history.emplace(ArenaKeyHolder{value, *arena}, it, inserted);
141141
const char * pos = it->getValue().data;
142142
for (auto & column : argument_columns)
143-
pos = column->deserializeAndInsertFromArena(pos);
143+
pos = column->deserializeAndInsertAggregationStateValueFromArena(pos);
144144
}
145145
}
146146
}

src/AggregateFunctions/KeyHolderHelpers.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena)
1717
else
1818
{
1919
const char * begin = nullptr;
20-
StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin);
20+
StringRef serialized = column.serializeAggregationStateValueIntoArena(row_num, arena, begin);
2121
assert(serialized.data != nullptr);
2222
return SerializedKeyHolder{serialized, arena};
2323
}
@@ -29,7 +29,7 @@ static void deserializeAndInsert(StringRef str, IColumn & data_to)
2929
if constexpr (is_plain_column)
3030
data_to.insertData(str.data, str.size);
3131
else
32-
std::ignore = data_to.deserializeAndInsertFromArena(str.data);
32+
std::ignore = data_to.deserializeAndInsertAggregationStateValueFromArena(str.data);
3333
}
3434

3535
}

src/Columns/ColumnArray.cpp

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -243,6 +243,27 @@ StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char con
243243
}
244244

245245

246+
StringRef ColumnArray::serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const
247+
{
248+
size_t array_size = sizeAt(n);
249+
size_t offset = offsetAt(n);
250+
251+
char * pos = arena.allocContinue(sizeof(array_size), begin);
252+
memcpy(pos, &array_size, sizeof(array_size));
253+
254+
StringRef res(pos, sizeof(array_size));
255+
256+
for (size_t i = 0; i < array_size; ++i)
257+
{
258+
auto value_ref = getData().serializeAggregationStateValueIntoArena(offset + i, arena, begin);
259+
res.data = value_ref.data - res.size;
260+
res.size += value_ref.size;
261+
}
262+
263+
return res;
264+
}
265+
266+
246267
char * ColumnArray::serializeValueIntoMemory(size_t n, char * memory) const
247268
{
248269
size_t array_size = sizeAt(n);
@@ -287,6 +308,18 @@ const char * ColumnArray::deserializeAndInsertFromArena(const char * pos)
287308
return pos;
288309
}
289310

311+
const char * ColumnArray::deserializeAndInsertAggregationStateValueFromArena(const char * pos)
312+
{
313+
size_t array_size = unalignedLoad<size_t>(pos);
314+
pos += sizeof(array_size);
315+
316+
for (size_t i = 0; i < array_size; ++i)
317+
pos = getData().deserializeAndInsertAggregationStateValueFromArena(pos);
318+
319+
getOffsets().push_back(getOffsets().back() + array_size);
320+
return pos;
321+
}
322+
290323
const char * ColumnArray::skipSerializedInArena(const char * pos) const
291324
{
292325
size_t array_size = unalignedLoad<size_t>(pos);

src/Columns/ColumnArray.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,11 @@ class ColumnArray final : public COWHelper<IColumnHelper<ColumnArray>, ColumnArr
7979
bool isDefaultAt(size_t n) const override;
8080
void insertData(const char * pos, size_t length) override;
8181
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
82+
StringRef serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
8283
char * serializeValueIntoMemory(size_t, char * memory) const override;
8384
std::optional<size_t> getSerializedValueSize(size_t n) const override;
8485
const char * deserializeAndInsertFromArena(const char * pos) override;
86+
const char * deserializeAndInsertAggregationStateValueFromArena(const char * pos) override;
8587
const char * skipSerializedInArena(const char * pos) const override;
8688
void updateHashWithValue(size_t n, SipHash & hash) const override;
8789
WeakHash32 getWeakHash32() const override;

src/Columns/ColumnLowCardinality.cpp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,11 @@ StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena,
279279
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
280280
}
281281

282+
StringRef ColumnLowCardinality::serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const
283+
{
284+
return getDictionary().serializeAggregationStateValueIntoArena(getIndexes().getUInt(n), arena, begin);
285+
}
286+
282287
char * ColumnLowCardinality::serializeValueIntoMemory(size_t n, char * memory) const
283288
{
284289
return getDictionary().serializeValueIntoMemory(getIndexes().getUInt(n), memory);
@@ -312,6 +317,16 @@ const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * po
312317
return new_pos;
313318
}
314319

320+
const char * ColumnLowCardinality::deserializeAndInsertAggregationStateValueFromArena(const char * pos)
321+
{
322+
compactIfSharedDictionary();
323+
324+
const char * new_pos;
325+
idx.insertPosition(getDictionary().uniqueDeserializeAndInsertAggregationStateValueFromArena(pos, new_pos));
326+
327+
return new_pos;
328+
}
329+
315330
const char * ColumnLowCardinality::skipSerializedInArena(const char * pos) const
316331
{
317332
return getDictionary().skipSerializedInArena(pos);

src/Columns/ColumnLowCardinality.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,11 +102,13 @@ class ColumnLowCardinality final : public COWHelper<IColumnHelper<ColumnLowCardi
102102
void popBack(size_t n) override { idx.popBack(n); }
103103

104104
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
105+
StringRef serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
105106
char * serializeValueIntoMemory(size_t n, char * memory) const override;
106107

107108
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
108109

109110
const char * deserializeAndInsertFromArena(const char * pos) override;
111+
const char * deserializeAndInsertAggregationStateValueFromArena(const char * pos) override;
110112

111113
const char * skipSerializedInArena(const char * pos) const override;
112114

src/Columns/ColumnMap.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,11 @@ StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const
155155
return nested->serializeValueIntoArena(n, arena, begin);
156156
}
157157

158+
StringRef ColumnMap::serializeAggregationStateValueIntoArena(size_t n, Arena & arena, char const *& begin) const
159+
{
160+
return nested->serializeAggregationStateValueIntoArena(n, arena, begin);
161+
}
162+
158163
char * ColumnMap::serializeValueIntoMemory(size_t n, char * memory) const
159164
{
160165
return nested->serializeValueIntoMemory(n, memory);
@@ -170,6 +175,11 @@ const char * ColumnMap::deserializeAndInsertFromArena(const char * pos)
170175
return nested->deserializeAndInsertFromArena(pos);
171176
}
172177

178+
const char * ColumnMap::deserializeAndInsertAggregationStateValueFromArena(const char * pos)
179+
{
180+
return nested->deserializeAndInsertAggregationStateValueFromArena(pos);
181+
}
182+
173183
const char * ColumnMap::skipSerializedInArena(const char * pos) const
174184
{
175185
return nested->skipSerializedInArena(pos);

0 commit comments

Comments
 (0)