Skip to content

Commit 4f04b19

Browse files
authored
Merge pull request ClickHouse#76094 from ClickHouse/backport/24.8/75512
Backport ClickHouse#75512 to 24.8: Fix check for nested elements arrays sizes in case of distributed JOIN and JSON subcolumns
2 parents a564c68 + 5a13496 commit 4f04b19

File tree

7 files changed

+75
-20
lines changed

7 files changed

+75
-20
lines changed

src/Interpreters/InterpreterInsertQuery.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <Processors/Transforms/SquashingTransform.h>
3434
#include <Processors/Transforms/PlanSquashingTransform.h>
3535
#include <Processors/Transforms/getSourceFromASTInsertQuery.h>
36+
#include <Processors/Transforms/NestedElementsValidationTransform.h>
3637
#include <Processors/QueryPlan/QueryPlan.h>
3738
#include <QueryPipeline/QueryPipelineBuilder.h>
3839
#include <Storages/StorageDistributed.h>
@@ -380,6 +381,12 @@ Chain InterpreterInsertQuery::buildPreSinkChain(
380381

381382
/// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
382383

384+
/// Add a transform that checks that the sizes of arrays - elements of nested data structures - match.
385+
/// We have to make this assertion before writing to the table, because the storage engine may assume that they have equal sizes.
386+
/// NOTE It'd be better to do this check in the serialization of nested structures (in the place where this assumption is required),
387+
/// but currently we don't have methods for serialization of nested structures "as a whole".
388+
out.addSource(std::make_shared<NestedElementsValidationTransform>(input_header()));
389+
383390
/// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
384391
if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
385392
out.addSource(std::make_shared<CheckConstraintsTransform>(

src/Interpreters/inplaceBlockConversions.cpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -257,18 +257,6 @@ static std::unordered_map<String, ColumnPtr> collectOffsetsColumns(
257257

258258
if (offsets_column->size() != current_offsets_column->size() && inside_variant_element)
259259
offsets_column = offsets_column->size() < current_offsets_column->size() ? offsets_column : current_offsets_column;
260-
#ifndef NDEBUG
261-
else
262-
{
263-
const auto & offsets_data = assert_cast<const ColumnUInt64 &>(*offsets_column).getData();
264-
const auto & current_offsets_data = assert_cast<const ColumnUInt64 &>(*current_offsets_column).getData();
265-
266-
if (offsets_data != current_offsets_data)
267-
throw Exception(ErrorCodes::LOGICAL_ERROR,
268-
"Found non-equal columns with offsets (sizes: {} and {}) for stream {}",
269-
offsets_data.size(), current_offsets_data.size(), stream_name);
270-
}
271-
#endif
272260
}
273261
}
274262
}, available_column->type, res_columns[i]);

src/Processors/Sinks/SinkToStorage.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include <Processors/Sinks/SinkToStorage.h>
2-
#include <DataTypes/NestedUtils.h>
32

43
namespace DB
54
{
@@ -8,13 +7,6 @@ SinkToStorage::SinkToStorage(const Block & header) : ExceptionKeepingTransform(h
87

98
void SinkToStorage::onConsume(Chunk chunk)
109
{
11-
/** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match.
12-
* We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes.
13-
* NOTE It'd better to do this check in serialization of nested structures (in place when this assumption is required),
14-
* but currently we don't have methods for serialization of nested structures "as a whole".
15-
*/
16-
Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
17-
1810
consume(chunk);
1911
cur_chunk = std::move(chunk);
2012
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include <Processors/Transforms/NestedElementsValidationTransform.h>
2+
#include <DataTypes/NestedUtils.h>
3+
4+
namespace DB
5+
{
6+
7+
/// Constructs the transform from a single header block.
/// The same `header` is passed twice to the ISimpleTransform base - presumably as the input and the
/// output header, i.e. the transform does not change the structure of the data (TODO confirm).
/// NOTE(review): the trailing `false` is presumably the base class's "skip empty chunks" flag - confirm against ISimpleTransform.
NestedElementsValidationTransform::NestedElementsValidationTransform(const Block & header) : ISimpleTransform(header, header, false)
8+
{
9+
}
10+
11+
/// Validates one chunk: combines the chunk's columns with the output port's header into a Block
/// and passes that Block to Nested::validateArraySizes. This body only reads the chunk's columns.
/// NOTE(review): validateArraySizes is expected to throw when arrays that belong to the same nested
/// data structure have different sizes - confirm in DataTypes/NestedUtils.h.
void NestedElementsValidationTransform::transform(Chunk & chunk)
12+
{
13+
Nested::validateArraySizes(getOutputPort().getHeader().cloneWithColumns(chunk.getColumns()));
14+
}
15+
16+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#pragma once
2+
3+
#include <Processors/ISimpleTransform.h>
4+
5+
namespace DB
6+
{
7+
8+
/// A simple transform (derived from ISimpleTransform) intended to validate the elements of nested
/// data structures in the chunks passing through it; the per-chunk logic lives in transform().
class NestedElementsValidationTransform : public ISimpleTransform
9+
{
10+
public:
11+
/// Creates the transform for data with the structure described by `header`.
explicit NestedElementsValidationTransform(const Block & header);
12+
13+
/// Returns the fixed name of this transform.
String getName() const override { return "NestedElementsValidationTransform"; }
14+
15+
protected:
16+
/// Per-chunk hook overriding the base class's transform(); only declared here.
void transform(Chunk & chunk) override;
17+
};
18+
19+
}
20+
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
-- Regression test: a GLOBAL JOIN over a Distributed table that reads typed array subcolumns
-- (data.arr1, data.arr2) of a JSON column.
-- The two arrays of one row deliberately have different lengths.
SET allow_experimental_json_type=1;
2+
SET allow_experimental_analyzer=1;
3+
4+
DROP TABLE IF EXISTS test_distr;
5+
DROP TABLE IF EXISTS test;
6+
7+
-- Local table holding the data; the JSON column declares two typed array paths.
CREATE TABLE test
8+
(
9+
id Int64,
10+
data JSON(arr1 Array(String), arr2 Array(Int32))
11+
)
12+
ENGINE = MergeTree ORDER BY id;
13+
14+
15+
-- Distributed table with the same structure, pointing at `test` in the current database.
CREATE TABLE test_distr
16+
(
17+
id Int64,
18+
data JSON(arr1 Array(String), arr2 Array(Int32))
19+
)
20+
ENGINE = Distributed(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 'test', murmurHash2_32(id));
21+
22+
-- Row 1: arr1 has 3 elements and arr2 is empty. Row 2: arr1 has 2 elements and arr2 has 1.
INSERT INTO test FORMAT Values (1, '{"arr1" : ["s1", "s2", "s3"], "arr2" : []}'), (2, '{"arr1" : ["s4", "s5"], "arr2" : [42]}');
23+
24+
-- Self-join through the Distributed table. No inserted row has both 's3' in arr1 and 42 in arr2,
-- so no joined row passes the WHERE filter and the count is 0.
SELECT count()
25+
FROM test_distr as left
26+
GLOBAL INNER JOIN test_distr as right on left.id = right.id
27+
WHERE has(right.data.arr1, 's3') AND has(right.data.arr2, 42);
28+
29+
DROP TABLE test_distr;
30+
DROP TABLE test;
31+

0 commit comments

Comments
 (0)