Skip to content

Commit e6e71a4

Browse files
Merge pull request ClickHouse#90086 from ClickHouse/backport/25.8/90018
Backport ClickHouse#90018 to 25.8: Fix logical error caused by empty tuple column in `BSONEachRow` and `MsgPack`
2 parents 955ed4a + 7d54f13 commit e6e71a4

9 files changed

+308
-3
lines changed

src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,14 +433,20 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr &
433433

434434
assertChar(BSON_DOCUMENT_END, *in);
435435

436-
if (read_nested_columns != data_type_tuple->getElements().size())
436+
const auto elements_size = data_type_tuple->getElements().size();
437+
if (read_nested_columns != elements_size)
437438
throw Exception(
438439
ErrorCodes::INCORRECT_DATA,
439440
"Cannot parse tuple column with type {} from BSON array/embedded document field, "
440441
"the number of fields in tuple and BSON document doesn't match: {} != {}",
441442
data_type->getName(),
442-
data_type_tuple->getElements().size(),
443+
elements_size,
443444
read_nested_columns);
445+
446+
/// There are no nested columns to grow, so we must explicitly increment the column size.
447+
/// Otherwise, `column.size()` will return 0 for empty tuple columns.
448+
if (elements_size == 0)
449+
tuple_column.addSize(1);
444450
}
445451

446452
void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & data_type, BSONType bson_type)

src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,13 @@ bool MsgPackVisitor::start_array(size_t size) // NOLINT
407407

408408
ColumnTuple & column_tuple = assert_cast<ColumnTuple &>(info_stack.top().column);
409409
/// Push nested columns into stack in reverse order.
410-
for (ssize_t i = nested_types.size() - 1; i >= 0; --i)
410+
for (ssize_t i = static_cast<ssize_t>(nested_types.size()) - 1; i >= 0; --i)
411411
info_stack.push(Info{column_tuple.getColumn(i), nested_types[i], true, std::nullopt, nullptr});
412+
413+
/// There are no nested columns to grow, so we must explicitly increment the column size.
414+
/// Otherwise, `column.size()` will return 0 for empty tuple columns.
415+
if (nested_types.empty())
416+
column_tuple.addSize(1);
412417
}
413418
else
414419
{

tests/queries/0_stateless/03277_empty_tuple_formats.reference

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ RowBinary
3232
Values
3333
()
3434
BSONEachRow
35+
()
3536
MsgPack
37+
()
3638
Native
3739
()
3840
TSV
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1 ()
2+
1 ()
3+
1 ()
4+
1 ()
5+
1 ()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
DROP TABLE IF EXISTS t0;
2+
DROP TABLE IF EXISTS random_filename;
3+
4+
CREATE TABLE t0 (c0 Int32, c1 Tuple()) ENGINE = Memory;
5+
CREATE TABLE random_filename (name String) ENGINE = Memory;
6+
7+
INSERT INTO random_filename SELECT concat('03716_test_bson_empty_tuple_', toString(generateUUIDv4()), '.bson');
8+
9+
INSERT INTO FUNCTION file((SELECT name FROM random_filename LIMIT 1), 'BSONEachRow', 'c0 Int32, c1 Tuple()')
10+
SELECT 1, tuple() FROM numbers(5) SETTINGS engine_file_truncate_on_insert = 1;
11+
12+
INSERT INTO t0 SELECT * FROM file((SELECT name FROM random_filename LIMIT 1), 'BSONEachRow', 'c0 Int32, c1 Tuple()');
13+
14+
SELECT * FROM t0 ORDER BY c0;
15+
16+
DROP TABLE t0;
17+
DROP TABLE random_filename;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
1 ()
2+
1 ()
3+
1 ()
4+
1 ()
5+
1 ()
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
-- Tags: no-fasttest
2+
-- no-fasttest: 'MsgPack' format is not supported
3+
4+
DROP TABLE IF EXISTS t0;
5+
DROP TABLE IF EXISTS random_filename;
6+
7+
CREATE TABLE t0 (c0 Int32, c1 Tuple()) ENGINE = Memory;
8+
CREATE TABLE random_filename (name String) ENGINE = Memory;
9+
10+
INSERT INTO random_filename SELECT concat('03716_test_msgpack_empty_tuple_', toString(generateUUIDv4()), '.msgpack');
11+
12+
INSERT INTO FUNCTION file((SELECT name FROM random_filename LIMIT 1), 'MsgPack', 'c0 Int32, c1 Tuple()')
13+
SELECT 1, tuple() FROM numbers(5) SETTINGS engine_file_truncate_on_insert = 1;
14+
15+
INSERT INTO t0 SELECT * FROM file((SELECT name FROM random_filename LIMIT 1), 'MsgPack', 'c0 Int32, c1 Tuple()');
16+
17+
SELECT * FROM t0 ORDER BY c0;
18+
19+
DROP TABLE t0;
20+
DROP TABLE random_filename;
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
Native
2+
0 ()
3+
1 ()
4+
2 ()
5+
3 ()
6+
4 ()
7+
TSV
8+
0 ()
9+
1 ()
10+
2 ()
11+
3 ()
12+
4 ()
13+
CSV
14+
0 \N
15+
1 \N
16+
2 \N
17+
3 \N
18+
4 \N
19+
TSKV
20+
0 ()
21+
1 ()
22+
2 ()
23+
3 ()
24+
4 ()
25+
JSON
26+
0 ()
27+
1 ()
28+
2 ()
29+
3 ()
30+
4 ()
31+
JSONCompact
32+
0 ()
33+
1 ()
34+
2 ()
35+
3 ()
36+
4 ()
37+
JSONEachRow
38+
0 []
39+
1 []
40+
2 []
41+
3 []
42+
4 []
43+
JSONObjectEachRow
44+
0 []
45+
1 []
46+
2 []
47+
3 []
48+
4 []
49+
JSONCompactEachRow
50+
0 []
51+
1 []
52+
2 []
53+
3 []
54+
4 []
55+
JSONColumns
56+
0 []
57+
1 []
58+
2 []
59+
3 []
60+
4 []
61+
JSONCompactColumns
62+
0 []
63+
1 []
64+
2 []
65+
3 []
66+
4 []
67+
JSONColumnsWithMetadata
68+
0 ()
69+
1 ()
70+
2 ()
71+
3 ()
72+
4 ()
73+
ORC
74+
0 ()
75+
1 ()
76+
2 ()
77+
3 ()
78+
4 ()
79+
Arrow
80+
0 ()
81+
1 ()
82+
2 ()
83+
3 ()
84+
4 ()
85+
Avro
86+
0 ()
87+
1 ()
88+
2 ()
89+
3 ()
90+
4 ()
91+
RowBinary
92+
0 ()
93+
1 ()
94+
2 ()
95+
3 ()
96+
4 ()
97+
Values
98+
0 ()
99+
1 ()
100+
2 ()
101+
3 ()
102+
4 ()
103+
BSONEachRow
104+
0 ()
105+
1 ()
106+
2 ()
107+
3 ()
108+
4 ()
109+
MsgPack
110+
0 ()
111+
1 ()
112+
2 ()
113+
3 ()
114+
4 ()
115+
Native
116+
0 ()
117+
1 ()
118+
2 ()
119+
3 ()
120+
4 ()
121+
TSV
122+
0 ()
123+
1 ()
124+
2 ()
125+
3 ()
126+
4 ()
127+
CSV
128+
0 ()
129+
1 ()
130+
2 ()
131+
3 ()
132+
4 ()
133+
TSKV
134+
0 ()
135+
1 ()
136+
2 ()
137+
3 ()
138+
4 ()
139+
JSON
140+
0 ()
141+
1 ()
142+
2 ()
143+
3 ()
144+
4 ()
145+
JSONCompact
146+
0 ()
147+
1 ()
148+
2 ()
149+
3 ()
150+
4 ()
151+
JSONEachRow
152+
0 ()
153+
1 ()
154+
2 ()
155+
3 ()
156+
4 ()
157+
JSONObjectEachRow
158+
0 ()
159+
1 ()
160+
2 ()
161+
3 ()
162+
4 ()
163+
JSONCompactEachRow
164+
0 ()
165+
1 ()
166+
2 ()
167+
3 ()
168+
4 ()
169+
JSONColumns
170+
0 ()
171+
1 ()
172+
2 ()
173+
3 ()
174+
4 ()
175+
JSONCompactColumns
176+
0 ()
177+
1 ()
178+
2 ()
179+
3 ()
180+
4 ()
181+
JSONColumnsWithMetadata
182+
0 ()
183+
1 ()
184+
2 ()
185+
3 ()
186+
4 ()
187+
ORC
188+
0 ()
189+
1 ()
190+
2 ()
191+
3 ()
192+
4 ()
193+
Arrow
194+
0 ()
195+
1 ()
196+
2 ()
197+
3 ()
198+
4 ()
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env bash
2+
# Tags: no-fasttest
3+
4+
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
5+
# shellcheck source=../shell_config.sh
6+
. "$CUR_DIR"/../shell_config.sh
7+
8+
9+
# Test that no formats crash or return LOGICAL_ERROR on empty tuple
10+
# This test is in the same spirit as 03277_empty_tuple_formats.sh, except it has an additional column in addition to the empty tuple.
11+
# For example, this logical error: Invalid number of rows in Chunk Int32(size = 5) Tuple(size = 0) column Tuple() at position 1: expected 5, got 0
12+
# was triggered when there was another column in addition to the empty tuple. This test covers that case.
13+
14+
FILE=03277_$CLICKHOUSE_DATABASE
15+
16+
# With schema inference.
17+
for format in Native TSV CSV TSKV JSON JSONCompact JSONEachRow JSONObjectEachRow JSONCompactEachRow JSONColumns JSONCompactColumns JSONColumnsWithMetadata ORC Arrow
18+
do
19+
echo $format
20+
$CLICKHOUSE_LOCAL -q "
21+
insert into function file('$FILE', '$format') select number, () from numbers(5) settings engine_file_truncate_on_insert=1;
22+
select * from file('$FILE', '$format');"
23+
done
24+
25+
# Picky about column names.
26+
echo Avro
27+
$CLICKHOUSE_LOCAL -q "
28+
insert into function file('$FILE', 'Avro') select number as x, () as y from numbers(5) settings engine_file_truncate_on_insert=1;
29+
select * from file('$FILE', 'Avro');"
30+
31+
# Without schema inference.
32+
for format in RowBinary Values BSONEachRow MsgPack Native TSV CSV TSKV JSON JSONCompact JSONEachRow JSONObjectEachRow JSONCompactEachRow JSONColumns JSONCompactColumns JSONColumnsWithMetadata ORC Arrow
33+
do
34+
echo $format
35+
$CLICKHOUSE_LOCAL -q "
36+
insert into function file('$FILE', '$format', 'x UInt64, y Tuple()') select number as x, () as y from numbers(5) settings engine_file_truncate_on_insert=1;
37+
select * from file('$FILE', '$format', 'x UInt64, y Tuple()');"
38+
done
39+
40+
# Formats that don't support empty tuples/multiple columns.
41+
$CLICKHOUSE_LOCAL -q "
42+
insert into function file('$FILE', 'Parquet') select number, () from numbers(5) settings engine_file_truncate_on_insert=1; -- {serverError BAD_ARGUMENTS}
43+
insert into function file('$FILE', 'Npy') select number, () from numbers(5) settings engine_file_truncate_on_insert=1; -- {serverError TOO_MANY_COLUMNS}
44+
insert into function file('$FILE', 'CapnProto', 'x UInt64, y Tuple()') select number as x, () as y from numbers(5) settings engine_file_truncate_on_insert=1; -- {serverError CAPN_PROTO_BAD_CAST}
45+
insert into function file('$FILE', 'RawBLOB') select number, () from numbers(5) settings engine_file_truncate_on_insert=1; -- {serverError NOT_IMPLEMENTED}"
46+
47+
rm $FILE

0 commit comments

Comments
 (0)