Skip to content

Commit 73b4f3b

Browse files
authored
Merge pull request #1067 from Altinity/backports/24.8.14/87863
24.8.14 Backport of ClickHouse#87863: ArrowStream processing crash if non unique dictionary
2 parents 2d89291 + 0f43094 commit 73b4f3b

File tree

3 files changed

+11
-0
lines changed

3 files changed

+11
-0
lines changed

src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1024,6 +1024,16 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn(
10241024
auto tmp_lc_column = lc_type->createColumn();
10251025
auto tmp_dict_column = IColumn::mutate(assert_cast<ColumnLowCardinality *>(tmp_lc_column.get())->getDictionaryPtr());
10261026
dynamic_cast<IColumnUnique *>(tmp_dict_column.get())->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size());
1027+
size_t expected_dictionary_size = dict_column.column->size() + (dict_info.default_value_index == -1) + is_lc_nullable;
1028+
if (tmp_dict_column->size() != expected_dictionary_size)
1029+
{
1030+
throw Exception(
1031+
ErrorCodes::INCORRECT_DATA,
1032+
"Expected Dictionary size {}, real Dictionary size is {}. The discrepancy probably caused by duplicated values",
1033+
expected_dictionary_size,
1034+
tmp_dict_column->size());
1035+
}
1036+
10271037
dict_column.column = std::move(tmp_dict_column);
10281038
dict_info.values = std::make_shared<ColumnWithTypeAndName>(std::move(dict_column));
10291039
dict_info.dictionary_size = arrow_dict_column->length();

tests/queries/0_stateless/02904_arrow_dictionary_indexes.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,3 +18,4 @@ $CLICKHOUSE_LOCAL -q "select uniqExact(a) from file('$CLICKHOUSE_TMP/$CLICKHOUSE
1818

1919
$CLICKHOUSE_LOCAL -q "select * from file('$CUR_DIR/data_arrow/different_dicts.arrowstream') order by x"
2020

21+
$CLICKHOUSE_LOCAL -q "select * from file('$CUR_DIR/data_arrow/non_unique_dict.arrowstream') -- { serverError INCORRECT_DATA }"
528 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)