Skip to content

Commit 436f3ea

Browse files
authored
Merge pull request ClickHouse#86941 from ClickHouse/backport/25.8/86812
Backport ClickHouse#86812 to 25.8: Add addition validations in ColumnObject
2 parents 6f57cd8 + 8b36102 commit 436f3ea

File tree

5 files changed

+34
-1
lines changed

5 files changed

+34
-1
lines changed

src/Columns/ColumnObject.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
631631

632632
/// Finally, insert paths from shared data.
633633
insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, std::move(src_dynamic_paths_for_shared_data), n, 1);
634+
validateDynamicPathsSizes();
634635
}
635636

636637
#if !defined(DEBUG_OR_SANITIZER_BUILD)
@@ -665,6 +666,7 @@ void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t l
665666

666667
/// Finally, insert paths from shared data.
667668
insertFromSharedDataAndFillRemainingDynamicPaths(src_object_column, std::move(src_dynamic_paths_for_shared_data), start, length);
669+
validateDynamicPathsSizes();
668670
}
669671

670672
void ColumnObject::insertFromSharedDataAndFillRemainingDynamicPaths(const DB::ColumnObject & src_object_column, std::vector<std::string_view> && src_dynamic_paths_for_shared_data, size_t start, size_t length)
@@ -726,6 +728,8 @@ void ColumnObject::insertFromSharedDataAndFillRemainingDynamicPaths(const DB::Co
726728
if (auto it = dynamic_paths_ptrs.find(path); it != dynamic_paths_ptrs.end())
727729
{
728730
/// Deserialize binary value into dynamic column from shared data.
731+
if (it->second->size() != current_size)
732+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of dynamic path {}: {} != {}. It may indicate duplicated data for this path", path, it->second->size(), current_size);
729733
deserializeValueFromSharedData(src_shared_data_values, i, *it->second);
730734
}
731735
else
@@ -1995,4 +1999,14 @@ int ColumnObject::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_d
19951999
return 1;
19962000
}
19972001

2002+
void ColumnObject::validateDynamicPathsSizes() const
2003+
{
2004+
size_t expected_size = shared_data->size();
2005+
for (const auto & [path, column] : dynamic_paths)
2006+
{
2007+
if (column->size() != expected_size)
2008+
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of dynamic path {}: {} != {}", path, column->size(), expected_size);
2009+
}
2010+
}
2011+
19982012
}

src/Columns/ColumnObject.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,9 @@ class ColumnObject final : public COWHelper<IColumnHelper<ColumnObject>, ColumnO
279279
/// Insert all the data from shared data with specified path to dynamic column.
280280
static void fillPathColumnFromSharedData(IColumn & path_column, StringRef path, const ColumnPtr & shared_data_column, size_t start, size_t end);
281281

282+
/// Validate that all dynamic paths have correct sizes.
283+
void validateDynamicPathsSizes() const;
284+
282285
private:
283286
class SortedPathsIterator;
284287

src/DataTypes/Serializations/SerializationObject.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,7 @@ void SerializationObject::serializeBinaryBulkWithMultipleStreams(
780780
}
781781

782782
const auto & column_object = assert_cast<const ColumnObject &>(column);
783+
column_object.validateDynamicPathsSizes();
783784
const auto & typed_paths = column_object.getTypedPaths();
784785

785786
if (object_state->serialization_version.value == SerializationVersion::FLATTENED)
@@ -1035,6 +1036,8 @@ void SerializationObject::deserializeBinaryBulkWithMultipleStreams(
10351036
object_state->shared_data_serialization->deserializeBinaryBulkWithMultipleStreams(shared_data, rows_offset, limit, settings, object_state->shared_data_state, cache);
10361037
settings.path.pop_back();
10371038
settings.path.pop_back();
1039+
1040+
column_object.validateDynamicPathsSizes();
10381041
}
10391042

10401043
void SerializationObject::serializeBinary(const Field & field, WriteBuffer & ostr, const DB::FormatSettings & settings) const
@@ -1226,7 +1229,9 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
12261229
String value;
12271230
Field field;
12281231
readParsedValueIntoString(value, istr, [&](ReadBuffer & buf){ dynamic_serialization->deserializeBinary(field, buf, settings); });
1229-
paths_and_values_for_shared_data.emplace_back(std::move(path), std::move(value));
1232+
/// Don't write nulls into shared data.
1233+
if (!field.isNull())
1234+
paths_and_values_for_shared_data.emplace_back(std::move(path), std::move(value));
12301235
}
12311236
}
12321237
else
@@ -1272,6 +1277,8 @@ void SerializationObject::deserializeBinary(IColumn & col, ReadBuffer & istr, co
12721277
if (column->size() == prev_size)
12731278
column->insertDefault();
12741279
}
1280+
1281+
column_object.validateDynamicPathsSizes();
12751282
}
12761283

12771284
SerializationPtr SerializationObject::TypedPathSubcolumnCreator::create(const DB::SerializationPtr & prev, const DataTypePtr &) const
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
00000000 00 |.|
2+
00000001
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/usr/bin/env bash
2+
3+
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
4+
# shellcheck source=../shell_config.sh
5+
. "$CUR_DIR"/../shell_config.sh
6+
7+
$CLICKHOUSE_LOCAL -q "select '{\"a\" : null}'::JSON(a Dynamic) format RowBinary" | $CLICKHOUSE_LOCAL -q "select * from table format RowBinary" --input-format RowBinary --structure "json JSON(max_dynamic_paths=0)" | hexdump -C

0 commit comments

Comments
 (0)