diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 2bb156325443e3..99c8ece8b512e1 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -79,6 +79,51 @@ template void clear_blocks(moodycamel::ConcurrentQueue&, template void clear_blocks(moodycamel::ConcurrentQueue&, RuntimeProfile::Counter* memory_used_counter); +namespace { + +// The no-clone fast path is only safe when the whole column tree is uniquely +// owned. A composite column with shared children still needs COW detachment. +bool is_recursively_exclusive(const IColumn& column) { + if (!column.is_exclusive()) { + return false; + } + + bool exclusive = true; + IColumn::ColumnCallback callback = [&](IColumn::WrappedPtr& subcolumn) { + if (!exclusive) { + return; + } + const ColumnPtr& subcolumn_ptr = const_cast(subcolumn); + DCHECK(subcolumn_ptr); + exclusive = is_recursively_exclusive(*subcolumn_ptr); + }; + // `for_each_subcolumn` only exposes a mutable callback type. This callback + // only reads the wrapped pointers and never calls the non-const accessors. + const_cast(column).for_each_subcolumn(callback); + return exclusive; +} + +// Acquire one live Block slot transactionally. Shared columns are detached while +// the original slot is still intact, so a clone failure cannot leave Block with +// a moved-from/null column. Exclusive column trees keep the stealing fast path. +MutableColumnPtr scoped_mutate_column(ColumnPtr& column, const DataTypePtr& type) { + DCHECK(type); + if (!column) { + return type->create_column(); + } + + MutableColumnPtr mutable_column; + if (is_recursively_exclusive(*column)) { + mutable_column = std::move(*column).mutate(); + } else { + mutable_column = IColumn::mutate(column); + } + column = nullptr; + return mutable_column; +} + +} // namespace + Block::Block(std::initializer_list il) : data {il} {} Block::Block(ColumnsWithTypeAndName data_) : data {std::move(data_)} {} @@ -576,12 +621,127 @@ Columns Block::get_columns_and_convert() { return columns; } -MutableColumns Block::mutate_columns() { +Block::ScopedMutableColumns::ScopedMutableColumns(Block& block) : _block(&block) { + const size_t num_columns = block.data.size(); + _columns.resize(num_columns); + size_t acquired_columns = 0; + try { + for (; acquired_columns < num_columns; ++acquired_columns) { + auto& column_with_type_and_name = block.data[acquired_columns]; + _columns[acquired_columns] = scoped_mutate_column(column_with_type_and_name.column, + column_with_type_and_name.type); + } + } catch (...) { + for (size_t i = 0; i < acquired_columns; ++i) { + block.data[i].column = std::move(_columns[i]); + } + _block = nullptr; + throw; + } +} + +Block::ScopedMutableColumns::~ScopedMutableColumns() { + restore(); +} + +Block::ScopedMutableColumns::ScopedMutableColumns(ScopedMutableColumns&& other) noexcept + : _block(std::exchange(other._block, nullptr)), _columns(std::move(other._columns)) {} + +Block::ScopedMutableColumns& Block::ScopedMutableColumns::operator=( + ScopedMutableColumns&& other) noexcept { + if (this != &other) { + restore(); + _block = std::exchange(other._block, nullptr); + _columns = std::move(other._columns); + } + return *this; +} + +const DataTypePtr& Block::ScopedMutableColumns::get_datatype_by_position(size_t position) const { + DCHECK(_block != nullptr); + return _block->get_by_position(position).type; +} + +const std::string& Block::ScopedMutableColumns::get_name_by_position(size_t position) const { + DCHECK(_block != nullptr); + return _block->get_by_position(position).name; +} + +MutableColumns Block::ScopedMutableColumns::release() { + DCHECK(_block != nullptr); + _block = nullptr; + return std::move(_columns); +} + +void Block::ScopedMutableColumns::restore() { + if (_block != nullptr) { + _block->set_columns(std::move(_columns)); + _block = nullptr; + } +} + +Block::ScopedMutableColumn::ScopedMutableColumn(Block& block, size_t position) + : _block(&block), _position(position) { + DCHECK_LT(_position, _block->data.size()); + auto& column_with_type_and_name = _block->data[_position]; + DCHECK(column_with_type_and_name.type); + _column = + scoped_mutate_column(column_with_type_and_name.column, column_with_type_and_name.type); +} + +Block::ScopedMutableColumn::~ScopedMutableColumn() { + restore(); +} + +Block::ScopedMutableColumn::ScopedMutableColumn(ScopedMutableColumn&& other) noexcept + : _block(std::exchange(other._block, nullptr)), + _position(other._position), + _column(std::move(other._column)) {} + +Block::ScopedMutableColumn& Block::ScopedMutableColumn::operator=( + ScopedMutableColumn&& other) noexcept { + if (this != &other) { + restore(); + _block = std::exchange(other._block, nullptr); + _position = other._position; + _column = std::move(other._column); + } + return *this; +} + +void Block::ScopedMutableColumn::restore() { + if (_block != nullptr) { + DCHECK_LT(_position, _block->data.size()); + _block->data[_position].column = std::move(_column); + _block = nullptr; + } +} + +Block::ScopedMutableColumns Block::mutate_columns_scoped() & { + return ScopedMutableColumns(*this); +} + +Block::ScopedMutableColumn Block::mutate_column_scoped(size_t position) & { + return ScopedMutableColumn(*this, position); +} + +ScopedMutableBlock::ScopedMutableBlock(Block* block) { + DCHECK(block != nullptr); + DataTypes data_types = block->get_data_types(); + std::vector names = block->get_names(); + auto columns_guard = block->mutate_columns_scoped(); + _mutable_block.data_types() = std::move(data_types); + _mutable_block.get_names() = std::move(names); + _mutable_block.set_mutable_columns(columns_guard.release()); + _block = block; +} + +MutableColumns Block::mutate_columns() && { size_t num_columns = data.size(); MutableColumns columns(num_columns); for (size_t i = 0; i < num_columns; ++i) { DCHECK(data[i].type); - columns[i] = data[i].column ? (*std::move(data[i].column)).mutate() + columns[i] = data[i].column ? IColumn::mutate(std::move(data[i].column)) : data[i].type->create_column(); } return columns; @@ -644,7 +804,7 @@ void Block::clear() { data.clear(); } -void Block::clear_column_data(int64_t column_size) noexcept { +void Block::clear_column_data(int64_t column_size) { SCOPED_SKIP_MEMORY_CHECK(); // data.size() greater than column_size, means here have some // function exec result in block, need erase it here @@ -655,9 +815,26 @@ void Block::clear_column_data(int64_t column_size) noexcept { } for (auto& d : data) { if (d.column) { - // Temporarily disable reference count check because a column might be referenced multiple times within a block. - // Queries like this: `select c, c from t1;` - (*std::move(d.column)).assume_mutable()->clear(); + if (d.column->is_exclusive()) { + d.column->assume_mutable()->clear(); + } else { + d.column = d.column->clone_empty(); + } + } + } +} + +void Block::clear_column_data(const std::vector& columns_to_clear) { + SCOPED_SKIP_MEMORY_CHECK(); + for (auto col : columns_to_clear) { + DCHECK_LT(col, data.size()); + auto& column = data[col].column; + if (column) { + if (column->is_exclusive()) { + column->assume_mutable()->clear(); + } else { + column = column->clone_empty(); + } } } } @@ -1085,7 +1262,13 @@ void Block::shrink_char_type_column_suffix_zero(const std::vector& char_ for (auto idx : char_type_idx) { if (idx < data.size()) { auto& col_and_name = this->get_by_position(idx); - col_and_name.column->assume_mutable()->shrink_padding_chars(); + if (col_and_name.column->is_exclusive()) { + col_and_name.column->assume_mutable()->shrink_padding_chars(); + } else { + auto mutable_col = std::move(*col_and_name.column).mutate(); + mutable_col->shrink_padding_chars(); + col_and_name.column = std::move(mutable_col); + } } } } diff --git a/be/src/core/block/block.h b/be/src/core/block/block.h index 62186b36cced7e..3b97cc0fcf86ee 100644 --- a/be/src/core/block/block.h +++ b/be/src/core/block/block.h @@ -212,8 +212,73 @@ class Block { /** Get empty columns with the same types as in block. */ MutableColumns clone_empty_columns() const; - /** Get columns from block for mutation. Columns in block will be nullptr. */ - MutableColumns mutate_columns(); + // RAII owner for mutating columns borrowed from a live Block. While the + // guard is alive, the Block's column slots are moved out and column data + // must be accessed through mutable_columns(). The guard restores columns on + // destruction, so use it when the caller may exit early after detaching. + class ScopedMutableColumns { + public: + explicit ScopedMutableColumns(Block& block); + ~ScopedMutableColumns(); + + ScopedMutableColumns(const ScopedMutableColumns&) = delete; + ScopedMutableColumns& operator=(const ScopedMutableColumns&) = delete; + ScopedMutableColumns(ScopedMutableColumns&& other) noexcept; + ScopedMutableColumns& operator=(ScopedMutableColumns&& other) noexcept; + + MutableColumns& mutable_columns() { return _columns; } + const MutableColumns& mutable_columns() const { return _columns; } + const DataTypePtr& get_datatype_by_position(size_t position) const; + const std::string& get_name_by_position(size_t position) const; + + // Transfer the borrowed owners to another RAII object that will restore + // them. After release(), the original Block remains without columns + // until that owner restores them. Normal callers should let this guard + // restore on destruction. + MutableColumns release(); + void restore(); + + private: + Block* _block = nullptr; + MutableColumns _columns; + }; + + // Single-column variant for localized mutation of a live Block slot. The + // selected slot is unavailable from the Block until this guard restores it. + class ScopedMutableColumn { + public: + ScopedMutableColumn(Block& block, size_t position); + ~ScopedMutableColumn(); + + ScopedMutableColumn(const ScopedMutableColumn&) = delete; + ScopedMutableColumn& operator=(const ScopedMutableColumn&) = delete; + ScopedMutableColumn(ScopedMutableColumn&& other) noexcept; + ScopedMutableColumn& operator=(ScopedMutableColumn&& other) noexcept; + + MutableColumnPtr& mutable_column() { return _column; } + const MutableColumnPtr& mutable_column() const { return _column; } + + void restore(); + + private: + Block* _block = nullptr; + size_t _position = 0; + MutableColumnPtr _column; + }; + + /** Get columns from a consumed block for mutation. Columns in block will be nullptr. */ + MutableColumns mutate_columns() &&; + MutableColumns mutate_columns() & = delete; + + /** Temporarily mutate a live Block's columns. The returned guard owns the columns and + * restores them on destruction; prefer this over manual move/writeback. + */ + ScopedMutableColumns mutate_columns_scoped() &; + ScopedMutableColumns mutate_columns_scoped() && = delete; + + /** Temporarily mutate one live Block column; use when only one slot needs ownership. */ + ScopedMutableColumn mutate_column_scoped(size_t position) &; + ScopedMutableColumn mutate_column_scoped(size_t position) && = delete; /** Replace columns in a block */ void set_columns(MutableColumns&& columns); @@ -224,9 +289,11 @@ class Block { // Shuffle columns in place based on the result_column_ids void shuffle_columns(const std::vector& result_column_ids); - // Default column size = -1 means clear all column in block - // Else clear column [0, column_size) delete column [column_size, data.size) - void clear_column_data(int64_t column_size = -1) noexcept; + // column_size == -1 clears all columns; otherwise clear [0, column_size) + // and drop the rest. Shared columns are detached through clone_empty(), so + // allocation or clone failures propagate. + void clear_column_data(int64_t column_size = -1); + void clear_column_data(const std::vector& columns_to_clear); MOCK_FUNCTION bool mem_reuse() { return !data.empty(); } @@ -381,25 +448,36 @@ class MutableBlock { std::vector _names; public: - static MutableBlock build_mutable_block(Block* block) { - return block == nullptr ? MutableBlock() : MutableBlock(block); + // Build from a consumed Block. This has no restore contract: the source + // Block is left without columns and must not be used as a live output block. + // For caller-owned live Blocks, use ScopedMutableBlock or + // mutate_columns_scoped() instead. + static MutableBlock build_mutable_block(Block&& block) { + return MutableBlock(std::move(block)); } + static MutableBlock build_mutable_block(std::nullptr_t) { return MutableBlock(); } + static MutableBlock build_mutable_block(Block* block) = delete; MutableBlock() = default; ~MutableBlock() = default; - - MutableBlock(Block* block) - : _columns(block->mutate_columns()), - _data_types(block->get_data_types()), - _names(block->get_names()) {} + MutableBlock(const MutableBlock&) = delete; + MutableBlock& operator=(const MutableBlock&) = delete; + MutableBlock(MutableBlock&& m_block) noexcept + : _columns(std::move(m_block._columns)), + _data_types(std::move(m_block._data_types)), + _names(std::move(m_block._names)) {} + + // Consumes block columns and converts them to mutable columns recursively. + // This constructor is for temporary/owned Blocks only. MutableBlock(Block&& block) - : _columns(block.mutate_columns()), + : _columns(std::move(block).mutate_columns()), _data_types(block.get_data_types()), _names(block.get_names()) {} - void operator=(MutableBlock&& m_block) { + MutableBlock& operator=(MutableBlock&& m_block) noexcept { _columns = std::move(m_block._columns); _data_types = std::move(m_block._data_types); _names = std::move(m_block._names); + return *this; } size_t rows() const; @@ -408,6 +486,7 @@ class MutableBlock { bool empty() const { return rows() == 0; } MutableColumns& mutable_columns() { return _columns; } + const MutableColumns& mutable_columns() const { return _columns; } void set_mutable_columns(MutableColumns&& columns) { _columns = std::move(columns); } @@ -584,7 +663,8 @@ class MutableBlock { _names.clear(); } - // columns resist. columns' inner data removed. + // Clear owned mutable columns in place. MutableBlock already owns its + // columns exclusively, so this does not perform COW detaching or cloning. void clear_column_data() noexcept; size_t allocated_bytes() const; @@ -604,6 +684,49 @@ class MutableBlock { std::string dump_names() const; }; +// RAII adapter for code that wants the MutableBlock API over a live Block. It +// owns only the temporary mutable columns and restores them to the Block on +// destruction. While the adapter is alive, read/write column data through +// mutable_block()/mutable_columns(); the Block's column slots are moved out. +class ScopedMutableBlock { +public: + ScopedMutableBlock() = delete; + explicit ScopedMutableBlock(Block* block); + ~ScopedMutableBlock() { restore(); } + + ScopedMutableBlock(const ScopedMutableBlock&) = delete; + ScopedMutableBlock& operator=(const ScopedMutableBlock&) = delete; + + ScopedMutableBlock(ScopedMutableBlock&& other) noexcept + : _block(std::exchange(other._block, nullptr)), + _mutable_block(std::move(other._mutable_block)) {} + + ScopedMutableBlock& operator=(ScopedMutableBlock&& other) noexcept { + if (this != &other) { + restore(); + _block = std::exchange(other._block, nullptr); + _mutable_block = std::move(other._mutable_block); + } + return *this; + } + + MutableBlock& mutable_block() { return _mutable_block; } + const MutableBlock& mutable_block() const { return _mutable_block; } + MutableColumns& mutable_columns() { return _mutable_block.mutable_columns(); } + const MutableColumns& mutable_columns() const { return _mutable_block.mutable_columns(); } + + void restore() { + if (_block != nullptr) { + _block->set_columns(std::move(_mutable_block.mutable_columns())); + _block = nullptr; + } + } + +private: + Block* _block = nullptr; + MutableBlock _mutable_block; +}; + struct IteratorRowRef { std::shared_ptr block; int row_pos; diff --git a/be/src/core/column/column.cpp b/be/src/core/column/column.cpp index b0056e3d4377bd..3fea47f93887ec 100644 --- a/be/src/core/column/column.cpp +++ b/be/src/core/column/column.cpp @@ -232,10 +232,11 @@ bool is_column_const(const IColumn& column) { void IColumn::check_const_only_in_top_level() const { ColumnCallback throw_if_const = [&](WrappedPtr& column) { - if (is_column_const(*column)) { + const ColumnPtr& col = const_cast(column); + if (is_column_const(*col)) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "const column is not allowed to be nested, but got {}", - column->get_name()); + col->get_name()); } }; const_cast(this)->for_each_subcolumn(throw_if_const); diff --git a/be/src/core/column/column.h b/be/src/core/column/column.h index d20ecc9d820846..6a443b6cac6d92 100644 --- a/be/src/core/column/column.h +++ b/be/src/core/column/column.h @@ -579,18 +579,29 @@ class IColumn : public COW { return false; } + // Recursively make a mutable column tree. Use this rvalue member when the + // current column object is being consumed. Shared nodes are cloned, while + // exclusive nodes are reused through the COW fast path. MutablePtr mutate() const&& { MutablePtr res = shallow_mutate(); - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + }); return res; } + // COW entry point for a ColumnPtr. Passing the pointer by value keeps the + // original owner alive until the top-level detach succeeds; passing + // std::move(ptr) explicitly consumes that owner. Subcolumns are still + // recursively detached as needed. static MutablePtr mutate(Ptr ptr) { MutablePtr res = ptr->shallow_mutate(); /// Now use_count is 2. ptr.reset(); /// Reset use_count to 1. - res->for_each_subcolumn( - [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->for_each_subcolumn([](WrappedPtr& subcolumn) { + static_cast(subcolumn) = + std::move(*static_cast(subcolumn)).mutate(); + }); return res; } diff --git a/be/src/core/column/column_array.cpp b/be/src/core/column/column_array.cpp index 6de4d96cc326f7..e1d3e42e5451f0 100644 --- a/be/src/core/column/column_array.cpp +++ b/be/src/core/column/column_array.cpp @@ -47,6 +47,41 @@ class SipHash; namespace doris { +namespace { + +const ColumnArray::ColumnOffsets& check_array_offsets_column(const IColumn& offsets_column) { + const auto* offsets_concrete = typeid_cast(&offsets_column); + if (!offsets_concrete) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "offsets_column must be a ColumnUInt64"); + __builtin_unreachable(); + } + return *offsets_concrete; +} + +void validate_array_offsets(const IColumn& nested_column, const IColumn& offsets_column) { + const auto& offsets_concrete = check_array_offsets_column(offsets_column); + if (!offsets_concrete.empty()) { + auto last_offset = offsets_concrete.get_data().back(); + + /// This will also prevent possible overflow in offset. + if (nested_column.size() != last_offset) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "nested_column's size {}, is not consistent with offsets_column's {}", + nested_column.size(), last_offset); + } + } +} + +void check_empty_array_data_without_offsets(const IColumn& nested_column) { + if (!nested_column.empty()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "Not empty data passed to ColumnArray, but no offsets passed"); + __builtin_unreachable(); + } +} + +} // namespace ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& offsets_column) : data(std::move(nested_column)), offsets(std::move(offsets_column)) { @@ -63,24 +98,8 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of // } // #endif check_const_only_in_top_level(); - const auto* offsets_concrete = typeid_cast(offsets.get()); - - if (!offsets_concrete) { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "offsets_column must be a ColumnUInt64"); - __builtin_unreachable(); - } - - if (!offsets_concrete->empty() && data) { - auto last_offset = offsets_concrete->get_data().back(); - - /// This will also prevent possible overflow in offset. - if (data->size() != last_offset) { - throw doris::Exception( - ErrorCode::INTERNAL_ERROR, - "nested_column's size {}, is not consistent with offsets_column's {}", - data->size(), last_offset); - } - } + validate_array_offsets(*static_cast(data), + *static_cast(offsets)); /** NOTE * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). @@ -90,14 +109,18 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column, MutableColumnPtr&& of ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nested_column)) { data = data->convert_to_full_column_if_const(); - if (!data->empty()) { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, - "Not empty data passed to ColumnArray, but no offsets passed"); - __builtin_unreachable(); - } + check_empty_array_data_without_offsets(*data); offsets = ColumnOffsets::create(); } +ColumnArray::ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column) { + static_cast(data) = std::move(nested_column); + static_cast(offsets) = std::move(offsets_column); + check_const_only_in_top_level(); + validate_array_offsets(*static_cast(data), + *static_cast(offsets)); +} + void ColumnArray::shrink_padding_chars() { data->shrink_padding_chars(); } diff --git a/be/src/core/column/column_array.h b/be/src/core/column/column_array.h index c11547bdbf5e2d..06eb3d2123e6a5 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -75,6 +75,10 @@ class ColumnArray final : public COWHelper { /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr&& nested_column); + /** Create an array column with shared (possibly non-exclusive) nested column and offsets. */ + struct SharedTag {}; + ColumnArray(SharedTag, ColumnPtr nested_column, ColumnPtr offsets_column); + ColumnArray(const ColumnArray&) = default; ColumnArray() = default; @@ -92,18 +96,27 @@ class ColumnArray final : public COWHelper { Offsets64; public: - /** Create immutable column using immutable arguments. This arguments may be shared with other columns. - * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + /** Create a column from immutable/shared subcolumns without cloning them. + * Call IColumn::mutate before modifying the returned column tree. */ using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column, const ColumnPtr& offsets_column) { - return ColumnArray::create(nested_column->assume_mutable(), - offsets_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. + return Base::create(SharedTag {}, nested_column, offsets_column); } static MutablePtr create(const ColumnPtr& nested_column) { - return ColumnArray::create(nested_column->assume_mutable()); + // Construct with shared columns preserved (no cloning), as create(ColumnPtr) is designed + // to accept immutable/shared arguments per the COW contract. + if (!nested_column->empty()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "Not empty data passed to ColumnArray, but no offsets passed"); + __builtin_unreachable(); + } + ColumnPtr empty_offsets = ColumnOffsets::create(); + return Base::create(SharedTag {}, nested_column, std::move(empty_offsets)); } template empty() != create_with_empty) { + const IColumn& col = get_data_column(); + if (col.empty() != create_with_empty) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, " "create_with_empty: {}.", - data->size(), create_with_empty); + col.size(), create_with_empty); } - if (data->size() != 1 && !create_with_empty) { + if (col.size() != 1 && !create_with_empty) { throw doris::Exception( ErrorCode::INTERNAL_ERROR, "Incorrect size of nested column in constructor of ColumnConst: {}, must be 1.", - data->size()); + col.size()); } } @@ -108,7 +109,10 @@ void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_ } void ColumnConst::replace_float_special_values() { - data->replace_float_special_values(); + // COW: get exclusive ownership of data before mutating + auto mutable_data = IColumn::mutate(std::move(static_cast(data))); + mutable_data->replace_float_special_values(); + data = std::move(mutable_data); } std::pair check_column_const_set_readability(const IColumn& column, diff --git a/be/src/core/column/column_const.h b/be/src/core/column/column_const.h index 92a86628526384..7f648ece468dd1 100644 --- a/be/src/core/column/column_const.h +++ b/be/src/core/column/column_const.h @@ -240,7 +240,8 @@ class ColumnConst final : public COWHelper { bool has_enough_capacity(const IColumn& src) const override { return true; } int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override { - auto rhs_const_column = assert_cast(rhs); + const auto& rhs_const_column = + assert_cast(rhs); const auto* this_nullable = check_and_get_column(data.get()); const auto* rhs_nullable = @@ -321,7 +322,11 @@ class ColumnConst final : public COWHelper { size_t deserialize_impl(const char* pos) override { ++s; - return data->deserialize_impl(pos); + ColumnPtr owned = std::move(static_cast(data)); + auto mutable_data = IColumn::mutate(std::move(owned)); + size_t ret = mutable_data->deserialize_impl(pos); + data = std::move(mutable_data); + return ret; } void replace_float_special_values() override; diff --git a/be/src/core/column/column_fixed_length_object.h b/be/src/core/column/column_fixed_length_object.h index 0a00aa0bcf9e8e..3789eeb868d150 100644 --- a/be/src/core/column/column_fixed_length_object.h +++ b/be/src/core/column/column_fixed_length_object.h @@ -119,6 +119,10 @@ class ColumnFixedLengthObject final : public COWHelper(&_data[n * _item_size]), _item_size}; } + StringRef get_raw_data() const override { + return {reinterpret_cast(_data.data()), _data.size()}; + } + void insert(const Field& x) override { DCHECK_EQ(x.get().length(), _item_size); insert_data(x.get().data(), _item_size); diff --git a/be/src/core/column/column_map.cpp b/be/src/core/column/column_map.cpp index 48db377d888b75..7ad4cb522ef9d5 100644 --- a/be/src/core/column/column_map.cpp +++ b/be/src/core/column/column_map.cpp @@ -41,40 +41,68 @@ class SipHash; namespace doris { +namespace { -/** A column of map values. - */ -std::string ColumnMap::get_name() const { - return "Map(" + keys_column->get_name() + ", " + values_column->get_name() + ")"; +const ColumnMap::COffsets& check_map_offsets_column(const IColumn& offsets_column) { + const auto* offsets_concrete = check_and_get_column(offsets_column); + if (!offsets_concrete) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "offsets_column must be a ColumnUInt64"); + __builtin_unreachable(); + } + return *offsets_concrete; } -ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values, MutableColumnPtr&& offsets) - : keys_column(std::move(keys)), - values_column(std::move(values)), - offsets_column(std::move(offsets)) { - check_const_only_in_top_level(); - const auto* offsets_concrete = assert_cast(offsets_column.get()); +void validate_map_columns(const IColumn& keys, const IColumn& values, const IColumn& offsets) { + const auto& offsets_concrete = check_map_offsets_column(offsets); - if (!offsets_concrete->empty() && keys_column && values_column) { - auto last_offset = offsets_concrete->get_data().back(); + if (!offsets_concrete.empty()) { + auto last_offset = offsets_concrete.get_data().back(); /// This will also prevent possible overflow in offset. - if (keys_column->size() != last_offset) { + if (keys.size() != last_offset) { DCHECK(0); throw doris::Exception( doris::ErrorCode::INTERNAL_ERROR, "offsets_column size {} has data inconsistent with key_column {}", last_offset, - keys_column->size()); + keys.size()); } - if (values_column->size() != last_offset) { + if (values.size() != last_offset) { throw doris::Exception( doris::ErrorCode::INTERNAL_ERROR, "offsets_column size {} has data inconsistent with value_column {}", - last_offset, values_column->size()); + last_offset, values.size()); } } } +} // namespace + +/** A column of map values. + */ +std::string ColumnMap::get_name() const { + return "Map(" + keys_column->get_name() + ", " + values_column->get_name() + ")"; +} + +ColumnMap::ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values, MutableColumnPtr&& offsets) + : keys_column(std::move(keys)), + values_column(std::move(values)), + offsets_column(std::move(offsets)) { + check_const_only_in_top_level(); + validate_map_columns(*static_cast(keys_column), + *static_cast(values_column), + *static_cast(offsets_column)); +} + +ColumnMap::ColumnMap(SharedTag, ColumnPtr keys, ColumnPtr values, ColumnPtr offsets) { + static_cast(keys_column) = std::move(keys); + static_cast(values_column) = std::move(values); + static_cast(offsets_column) = std::move(offsets); + check_const_only_in_top_level(); + validate_map_columns(*static_cast(keys_column), + *static_cast(values_column), + *static_cast(offsets_column)); +} + // todo. here to resize every row map MutableColumnPtr ColumnMap::clone_resized(size_t to_size) const { auto res = ColumnMap::create(get_keys().clone_empty(), get_values().clone_empty(), @@ -518,35 +546,45 @@ void ColumnMap::insert_range_from_ignore_overflow(const IColumn& src, size_t sta } ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const { - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filt, result_size_hint); + auto k_arr = ColumnArray::create(static_cast(keys_column), + static_cast(offsets_column)) + ->filter(filt, result_size_hint); + auto v_arr = ColumnArray::create(static_cast(values_column), + static_cast(offsets_column)) + ->filter(filt, result_size_hint); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), assert_cast(*k_arr).get_offsets_ptr()); } size_t ColumnMap::filter(const Filter& filter) { - MutableColumnPtr copied_off = offsets_column->clone_empty(); - copied_off->insert_range_from(*offsets_column, 0, offsets_column->size()); - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->filter(filter); - ColumnArray::create(values_column->assume_mutable(), copied_off->assume_mutable()) - ->filter(filter); - return get_offsets().size(); + // Move subcolumns out of this ColumnMap to get exclusive ownership, then write back. + auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + auto offsets_mut = IColumn::mutate(std::move(static_cast(offsets_column))); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + // Clone offsets for values (both keys and values share the same offsets structure) + MutableColumnPtr copied_off = offsets_mut->clone_empty(); + copied_off->insert_range_from(*offsets_mut, 0, offsets_mut->size()); + auto k_arr = ColumnArray::create(std::move(keys_mut), std::move(offsets_mut)); + k_arr->filter(filter); + auto v_arr = ColumnArray::create(std::move(values_mut), std::move(copied_off)); + v_arr->filter(filter); + // Put filtered subcolumns back + static_cast(keys_column) = k_arr->get_data_ptr(); + static_cast(offsets_column) = k_arr->get_offsets_ptr(); + static_cast(values_column) = v_arr->get_data_ptr(); + // Use const access to avoid assume_mutable_ref() on the just-written-back offsets_column + // (k_arr still holds a ref, so use_count > 1 until k_arr goes out of scope) + return static_cast(offsets_column)->size(); } MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { - // Make a temp column array - auto k_arr = - ColumnArray::create(keys_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); - auto v_arr = - ColumnArray::create(values_column->assume_mutable(), offsets_column->assume_mutable()) - ->permute(perm, limit); + auto k_arr = ColumnArray::create(static_cast(keys_column), + static_cast(offsets_column)) + ->permute(perm, limit); + auto v_arr = ColumnArray::create(static_cast(values_column), + static_cast(offsets_column)) + ->permute(perm, limit); return ColumnMap::create(assert_cast(*k_arr).get_data_ptr(), assert_cast(*v_arr).get_data_ptr(), @@ -554,23 +592,38 @@ MutableColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const } Status ColumnMap::deduplicate_keys(bool recursive) { - const auto inner_rows = keys_column->size(); - const auto rows = offsets_column->size(); + const IColumn& ck = *static_cast(keys_column); + const IColumn& co = *static_cast(offsets_column); + const auto inner_rows = ck.size(); + const auto rows = co.size(); if (recursive) { - auto values_column_ = values_column; - if (values_column_->is_nullable()) { - values_column_ = (assert_cast(*values_column)).get_nested_column_ptr(); - } - - if (auto* values_map = check_and_get_column(values_column_.get())) { - RETURN_IF_ERROR(values_map->deduplicate_keys(recursive)); + const auto& values_ptr = static_cast(values_column); + if (const auto* nullable_values = check_and_get_column(values_ptr.get())) { + if (check_and_get_column(nullable_values->get_nested_column_ptr().get())) { + auto values_mut = + IColumn::mutate(std::move(static_cast(values_column))); + auto& nullable_values_mut = assert_cast(*values_mut); + auto nested_values_mut = + IColumn::mutate(static_cast(nullable_values_mut) + .get_nested_column_ptr()); + auto& nested_values_map = assert_cast(*nested_values_mut); + RETURN_IF_ERROR(nested_values_map.deduplicate_keys(recursive)); + ColumnPtr nested_values_ptr = std::move(nested_values_mut); + nullable_values_mut.change_nested_column(nested_values_ptr); + static_cast(values_column) = std::move(values_mut); + } + } else if (check_and_get_column(values_ptr.get())) { + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + auto& values_map = assert_cast(*values_mut); + RETURN_IF_ERROR(values_map.deduplicate_keys(recursive)); + static_cast(values_column) = std::move(values_mut); } } DorisVector serialized_keys(inner_rows); - const size_t max_one_row_byte_size = keys_column->get_max_row_byte_size(); + const size_t max_one_row_byte_size = ck.get_max_row_byte_size(); size_t total_bytes = max_one_row_byte_size * inner_rows; Arena pool; @@ -579,7 +632,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { // reach mem limit, don't serialize in batch const char* begin = nullptr; for (size_t i = 0; i != inner_rows; ++i) { - serialized_keys[i] = keys_column->serialize_value_into_arena(i, pool, begin); + serialized_keys[i] = ck.serialize_value_into_arena(i, pool, begin); } } else { auto* serialized_key_buffer = reinterpret_cast(pool.alloc(total_bytes)); @@ -590,7 +643,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { serialized_keys[i].size = 0; } - keys_column->serialize(serialized_keys.data(), inner_rows); + ck.serialize(serialized_keys.data(), inner_rows); } auto new_offsets = COffsets::create(); @@ -598,7 +651,7 @@ Status ColumnMap::deduplicate_keys(bool recursive) { auto& new_offsets_data = new_offsets->get_data(); IColumn::Filter filter(inner_rows, 1); - auto& offsets = get_offsets(); + const auto& offsets = static_cast(this)->get_offsets(); Offset64 offset = 0; bool has_duplicated_key = false; @@ -636,8 +689,12 @@ Status ColumnMap::deduplicate_keys(bool recursive) { if (has_duplicated_key) { offsets_column = std::move(new_offsets); - keys_column->filter(filter); - values_column->filter(filter); + auto keys_mut = IColumn::mutate(std::move(static_cast(keys_column))); + keys_mut->filter(filter); + static_cast(keys_column) = std::move(keys_mut); + auto values_mut = IColumn::mutate(std::move(static_cast(values_column))); + values_mut->filter(filter); + static_cast(values_column) = std::move(values_mut); } return Status::OK(); diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index 12f8fe4f8184ab..fa67caa654e25a 100644 --- a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -53,16 +53,16 @@ class Arena; */ class ColumnMap final : public COWHelper { public: - /** Create immutable column using immutable arguments. This arguments may be shared with other columns. - * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + /** Create a column from immutable/shared subcolumns without cloning them. + * Call IColumn::mutate before modifying the returned column tree. */ using Base = COWHelper; using COffsets = ColumnArray::ColumnOffsets; + struct SharedTag {}; static MutablePtr create(const ColumnPtr& keys, const ColumnPtr& values, const ColumnPtr& offsets) { - return ColumnMap::create(keys->assume_mutable(), values->assume_mutable(), - offsets->assume_mutable()); + return Base::create(SharedTag {}, keys, values, offsets); } template { WrappedPtr offsets_column; // offset ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values, MutableColumnPtr&& offsets); + ColumnMap(SharedTag, ColumnPtr keys, ColumnPtr values, ColumnPtr offsets); ColumnMap(const ColumnMap&) = default; }; diff --git a/be/src/core/column/column_nullable.cpp b/be/src/core/column/column_nullable.cpp index 95b186fe894b69..ed6f5865543621 100644 --- a/be/src/core/column/column_nullable.cpp +++ b/be/src/core/column/column_nullable.cpp @@ -28,6 +28,30 @@ #include "exec/sort/sort_block.h" namespace doris { +namespace { + +const ColumnUInt8& check_nullable_null_map_column(const IColumn& null_map) { + const auto* concrete = check_and_get_column(null_map); + if (!concrete) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "ColumnNullable null map must be ColumnUInt8, but got {}", + null_map.get_name()); + __builtin_unreachable(); + } + return *concrete; +} + +void check_nullable_sizes(const IColumn& nested_column, const IColumn& null_map) { + const auto& null_map_concrete = check_nullable_null_map_column(null_map); + if (nested_column.size() != null_map_concrete.size()) { + throw doris::Exception( + ErrorCode::INTERNAL_ERROR, + "Size of nested column {} with size {} is not equal to size of null map {}", + nested_column.get_name(), nested_column.size(), null_map_concrete.size()); + } +} + +} // namespace ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_) : _nested_column(std::move(nested_column_)), _null_map(std::move(null_map_)) { @@ -45,6 +69,40 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP "ColumnNullable cannot have constant null map"); __builtin_unreachable(); } + check_nullable_sizes(*static_cast(_nested_column), + *static_cast(_null_map)); +} + +ColumnNullable::ColumnNullable(SharedTag, ColumnPtr nested_column_, ColumnPtr null_map_) { + check_nullable_sizes(*nested_column_, *null_map_); + + if (const auto* nullable_nested = check_and_get_column(nested_column_.get())) { + auto merged_null_map = null_map_->clone_empty(); + merged_null_map->insert_range_from(*null_map_, 0, null_map_->size()); + auto& merged_null_map_data = assert_cast(*merged_null_map).get_data(); + const auto& nested_null_map_data = nullable_nested->get_null_map_data(); + DCHECK_EQ(merged_null_map_data.size(), nested_null_map_data.size()); + for (size_t i = 0; i != merged_null_map_data.size(); ++i) { + merged_null_map_data[i] |= nested_null_map_data[i]; + } + + static_cast(_nested_column) = nullable_nested->get_nested_column_ptr(); + static_cast(_null_map) = std::move(merged_null_map); + } else { + static_cast(_nested_column) = std::move(nested_column_); + static_cast(_null_map) = std::move(null_map_); + } + + check_const_only_in_top_level(); + check_nullable_sizes(*static_cast(_nested_column), + *static_cast(_null_map)); +} + +void ColumnNullable::replace_columns(ColumnPtr nested_column, ColumnPtr null_map) { + check_nullable_sizes(*nested_column, *null_map); + static_cast(_nested_column) = std::move(nested_column); + static_cast(_null_map) = std::move(null_map); + check_const_only_in_top_level(); } void ColumnNullable::shrink_padding_chars() { @@ -113,7 +171,14 @@ void ColumnNullable::update_crc32c_batch(uint32_t* __restrict hashes, const auto* __restrict real_null_data = get_null_map_column().get_data().data(); if (_nested_column->support_replace_column_null_data()) { // nullmap process is slow, replace null data to default value to avoid nullmap process - _nested_column->assume_mutable()->replace_column_null_data(real_null_data); + // This is an intentional in-place mutation inside a logically-const hash computation: + // null positions are overwritten with defaults so the inner hash loop needs no null checks. + // The invariant is that a column instance is not hashed concurrently through the same + // owner while this per-block hash path runs. Shared aliases are detached by mutate() + // before this normalized nested column is written back. + auto nested_mut = std::move(*static_cast(_nested_column)).mutate(); + nested_mut->replace_column_null_data(real_null_data); + static_cast(const_cast(_nested_column)) = std::move(nested_mut); _nested_column->update_crc32c_batch(hashes, nullptr); } else { auto s = size(); @@ -373,12 +438,15 @@ size_t ColumnNullable::filter(const Filter& filter) { Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { auto* nullable_col_ptr = assert_cast(col_ptr); - WrappedPtr nest_col_ptr = nullable_col_ptr->_nested_column; + // Access the nested column via const path to avoid assume_mutable_ref (which requires + // exclusive ownership). The output col_ptr was just created, so its nested column is exclusive. + IColumn* nest_col_raw = const_cast( + static_cast(nullable_col_ptr->_nested_column).get()); /// `get_null_map_data` will set `_need_update_has_null` to true auto& res_nullmap = nullable_col_ptr->get_null_map_data(); - RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_ptr.get())); + RETURN_IF_ERROR(get_nested_column().filter_by_selector(sel, sel_size, nest_col_raw)); DCHECK(res_nullmap.empty()); res_nullmap.resize(sel_size); auto& cur_nullmap = get_null_map_column().get_data(); diff --git a/be/src/core/column/column_nullable.h b/be/src/core/column/column_nullable.h index a31df0937d2b61..025e37976732e3 100644 --- a/be/src/core/column/column_nullable.h +++ b/be/src/core/column/column_nullable.h @@ -55,16 +55,17 @@ class ColumnNullable final : public COWHelper { friend class COWHelper; ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_); + struct SharedTag {}; + ColumnNullable(SharedTag, ColumnPtr nested_column_, ColumnPtr null_map_); ColumnNullable(const ColumnNullable&) = default; public: - /** Create immutable column using immutable arguments. This arguments may be shared with other columns. - * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + /** Create a column from immutable/shared subcolumns without cloning them. + * Call IColumn::mutate before modifying the returned column tree. */ using Base = COWHelper; static MutablePtr create(const ColumnPtr& nested_column_, const ColumnPtr& null_map_) { - return ColumnNullable::create(nested_column_->assume_mutable(), - null_map_->assume_mutable()); + return Base::create(SharedTag {}, nested_column_, null_map_); } template @@ -269,6 +270,8 @@ class ColumnNullable final : public COWHelper { // used in schema change void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)_nested_column) = other; } + void replace_columns(ColumnPtr nested_column, ColumnPtr null_map); + /// Return the column that represents values. IColumn& get_nested_column() { return *_nested_column; } const IColumn& get_nested_column() const { return *_nested_column; } diff --git a/be/src/core/column/column_varbinary.h b/be/src/core/column/column_varbinary.h index 673059194face5..caad77e28ad44f 100644 --- a/be/src/core/column/column_varbinary.h +++ b/be/src/core/column/column_varbinary.h @@ -44,7 +44,12 @@ class ColumnVarbinary final : public COWHelper { private: ColumnVarbinary() = default; ColumnVarbinary(const size_t n) : _data(n) {} - ColumnVarbinary(const ColumnVarbinary& src) : _data(src._data.begin(), src._data.end()) {} + ColumnVarbinary(const ColumnVarbinary& src) { + _data.reserve(src._data.size()); + for (const auto& value : src._data) { + insert_data(value.data(), value.size()); + } + } public: std::string get_name() const override { return "ColumnVarbinary"; } diff --git a/be/src/core/column/column_variant.cpp b/be/src/core/column/column_variant.cpp index 2ab04c80b861a8..37921e26989f86 100644 --- a/be/src/core/column/column_variant.cpp +++ b/be/src/core/column/column_variant.cpp @@ -484,7 +484,7 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { auto& finalized_object = assert_cast(*finalized); return finalized_object.apply_for_columns(std::forward(func)); } - auto new_root = func(get_root())->assume_mutable(); + auto new_root = std::move(*func(get_root())).mutate(); auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& subcolumn : subcolumns) { @@ -492,16 +492,16 @@ MutableColumnPtr ColumnVariant::apply_for_columns(Func&& func) const { continue; } auto new_subcolumn = func(subcolumn->data.get_finalized_column_ptr()); - if (!res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(), + if (!res->add_sub_column(subcolumn->path, std::move(*new_subcolumn).mutate(), subcolumn->data.get_least_common_type())) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error", subcolumn->path.get_path()); } } auto sparse_column = func(serialized_sparse_column); - res->serialized_sparse_column = sparse_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(std::move(sparse_column)); auto doc_value_column = func(serialized_doc_value_column); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_doc_value_column = IColumn::mutate(std::move(doc_value_column)); res->num_rows = res->serialized_sparse_column->size(); ENABLE_CHECK_CONSISTENCY(res.get()); return res; @@ -942,6 +942,10 @@ bool ColumnVariant::Subcolumn::is_null_at(size_t n) const { } ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + return true; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -972,6 +976,11 @@ void ColumnVariant::Subcolumn::get(size_t n, FieldWithDataType& res) const { ind -= part->size(); } + // Remaining rows are pending lazy defaults (current_num_of_defaults suffix). + if (ind < current_num_of_defaults) { + res = FieldWithDataType(Field()); + return; + } throw doris::Exception(ErrorCode::OUT_OF_BOUND, "Index ({}) for getting field is out of range", n); } @@ -2059,14 +2068,13 @@ Status ColumnVariant::serialize_sparse_columns( /// directly as NestedGroup data by the writer (VariantColumnWriterImpl). void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumns) const { entry->data.finalize(); - auto nested_column = entry->data.get_finalized_column_ptr()->assume_mutable(); + auto nested_column = std::move(*entry->data.get_finalized_column_ptr()).mutate(); auto* nested_column_nullable = assert_cast(nested_column.get()); auto* nested_column_array = - assert_cast(nested_column_nullable->get_nested_column_ptr().get()); + assert_cast(&nested_column_nullable->get_nested_column()); auto& offset = nested_column_array->get_offsets_ptr(); - auto* nested_object_nullable = assert_cast( - nested_column_array->get_data_ptr()->assume_mutable().get()); + auto* nested_object_nullable = assert_cast(&nested_column_array->get_data()); auto& nested_object_column = assert_cast(nested_object_nullable->get_nested_column()); PathInData nested_path = entry->path; @@ -2082,13 +2090,18 @@ void ColumnVariant::unnest(Subcolumns::NodePtr& entry, Subcolumns& res_subcolumn path_builder.append(nested_entry->path.get_parts(), true); auto subnested_column = ColumnArray::create( ColumnNullable::create(nested_entry->data.get_finalized_column_ptr(), - nested_object_nullable->get_null_map_column_ptr()), + static_cast(nested_object_nullable) + ->get_null_map_column() + .get_ptr()), offset); - auto nullable_subnested_column = ColumnNullable::create( - std::move(subnested_column), nested_column_nullable->get_null_map_column_ptr()); + auto nullable_subnested_column = + ColumnNullable::create(std::move(subnested_column), + static_cast(nested_column_nullable) + ->get_null_map_column() + .get_ptr()); auto type = make_nullable( std::make_shared(nested_entry->data.least_common_type.get())); - Subcolumn subcolumn(nullable_subnested_column->assume_mutable(), type, is_nullable); + Subcolumn subcolumn(std::move(nullable_subnested_column), type, is_nullable); res_subcolumns.add(path_builder.build(), subcolumn); } } @@ -2101,7 +2114,24 @@ void ColumnVariant::clear_sparse_column() { } #endif - serialized_sparse_column->clear(); + serialized_sparse_column = ColumnPtr(create_binary_column_fn()); +} + +void ColumnVariant::ensure_binary_columns_rows() { + auto resize_if_empty = [this](WrappedPtr& column) { + const auto& const_column = static_cast(column); + if (const_column->size() == num_rows) { + return; + } + CHECK(const_column->empty()) + << "ColumnVariant binary column size mismatch, rows: " << num_rows + << ", column rows: " << const_column->size(); + auto mutable_column = IColumn::mutate(std::move(static_cast(column))); + mutable_column->resize(num_rows); + column = std::move(mutable_column); + }; + resize_if_empty(serialized_sparse_column); + resize_if_empty(serialized_doc_value_column); } Status ColumnVariant::convert_typed_path_to_storage_type( @@ -2216,6 +2246,7 @@ Status ColumnVariant::pick_subcolumns_to_sparse_column( } void ColumnVariant::finalize(FinalizeMode mode) { + ensure_binary_columns_rows(); if (is_finalized() && mode == FinalizeMode::READ_MODE) { _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2263,6 +2294,7 @@ void ColumnVariant::finalize(FinalizeMode mode) { std::swap(subcolumns, new_subcolumns); _prev_positions.clear(); + ensure_binary_columns_rows(); ENABLE_CHECK_CONSISTENCY(this); } @@ -2313,7 +2345,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { ENABLE_CHECK_CONSISTENCY(res.get()); return res; } - auto new_root = get_root()->filter(filter, count)->assume_mutable(); + auto new_root = std::move(*get_root()->filter(filter, count)).mutate(); auto new_column = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode, get_root_type(), std::move(new_root)); for (const auto& entry : subcolumns) { @@ -2321,7 +2353,7 @@ ColumnPtr ColumnVariant::filter(const Filter& filter, ssize_t count) const { continue; } auto subcolumn = entry->data.get_finalized_column().filter(filter, -1); - new_column->add_sub_column(entry->path, subcolumn->assume_mutable(), + new_column->add_sub_column(entry->path, std::move(*subcolumn).mutate(), entry->data.get_least_common_type()); } new_column->serialized_sparse_column = serialized_sparse_column->filter(filter, count); @@ -2368,8 +2400,10 @@ void ColumnVariant::clear() { // we must keep root column exist empty.create_root(Subcolumn(0, is_nullable, true)); std::swap(empty, subcolumns); - serialized_sparse_column->clear(); - serialized_doc_value_column->clear(); + // Reassign to fresh empty columns to avoid requiring exclusive ownership. + // The existing columns may be shared (use_count > 1) so we cannot clear them in-place. + serialized_sparse_column = ColumnPtr(create_binary_column_fn()); + serialized_doc_value_column = ColumnPtr(create_binary_column_fn()); num_rows = 0; _prev_positions.clear(); ENABLE_CHECK_CONSISTENCY(this); @@ -2769,10 +2803,26 @@ void ColumnVariant::fill_path_column_from_sparse_data(Subcolumn& subcolumn, Null MutableColumnPtr ColumnVariant::clone() const { auto res = ColumnVariant::create(_max_subcolumns_count, _enable_doc_mode); + // Copy typed_path_count and nested_path_count so the subcolumn limit logic is consistent. + res->typed_path_count = typed_path_count; + res->nested_path_count = nested_path_count; Subcolumns new_subcolumns; for (const auto& subcolumn : subcolumns) { - auto new_subcolumn = subcolumn->data; - if (subcolumn->data.is_root) { + // Struct-copy all metadata (num_rows, num_of_defaults_in_prefix, + // current_num_of_defaults, data_types, etc.), then deep-clone data WrappedPtrs. + Subcolumn new_subcolumn = subcolumn->data; + for (auto& wp : new_subcolumn.data) { + static_cast(wp) = + std::move(*static_cast(wp)).mutate(); + } + // Flush pending lazy defaults into actual data so that the cloned subcolumn + // is self-consistent (current_num_of_defaults == 0 after clone). + if (new_subcolumn.current_num_of_defaults > 0) { + size_t pending = new_subcolumn.current_num_of_defaults; + new_subcolumn.current_num_of_defaults = 0; + new_subcolumn.insert_many_defaults(pending); + } + if (subcolumn->data.is_root || subcolumn->path.empty()) { new_subcolumns.create_root(std::move(new_subcolumn)); } else if (!new_subcolumns.add(subcolumn->path, std::move(new_subcolumn))) { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "add path {} is error in clone()", @@ -2783,13 +2833,8 @@ MutableColumnPtr ColumnVariant::clone() const { throw doris::Exception(ErrorCode::INTERNAL_ERROR, "root is nullptr in clone()"); } res->subcolumns = std::move(new_subcolumns); - auto&& column = serialized_sparse_column->get_ptr(); - auto sparse_column = std::move(*column).mutate(); - res->serialized_sparse_column = sparse_column->assume_mutable(); - - auto&& new_doc_value_column = serialized_doc_value_column->get_ptr(); - auto doc_value_column = std::move(*new_doc_value_column).mutate(); - res->serialized_doc_value_column = doc_value_column->assume_mutable(); + res->serialized_sparse_column = IColumn::mutate(serialized_sparse_column->get_ptr()); + res->serialized_doc_value_column = IColumn::mutate(serialized_doc_value_column->get_ptr()); res->set_num_rows(num_rows); ENABLE_CHECK_CONSISTENCY(res.get()); diff --git a/be/src/core/column/column_variant.h b/be/src/core/column/column_variant.h index 16ced2f529118f..1ae92afd54cccc 100644 --- a/be/src/core/column/column_variant.h +++ b/be/src/core/column/column_variant.h @@ -325,7 +325,7 @@ class ColumnVariant final : public COWHelper { if (subcolumns.empty()) { return nullptr; } - return subcolumns.get_mutable_root()->data.get_finalized_column_ptr()->assume_mutable(); + return std::move(*subcolumns.get_mutable_root()->data.get_finalized_column_ptr()).mutate(); } void serialize_one_row_to_string(int64_t row, std::string* output, @@ -354,6 +354,8 @@ class ColumnVariant final : public COWHelper { void clear_sparse_column(); + void ensure_binary_columns_rows(); + // root is null or type nothing bool is_null_root() const; @@ -409,8 +411,12 @@ class ColumnVariant final : public COWHelper { ColumnPtr get_sparse_column() const { return serialized_sparse_column; } + IColumn& get_sparse_column_mutable() { return *serialized_sparse_column; } + ColumnPtr get_doc_value_column() const { return serialized_doc_value_column; } + IColumn& get_doc_value_column_mutable() { return *serialized_doc_value_column; } + // use sparse_subcolumns_schema to record sparse column's path info and type static MutableColumnPtr create_binary_column_fn() { return ColumnMap::create(ColumnString::create(), ColumnString::create(), diff --git a/be/src/core/cow.h b/be/src/core/cow.h index fcac631aa83ce1..4fb6059a1fc111 100644 --- a/be/src/core/cow.h +++ b/be/src/core/cow.h @@ -25,6 +25,9 @@ #include #include +#include "common/exception.h" +#include "common/status.h" + namespace doris { /** Copy-on-write shared ptr. @@ -313,9 +316,24 @@ class COW { public: MutablePtr mutate() const&& { return shallow_mutate(); } - MutablePtr assume_mutable() const { return const_cast(this)->get_ptr(); } + // Ownership assertion for callers that have already proved this object is + // uniquely owned. This does not detach shared owners; use a type-specific + // COW entry point (for example IColumn::mutate) when the pointer may be + // shared. + MutablePtr assume_mutable() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable: use_count() > 1"); + } + return const_cast(this)->get_ptr(); + } - Derived& assume_mutable_ref() const { return const_cast(*derived()); } + // Reference variant of assume_mutable(), with the same ownership contract. + Derived& assume_mutable_ref() const { + if (this->use_count() > 1) { + throw Exception(ErrorCode::INTERNAL_ERROR, "COW::assume_mutable: use_count() > 1"); + } + return const_cast(*derived()); + } protected: /// It works as immutable_ptr if it is const and as mutable_ptr if it is non const. @@ -448,4 +466,4 @@ class COWHelper : public Base { return MutablePtr(static_cast(Base::shallow_mutate().get())); } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/core/data_type/data_type_array.cpp b/be/src/core/data_type/data_type_array.cpp index 1c0ef786e77ea0..38bb9711347e61 100644 --- a/be/src/core/data_type/data_type_array.cpp +++ b/be/src/core/data_type/data_type_array.cpp @@ -120,15 +120,18 @@ const char* DataTypeArray::deserialize(const char* buf, MutableColumnPtr* column buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num); auto* data_column = assert_cast(origin_column); - auto& offsets = data_column->get_offsets(); // offsets + auto offsets_column = std::move(*data_column->get_offsets_ptr()).mutate(); + auto& offsets = assert_cast(*offsets_column).get_data(); offsets.resize(real_have_saved_num); memcpy(offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // children - auto nested_column = data_column->get_data_ptr()->assume_mutable(); + auto nested_column = std::move(*data_column->get_data_ptr()).mutate(); buf = get_nested_type()->deserialize(buf, &nested_column, be_exec_version); + data_column->get_offsets_ptr() = std::move(offsets_column); + data_column->get_data_ptr() = std::move(nested_column); return buf; } diff --git a/be/src/core/data_type/data_type_map.cpp b/be/src/core/data_type/data_type_map.cpp index 0932bf47c218bd..c0292526701531 100644 --- a/be/src/core/data_type/data_type_map.cpp +++ b/be/src/core/data_type/data_type_map.cpp @@ -129,16 +129,20 @@ const char* DataTypeMap::deserialize(const char* buf, MutableColumnPtr* column, buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num); auto* map_column = assert_cast(origin_column); - auto& map_offsets = map_column->get_offsets(); // offsets + auto offsets_column = std::move(*map_column->get_offsets_ptr()).mutate(); + auto& map_offsets = assert_cast(*offsets_column).get_data(); map_offsets.resize(real_have_saved_num); memcpy(map_offsets.data(), buf, sizeof(ColumnArray::Offset64) * real_have_saved_num); buf += sizeof(ColumnArray::Offset64) * real_have_saved_num; // key value - auto nested_keys_column = map_column->get_keys_ptr()->assume_mutable(); - auto nested_values_column = map_column->get_values_ptr()->assume_mutable(); + auto nested_keys_column = std::move(*map_column->get_keys_ptr()).mutate(); + auto nested_values_column = std::move(*map_column->get_values_ptr()).mutate(); buf = get_key_type()->deserialize(buf, &nested_keys_column, be_exec_version); buf = get_value_type()->deserialize(buf, &nested_values_column, be_exec_version); + map_column->get_offsets_ptr() = std::move(offsets_column); + map_column->get_keys_ptr() = std::move(nested_keys_column); + map_column->get_values_ptr() = std::move(nested_values_column); return buf; } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/core/data_type/data_type_nullable.cpp b/be/src/core/data_type/data_type_nullable.cpp index 365dd86c4ee154..77250f5cdbe54a 100644 --- a/be/src/core/data_type/data_type_nullable.cpp +++ b/be/src/core/data_type/data_type_nullable.cpp @@ -107,24 +107,30 @@ const char* DataTypeNullable::deserialize(const char* buf, MutableColumnPtr* col size_t real_have_saved_num = 0; buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num); - auto* col = assert_cast(origin_column); - // null flags auto mem_size = real_have_saved_num * sizeof(bool); - col->get_null_map_data().resize(real_have_saved_num); + auto* col = assert_cast(origin_column); + // A nullable column can be exclusive while its subcolumns are still shared + // after a shallow COW clone. Detach both owner slots before writing into them. + const auto& const_col = *col; + auto nested = std::move(*const_col.get_nested_column_ptr()).mutate(); + auto null_map = std::move(*const_col.get_null_map_column_ptr()).mutate(); + auto& null_map_data = assert_cast(*null_map).get_data(); + + null_map_data.resize(real_have_saved_num); if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { - memcpy(col->get_null_map_data().data(), buf, mem_size); + memcpy(null_map_data.data(), buf, mem_size); buf += mem_size; } else { size_t encode_size = unaligned_load(buf); buf += sizeof(size_t); // Throw exception if mem_size is large than UINT32_MAX - streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(col->get_null_map_data().data()), + streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(null_map_data.data()), cast_set(upper_int32(mem_size))); buf += encode_size; } - // column data values - auto nested = col->get_nested_column_ptr(); + buf = nested_data_type->deserialize(buf, &nested, be_exec_version); + col->replace_columns(std::move(nested), std::move(null_map)); return buf; } diff --git a/be/src/core/data_type/data_type_struct.cpp b/be/src/core/data_type/data_type_struct.cpp index 44cdadd3e98387..ed1e56c51c5365 100644 --- a/be/src/core/data_type/data_type_struct.cpp +++ b/be/src/core/data_type/data_type_struct.cpp @@ -214,8 +214,9 @@ const char* DataTypeStruct::deserialize(const char* buf, MutableColumnPtr* colum auto* struct_column = assert_cast(origin_column); DCHECK(elems.size() == struct_column->tuple_size()); for (size_t i = 0; i < elems.size(); ++i) { - auto child_column = struct_column->get_column_ptr(i)->assume_mutable(); + auto child_column = std::move(*struct_column->get_column_ptr(i)).mutate(); buf = elems[i]->deserialize(buf, &child_column, be_exec_version); + struct_column->get_column_ptr(i) = std::move(child_column); } return buf; } diff --git a/be/src/exec/common/arrow_column_to_doris_column.cpp b/be/src/exec/common/arrow_column_to_doris_column.cpp index cd6e959596791b..645376ee12d7a7 100644 --- a/be/src/exec/common/arrow_column_to_doris_column.cpp +++ b/be/src/exec/common/arrow_column_to_doris_column.cpp @@ -100,10 +100,12 @@ Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arr Status arrow_column_to_doris_column(const arrow::Array* arrow_column, size_t arrow_batch_cur_idx, ColumnPtr& doris_column, const DataTypePtr& type, size_t num_elements, const cctz::time_zone& ctz) { - RETURN_IF_ERROR(type->get_serde()->read_column_from_arrow( - doris_column->assume_mutable_ref(), arrow_column, arrow_batch_cur_idx, - arrow_batch_cur_idx + num_elements, ctz)); - return Status::OK(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); + auto status = type->get_serde()->read_column_from_arrow( + *mutable_column, arrow_column, arrow_batch_cur_idx, arrow_batch_cur_idx + num_elements, + ctz); + doris_column = std::move(mutable_column); + return status; } } // namespace doris diff --git a/be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp b/be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp index d1d4f91270f409..f36afc9f611b25 100644 --- a/be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/exec/common/data_gen_functions/vnumbers_tvf.cpp @@ -49,7 +49,7 @@ Status VNumbersTVF::get_next(RuntimeState* state, Block* block, bool* eos) { // now only support one column for tvf numbers for (int i = 0; i < _slot_num; ++i) { if (mem_reuse) { - columns[i] = std::move(*(block->get_by_position(i).column)).mutate(); + columns[i] = IColumn::mutate(std::move(block->get_by_position(i).column)); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } @@ -73,7 +73,7 @@ Status VNumbersTVF::get_next(RuntimeState* state, Block* block, bool* eos) { } if (mem_reuse) { - columns.clear(); + block->set_columns(std::move(columns)); } else { size_t n_columns = 0; for (const auto* slot_desc : _tuple_desc->slots()) { diff --git a/be/src/exec/common/hash_table/hash_map_context.h b/be/src/exec/common/hash_table/hash_map_context.h index ebd303c66c2b16..5e590ac7789109 100644 --- a/be/src/exec/common/hash_table/hash_map_context.h +++ b/be/src/exec/common/hash_table/hash_map_context.h @@ -955,7 +955,7 @@ struct MethodKeysFixed : public MethodBase { const auto* nullmap = assert_cast(*nullmap_columns[j]).get_data().data(); // make sure null cell is filled by 0x0 - key_columns[j]->assume_mutable()->replace_column_null_data(nullmap); + const_cast(key_columns[j])->replace_column_null_data(nullmap); } auto* __restrict current = result_data + offset; for (size_t i = 0; i < row_numbers; ++i) { diff --git a/be/src/exec/common/partition_sort_utils.cpp b/be/src/exec/common/partition_sort_utils.cpp index 09f834532d5940..ed042b1686dcbe 100644 --- a/be/src/exec/common/partition_sort_utils.cpp +++ b/be/src/exec/common/partition_sort_utils.cpp @@ -28,13 +28,15 @@ Status PartitionBlocks::append_block_by_selector(const Block* input_block, bool _blocks.push_back(Block::create_unique( VectorizedUtils::create_empty_block(_partition_sort_info->_row_desc))); } - auto columns = input_block->get_columns(); - auto mutable_columns = _blocks.back()->mutate_columns(); - DCHECK(columns.size() == mutable_columns.size()); - for (int i = 0; i < mutable_columns.size(); ++i) { - columns[i]->append_data_by_selector(mutable_columns[i], _selector); + { + auto columns = input_block->get_columns(); + auto mutable_columns_guard = _blocks.back()->mutate_columns_scoped(); + auto& mutable_columns = mutable_columns_guard.mutable_columns(); + DCHECK(columns.size() == mutable_columns.size()); + for (int i = 0; i < mutable_columns.size(); ++i) { + columns[i]->append_data_by_selector(mutable_columns[i], _selector); + } } - _blocks.back()->set_columns(std::move(mutable_columns)); _init_rows = _init_rows - selector_rows; _current_input_rows = _current_input_rows + selector_rows; _selector.clear(); diff --git a/be/src/exec/common/util.hpp b/be/src/exec/common/util.hpp index a729142ce92239..477005f709d003 100644 --- a/be/src/exec/common/util.hpp +++ b/be/src/exec/common/util.hpp @@ -36,22 +36,24 @@ class VectorizedUtils { // Block block; return create_columns_with_type_and_name(row_desc); } - static MutableBlock build_mutable_mem_reuse_block(Block* block, const RowDescriptor& row_desc) { + static ScopedMutableBlock build_scoped_mutable_mem_reuse_block(Block* block, + const RowDescriptor& row_desc) { if (!block->mem_reuse()) { MutableBlock tmp(VectorizedUtils::create_columns_with_type_and_name(row_desc)); block->swap(tmp.to_block()); } - return MutableBlock::build_mutable_block(block); + return ScopedMutableBlock(block); } - static MutableBlock build_mutable_mem_reuse_block(Block* block, const Block& other) { + static ScopedMutableBlock build_scoped_mutable_mem_reuse_block(Block* block, + const Block& other) { if (!block->mem_reuse()) { MutableBlock tmp(other.clone_empty()); block->swap(tmp.to_block()); } - return MutableBlock::build_mutable_block(block); + return ScopedMutableBlock(block); } - static MutableBlock build_mutable_mem_reuse_block(Block* block, - const std::vector& slots) { + static ScopedMutableBlock build_scoped_mutable_mem_reuse_block( + Block* block, const std::vector& slots) { if (!block->mem_reuse()) { size_t column_size = slots.size(); MutableColumns columns(column_size); @@ -65,7 +67,7 @@ class VectorizedUtils { slot_desc->col_name())); } } - return MutableBlock(block); + return ScopedMutableBlock(block); } static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) { diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 39e8f236ecd16e..6008e3ac2bff51 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -355,7 +355,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co auto variant = ColumnVariant::create(data_type_object.variant_max_subcolumns_count(), data_type_object.enable_doc_mode()); - variant->create_root(arg.type, arg.column->assume_mutable()); + variant->create_root(arg.type, std::move(*arg.column).mutate()); ColumnPtr nullable = ColumnNullable::create( variant->get_ptr(), assert_cast(arg.column.get())->get_null_map_column_ptr()); @@ -2048,9 +2048,8 @@ void parse_json_to_variant_impl(IColumn& column, const char* src, size_t length, } } column_variant.incr_num_rows(); - auto sparse_column = column_variant.get_sparse_column(); - if (sparse_column->size() == old_num_rows) { - sparse_column->assume_mutable()->insert_default(); + if (column_variant.get_sparse_column()->size() == old_num_rows) { + column_variant.get_sparse_column_mutable().insert_default(); } #ifndef NDEBUG column_variant.check_consistency(); @@ -2147,10 +2146,15 @@ Status _parse_and_materialize_variant_columns(Block& block, for (size_t i = 0; i < variant_pos.size(); ++i) { auto column_ref = block.get_by_position(variant_pos[i]).column; bool is_nullable = column_ref->is_nullable(); - MutableColumnPtr var_column = column_ref->assume_mutable(); + MutableColumnPtr owner_column = std::move(*column_ref).mutate(); + ColumnPtr nullable_null_map; + MutableColumnPtr var_column; if (is_nullable) { - const auto& nullable = assert_cast(*column_ref); - var_column = nullable.get_nested_column_ptr()->assume_mutable(); + const auto& nullable = assert_cast(*owner_column); + nullable_null_map = nullable.get_null_map_column_ptr(); + var_column = std::move(*nullable.get_nested_column_ptr()).mutate(); + } else { + var_column = std::move(owner_column); } auto& var = assert_cast(*var_column); var_column->finalize(); @@ -2194,15 +2198,13 @@ Status _parse_and_materialize_variant_columns(Block& block, auto expected_root_type = make_nullable(std::make_shared()); var.ensure_root_node_type(expected_root_type); - variant_column = var.assume_mutable(); + variant_column = std::move(var_column); } // Wrap variant with nullmap if it is nullable ColumnPtr result = variant_column->get_ptr(); if (is_nullable) { - const auto& null_map = - assert_cast(*column_ref).get_null_map_column_ptr(); - result = ColumnNullable::create(result, null_map); + result = ColumnNullable::create(result, nullable_null_map); } block.get_by_position(variant_pos[i]).column = result; } diff --git a/be/src/exec/exchange/local_exchanger.cpp b/be/src/exec/exchange/local_exchanger.cpp index 620aae737050d6..c83a2c9cecb63a 100644 --- a/be/src/exec/exchange/local_exchanger.cpp +++ b/be/src/exec/exchange/local_exchanger.cpp @@ -146,9 +146,12 @@ void ShuffleExchanger::close(SourceInfo&& source_info) { Status ShuffleExchanger::get_block(RuntimeState* state, Block* block, bool* eos, Profile&& profile, SourceInfo&& source_info) { PartitionedBlock partitioned_block; - MutableBlock mutable_block; - - auto get_data = [&]() -> Status { + if (_dequeue_data(source_info.local_state, partitioned_block, eos, block, + source_info.channel_id)) { + SCOPED_TIMER(profile.copy_data_timer); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + block, partitioned_block.first->_data_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); do { const auto* offset_start = partitioned_block.second.row_idxs->data() + partitioned_block.second.offset_start; @@ -158,15 +161,6 @@ Status ShuffleExchanger::get_block(RuntimeState* state, Block* block, bool* eos, } while (mutable_block.rows() < state->batch_size() && !*eos && _dequeue_data(source_info.local_state, partitioned_block, eos, block, source_info.channel_id)); - return Status::OK(); - }; - - if (_dequeue_data(source_info.local_state, partitioned_block, eos, block, - source_info.channel_id)) { - SCOPED_TIMER(profile.copy_data_timer); - mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( - block, partitioned_block.first->_data_block); - RETURN_IF_ERROR(get_data()); } return Status::OK(); } @@ -212,7 +206,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const std::vectorsize() > 0); + DCHECK(shuffle_idx_to_instance_idx && !shuffle_idx_to_instance_idx->empty()); const auto& map = *shuffle_idx_to_instance_idx; int32_t enqueue_rows = 0; for (const auto& it : map) { @@ -419,8 +413,9 @@ Status BroadcastExchanger::get_block(RuntimeState* state, Block* block, bool* eo if (_dequeue_data(source_info.local_state, partitioned_block, eos, block, source_info.channel_id)) { SCOPED_TIMER(profile.copy_data_timer); - MutableBlock mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( block, partitioned_block.first->_data_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); auto block_wrapper = partitioned_block.first; RETURN_IF_ERROR(mutable_block.add_rows(&block_wrapper->_data_block, partitioned_block.second.offset_start, @@ -540,9 +535,12 @@ Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, Block* block return Status::OK(); } PartitionedBlock partitioned_block; - MutableBlock mutable_block; - - auto get_data = [&]() -> Status { + if (_dequeue_data(source_info.local_state, partitioned_block, eos, block, + source_info.channel_id)) { + SCOPED_TIMER(profile.copy_data_timer); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + block, partitioned_block.first->_data_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); do { if (partitioned_block.second.row_idxs == nullptr) { // The passthrough path which means the block is not partitioned, we can directly move the block without copying. @@ -552,6 +550,7 @@ Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, Block* block _tmp_eos[source_info.channel_id] = *eos; *eos = false; } else { + scoped_mutable_block.restore(); *block = std::move(partitioned_block.first->_data_block); } break; @@ -564,15 +563,6 @@ Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, Block* block } while (mutable_block.rows() < state->batch_size() && !*eos && _dequeue_data(source_info.local_state, partitioned_block, eos, block, source_info.channel_id)); - return Status::OK(); - }; - - if (_dequeue_data(source_info.local_state, partitioned_block, eos, block, - source_info.channel_id)) { - SCOPED_TIMER(profile.copy_data_timer); - mutable_block = VectorizedUtils::build_mutable_mem_reuse_block( - block, partitioned_block.first->_data_block); - RETURN_IF_ERROR(get_data()); } return Status::OK(); } diff --git a/be/src/exec/exchange/vdata_stream_sender.cpp b/be/src/exec/exchange/vdata_stream_sender.cpp index 04e68aeb136d13..72767deac23456 100644 --- a/be/src/exec/exchange/vdata_stream_sender.cpp +++ b/be/src/exec/exchange/vdata_stream_sender.cpp @@ -329,7 +329,7 @@ Status BlockSerializer::_serialize_block(PBlock* dest, size_t num_receivers) { reset_block(); } else { block.clear_column_data(); - _mutable_block->set_mutable_columns(block.mutate_columns()); + _mutable_block->set_mutable_columns(std::move(block).mutate_columns()); } } diff --git a/be/src/exec/operator/aggregation_sink_operator.cpp b/be/src/exec/operator/aggregation_sink_operator.cpp index f6a9c2cdc4211d..0808361ad74f86 100644 --- a/be/src/exec/operator/aggregation_sink_operator.cpp +++ b/be/src/exec/operator/aggregation_sink_operator.cpp @@ -299,16 +299,20 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(Block* block) { for (int i = 0; i < key_size; ++i) { if constexpr (for_spill) { - key_columns[i] = block->get_by_position(i).column.get(); key_locs[i] = i; } else { int& result_column_id = key_locs[i]; RETURN_IF_ERROR( Base::_shared_state->probe_expr_ctxs[i]->execute(block, &result_column_id)); block->replace_by_position_if_const(result_column_id); - key_columns[i] = block->get_by_position(result_column_id).column.get(); } - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = + IColumn::mutate(std::move(block->get_by_position(key_locs[i]).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(key_locs[i]).column = std::move(mutable_col); + key_columns[i] = block->get_by_position(key_locs[i]).column.get(); + } } size_t rows = block->rows(); @@ -491,8 +495,13 @@ Status AggSinkLocalState::_execute_with_serialized_key_helper(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + { + auto mutable_col = + IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_col); + } key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/aggregation_source_operator.cpp b/be/src/exec/operator/aggregation_source_operator.cpp index d5385efdd06fe0..05e6a30c612249 100644 --- a/be/src/exec/operator/aggregation_source_operator.cpp +++ b/be/src/exec/operator/aggregation_source_operator.cpp @@ -113,7 +113,7 @@ Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state, Bloc MutableColumns key_columns; for (int i = 0; i < key_size; ++i) { if (mem_reuse) { - key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); + key_columns.emplace_back(IColumn::mutate(std::move(block->get_by_position(i).column))); } else { key_columns.emplace_back( shared_state.probe_expr_ctxs[i]->root()->data_type()->create_column()); @@ -121,149 +121,156 @@ Status AggLocalState::_get_results_with_serialized_key(RuntimeState* state, Bloc } std::visit( - Overload { - [&](std::monostate& arg) -> void { - throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); - }, - [&](auto& agg_method) -> void { - agg_method.init_iterator(); - auto& data = *agg_method.hash_table; - const auto size = std::min(data.size(), size_t(state->batch_size())); - using KeyType = std::decay_t::Key; - std::vector keys(size); - - if (shared_state.use_simple_count) { - DCHECK_EQ(shared_state.aggregate_evaluators.size(), 1); - - value_data_types[0] = shared_state.aggregate_evaluators[0] - ->function() - ->get_serialized_type(); - if (mem_reuse) { - value_columns[0] = - std::move(*block->get_by_position(key_size).column) - .mutate(); - } else { - value_columns[0] = shared_state.aggregate_evaluators[0] - ->function() - ->create_serialize_column(); - } - - auto& count_col = - assert_cast(*value_columns[0]); - uint32_t num_rows = 0; - { - SCOPED_TIMER(_hash_table_iterate_timer); - auto& it = agg_method.begin; - while (it != agg_method.end && num_rows < state->batch_size()) { - keys[num_rows] = it.get_first(); - auto inline_count = - reinterpret_cast(it.get_second()); - count_col.insert_data( - reinterpret_cast(&inline_count), - sizeof(UInt64)); - ++it; - ++num_rows; - } - } - - { - SCOPED_TIMER(_insert_keys_to_column_timer); - agg_method.insert_keys_into_columns(keys, key_columns, num_rows); - } - - // Handle null key if present - if (agg_method.begin == agg_method.end) { - if (agg_method.hash_table->has_null_key_data()) { - DCHECK(key_columns.size() == 1); - DCHECK(key_columns[0]->is_nullable()); - if (num_rows < state->batch_size()) { - key_columns[0]->insert_data(nullptr, 0); - auto mapped = - agg_method.hash_table->template get_null_key_data< - AggregateDataPtr>(); - count_col.resize(num_rows + 1); - *reinterpret_cast(count_col.get_data().data() + - num_rows * sizeof(UInt64)) = - std::bit_cast(mapped); - *eos = true; - } - } else { - *eos = true; - } - } - return; - } - - if (shared_state.values.size() < size + 1) { - shared_state.values.resize(size + 1); - } - - uint32_t num_rows = 0; - shared_state.aggregate_data_container->init_once(); - auto& iter = shared_state.aggregate_data_container->iterator; - - { - SCOPED_TIMER(_hash_table_iterate_timer); - while (iter != shared_state.aggregate_data_container->end() && - num_rows < state->batch_size()) { - keys[num_rows] = iter.template get_key(); - shared_state.values[num_rows] = iter.get_aggregate_data(); - ++iter; - ++num_rows; - } - } + Overload {[&](std::monostate& arg) -> void { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "uninited hash table"); + }, + [&](auto& agg_method) -> void { + agg_method.init_iterator(); + auto& data = *agg_method.hash_table; + const auto size = std::min(data.size(), size_t(state->batch_size())); + using KeyType = std::decay_t::Key; + std::vector keys(size); + + if (shared_state.use_simple_count) { + DCHECK_EQ(shared_state.aggregate_evaluators.size(), 1); + + value_data_types[0] = shared_state.aggregate_evaluators[0] + ->function() + ->get_serialized_type(); + if (mem_reuse) { + value_columns[0] = IColumn::mutate( + std::move(block->get_by_position(key_size).column)); + } else { + value_columns[0] = shared_state.aggregate_evaluators[0] + ->function() + ->create_serialize_column(); + } - { - SCOPED_TIMER(_insert_keys_to_column_timer); - agg_method.insert_keys_into_columns(keys, key_columns, num_rows); - } + auto& count_col = + assert_cast(*value_columns[0]); + uint32_t num_rows = 0; + { + SCOPED_TIMER(_hash_table_iterate_timer); + auto& it = agg_method.begin; + while (it != agg_method.end && num_rows < state->batch_size()) { + keys[num_rows] = it.get_first(); + auto inline_count = + reinterpret_cast(it.get_second()); + count_col.insert_data( + reinterpret_cast(&inline_count), + sizeof(UInt64)); + ++it; + ++num_rows; + } + } - if (iter == shared_state.aggregate_data_container->end()) { - if (agg_method.hash_table->has_null_key_data()) { - // only one key of group by support wrap null key - // here need additional processing logic on the null key / value - DCHECK(key_columns.size() == 1); - DCHECK(key_columns[0]->is_nullable()); - if (agg_method.hash_table->has_null_key_data()) { - key_columns[0]->insert_data(nullptr, 0); - shared_state.values[num_rows] = - agg_method.hash_table->template get_null_key_data< - AggregateDataPtr>(); - ++num_rows; - *eos = true; - } - } else { - *eos = true; - } - } + { + SCOPED_TIMER(_insert_keys_to_column_timer); + agg_method.insert_keys_into_columns(keys, key_columns, num_rows); + } - { - SCOPED_TIMER(_insert_values_to_column_timer); - for (size_t i = 0; i < shared_state.aggregate_evaluators.size(); ++i) { - value_data_types[i] = shared_state.aggregate_evaluators[i] - ->function() - ->get_serialized_type(); - if (mem_reuse) { - value_columns[i] = - std::move(*block->get_by_position(i + key_size).column) - .mutate(); - } else { - value_columns[i] = shared_state.aggregate_evaluators[i] - ->function() - ->create_serialize_column(); - } - shared_state.aggregate_evaluators[i] - ->function() - ->serialize_to_column( - shared_state.values, - shared_state.offsets_of_aggregate_states[i], - value_columns[i], num_rows); - } - } - }}, + // Handle null key if present + if (agg_method.begin == agg_method.end) { + if (agg_method.hash_table->has_null_key_data()) { + DCHECK(key_columns.size() == 1); + DCHECK(key_columns[0]->is_nullable()); + if (num_rows < state->batch_size()) { + key_columns[0]->insert_data(nullptr, 0); + auto mapped = + agg_method.hash_table->template get_null_key_data< + AggregateDataPtr>(); + count_col.resize(num_rows + 1); + *reinterpret_cast(count_col.get_data().data() + + num_rows * sizeof(UInt64)) = + std::bit_cast(mapped); + *eos = true; + } + } else { + *eos = true; + } + } + return; + } + + if (shared_state.values.size() < size + 1) { + shared_state.values.resize(size + 1); + } + + uint32_t num_rows = 0; + shared_state.aggregate_data_container->init_once(); + auto& iter = shared_state.aggregate_data_container->iterator; + + { + SCOPED_TIMER(_hash_table_iterate_timer); + while (iter != shared_state.aggregate_data_container->end() && + num_rows < state->batch_size()) { + keys[num_rows] = iter.template get_key(); + shared_state.values[num_rows] = iter.get_aggregate_data(); + ++iter; + ++num_rows; + } + } + + { + SCOPED_TIMER(_insert_keys_to_column_timer); + agg_method.insert_keys_into_columns(keys, key_columns, num_rows); + } + + if (iter == shared_state.aggregate_data_container->end()) { + if (agg_method.hash_table->has_null_key_data()) { + // only one key of group by support wrap null key + // here need additional processing logic on the null key / value + DCHECK(key_columns.size() == 1); + DCHECK(key_columns[0]->is_nullable()); + if (agg_method.hash_table->has_null_key_data()) { + key_columns[0]->insert_data(nullptr, 0); + shared_state.values[num_rows] = + agg_method.hash_table->template get_null_key_data< + AggregateDataPtr>(); + ++num_rows; + *eos = true; + } + } else { + *eos = true; + } + } + + { + SCOPED_TIMER(_insert_values_to_column_timer); + for (size_t i = 0; i < shared_state.aggregate_evaluators.size(); + ++i) { + value_data_types[i] = shared_state.aggregate_evaluators[i] + ->function() + ->get_serialized_type(); + if (mem_reuse) { + value_columns[i] = IColumn::mutate(std::move( + block->get_by_position(i + key_size).column)); + } else { + value_columns[i] = shared_state.aggregate_evaluators[i] + ->function() + ->create_serialize_column(); + } + shared_state.aggregate_evaluators[i] + ->function() + ->serialize_to_column( + shared_state.values, + shared_state.offsets_of_aggregate_states[i], + value_columns[i], num_rows); + } + } + }}, shared_state.agg_data->method_variant); - if (!mem_reuse) { + if (mem_reuse) { + MutableColumns columns(block->columns()); + for (int i = 0; i < key_size; ++i) { + columns[i] = std::move(key_columns[i]); + } + for (int i = 0; i < agg_size; ++i) { + columns[key_size + i] = std::move(value_columns[i]); + } + block->set_columns(std::move(columns)); + } else { ColumnsWithTypeAndName columns_with_schema; for (int i = 0; i < key_size; ++i) { columns_with_schema.emplace_back(std::move(key_columns[i]), @@ -294,7 +301,7 @@ Status AggLocalState::_get_with_serialized_key_result(RuntimeState* state, Block if (!mem_reuse) { key_columns.emplace_back(columns_with_schema[i].type->create_column()); } else { - key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); + key_columns.emplace_back(IColumn::mutate(std::move(block->get_by_position(i).column))); } } MutableColumns value_columns; @@ -302,7 +309,8 @@ Status AggLocalState::_get_with_serialized_key_result(RuntimeState* state, Block if (!mem_reuse) { value_columns.emplace_back(columns_with_schema[i].type->create_column()); } else { - value_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); + value_columns.emplace_back( + IColumn::mutate(std::move(block->get_by_position(i).column))); } } @@ -420,7 +428,17 @@ Status AggLocalState::_get_with_serialized_key_result(RuntimeState* state, Block }}, shared_state.agg_data->method_variant); - if (!mem_reuse) { + if (mem_reuse) { + MutableColumns columns(block->columns()); + for (int i = 0; i < block->columns(); ++i) { + if (i < key_size) { + columns[i] = std::move(key_columns[i]); + } else { + columns[i] = std::move(value_columns[i - key_size]); + } + } + block->set_columns(std::move(columns)); + } else { *block = columns_with_schema; MutableColumns columns(block->columns()); for (int i = 0; i < block->columns(); ++i) { diff --git a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp index 58f47001185983..8cb58b2d532b95 100644 --- a/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp +++ b/be/src/exec/operator/bucketed_aggregation_sink_operator.cpp @@ -175,8 +175,11 @@ Status BucketedAggSinkLocalState::_execute_with_serialized_key(Block* block) { block->get_by_position(result_column_id).column = block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } diff --git a/be/src/exec/operator/bucketed_aggregation_source_operator.cpp b/be/src/exec/operator/bucketed_aggregation_source_operator.cpp index 966b0acbc90a08..e1bd71089557fb 100644 --- a/be/src/exec/operator/bucketed_aggregation_source_operator.cpp +++ b/be/src/exec/operator/bucketed_aggregation_source_operator.cpp @@ -328,7 +328,8 @@ void BucketedAggLocalState::_build_output_block(Block* block, MutableColumns& ke MutableColumns value_columns; for (size_t i = key_size; i < columns_with_schema.size(); ++i) { if (mem_reuse) { - value_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); + value_columns.emplace_back( + IColumn::mutate(std::move(block->get_by_position(i).column))); } else { value_columns.emplace_back(columns_with_schema[i].type->create_column()); } @@ -362,6 +363,15 @@ void BucketedAggLocalState::_build_output_block(Block* block, MutableColumns& ke columns_with_schema[key_size + i].type, ""); } *block = Block(result_columns); + } else { + MutableColumns columns(block->columns()); + for (size_t i = 0; i < key_size; ++i) { + columns[i] = std::move(key_columns[i]); + } + for (size_t i = 0; i < agg_size; ++i) { + columns[key_size + i] = std::move(value_columns[i]); + } + block->set_columns(std::move(columns)); } } else { // Serialize path. simple_count should always finalize. @@ -373,7 +383,8 @@ void BucketedAggLocalState::_build_output_block(Block* block, MutableColumns& ke value_data_types[i] = shared_state.aggregate_evaluators[i]->function()->get_serialized_type(); if (mem_reuse) { - value_columns[i] = std::move(*block->get_by_position(key_size + i).column).mutate(); + value_columns[i] = + IColumn::mutate(std::move(block->get_by_position(key_size + i).column)); } else { value_columns[i] = shared_state.aggregate_evaluators[i]->function()->create_serialize_column(); @@ -394,6 +405,15 @@ void BucketedAggLocalState::_build_output_block(Block* block, MutableColumns& ke result_columns.emplace_back(std::move(value_columns[i]), value_data_types[i], ""); } *block = Block(result_columns); + } else { + MutableColumns columns(block->columns()); + for (size_t i = 0; i < key_size; ++i) { + columns[i] = std::move(key_columns[i]); + } + for (size_t i = 0; i < agg_size; ++i) { + columns[key_size + i] = std::move(value_columns[i]); + } + block->set_columns(std::move(columns)); } } } @@ -452,8 +472,8 @@ Status BucketedAggLocalState::_output_bucket(RuntimeState* state, Block* block, MutableColumns key_columns; for (size_t i = 0; i < key_size; ++i) { if (mem_reuse) { - key_columns.emplace_back( - std::move(*block->get_by_position(i).column).mutate()); + key_columns.emplace_back(IColumn::mutate( + std::move(block->get_by_position(i).column))); } else { key_columns.emplace_back(shared_state.probe_expr_ctxs[i] ->root() @@ -535,8 +555,8 @@ Status BucketedAggLocalState::_merge_and_output_null_keys(RuntimeState* state, B MutableColumns key_columns; for (size_t i = 0; i < key_size; ++i) { if (mem_reuse) { - key_columns.emplace_back( - std::move(*block->get_by_position(i).column).mutate()); + key_columns.emplace_back(IColumn::mutate( + std::move(block->get_by_position(i).column))); } else { key_columns.emplace_back(shared_state.probe_expr_ctxs[i] ->root() diff --git a/be/src/exec/operator/cache_source_operator.cpp b/be/src/exec/operator/cache_source_operator.cpp index aec8206f54b682..6f2dc9e084e6c9 100644 --- a/be/src/exec/operator/cache_source_operator.cpp +++ b/be/src/exec/operator/cache_source_operator.cpp @@ -156,7 +156,10 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = output_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*output_block)); + ScopedMutableBlock scoped_mutable_block(block); + auto& mutable_block = scoped_mutable_block.mutable_block(); + RETURN_IF_ERROR(mutable_block.merge(*output_block)); + scoped_mutable_block.restore(); local_state._current_query_cache_rows += output_block->rows(); auto mem_consume = output_block->allocated_bytes(); local_state._current_query_cache_bytes += mem_consume; @@ -179,7 +182,10 @@ Status CacheSourceOperatorX::get_block(RuntimeState* state, Block* block, bool* if (need_clone_empty) { *block = hit_cache_block->clone_empty(); } - RETURN_IF_ERROR(MutableBlock::build_mutable_block(block).merge(*hit_cache_block)); + ScopedMutableBlock scoped_mutable_block(block); + auto& mutable_block = scoped_mutable_block.mutable_block(); + RETURN_IF_ERROR(mutable_block.merge(*hit_cache_block)); + scoped_mutable_block.restore(); if (!local_state._hit_cache_column_orders.empty()) { auto datas = block->get_columns_with_type_and_name(); block->clear(); diff --git a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp index 298896401d6f3e..92c11cf2896154 100644 --- a/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/distinct_streaming_aggregation_operator.cpp @@ -162,7 +162,13 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); + { + auto mutable_col = IColumn::mutate( + std::move(in_block->get_by_position(result_column_id).column)); + mutable_col->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_col); + key_columns[i] = in_block->get_by_position(result_column_id).column.get(); + } result_idxs[i] = result_column_id; } } @@ -210,18 +216,22 @@ Status DistinctStreamingAggLocalState::_distinct_pre_agg_with_serialized_key( if (out_block->rows() + _distinct_row.size() > batch_size) { size_t split_size = batch_size - out_block->rows(); for (int i = 0; i < key_size; ++i) { - auto output_dst = out_block->get_by_position(i).column->assume_mutable(); + auto output_dst = + IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(output_dst, _distinct_row, 0, split_size); - auto cache_dst = _cache_block.get_by_position(i).column->assume_mutable(); + out_block->get_by_position(i).column = std::move(output_dst); + auto cache_dst = + IColumn::mutate(std::move(_cache_block.get_by_position(i).column)); key_columns[i]->append_data_by_selector(cache_dst, _distinct_row, split_size, _distinct_row.size()); + _cache_block.get_by_position(i).column = std::move(cache_dst); } } else { for (int i = 0; i < key_size; ++i) { - auto output_column = out_block->get_by_position(i).column; - auto dst = output_column->assume_mutable(); + auto dst = IColumn::mutate(std::move(out_block->get_by_position(i).column)); key_columns[i]->append_data_by_selector(dst, _distinct_row); + out_block->get_by_position(i).column = std::move(dst); } } } diff --git a/be/src/exec/operator/exchange_sink_operator.cpp b/be/src/exec/operator/exchange_sink_operator.cpp index 35698f5217d709..e65dd979ad21ea 100644 --- a/be/src/exec/operator/exchange_sink_operator.cpp +++ b/be/src/exec/operator/exchange_sink_operator.cpp @@ -509,7 +509,7 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, Block* block, bool eos) } else { cur_block.clear_column_data(); local_state._serializer.get_block()->set_mutable_columns( - cur_block.mutate_columns()); + std::move(cur_block).mutate_columns()); } } } diff --git a/be/src/exec/operator/group_commit_block_sink_operator.cpp b/be/src/exec/operator/group_commit_block_sink_operator.cpp index f29029ead7cc26..a72755720d5b77 100644 --- a/be/src/exec/operator/group_commit_block_sink_operator.cpp +++ b/be/src/exec/operator/group_commit_block_sink_operator.cpp @@ -372,7 +372,7 @@ Status GroupCommitBlockSinkOperatorX::sink(RuntimeState* state, Block* input_blo if (local_state._block_convertor->num_filtered_rows() > 0 || local_state._has_filtered_rows) { auto cloneBlock = block->clone_without_columns(); - auto res_block = MutableBlock::build_mutable_block(&cloneBlock); + auto res_block = MutableBlock::build_mutable_block(std::move(cloneBlock)); for (int i = 0; i < rows; ++i) { if (local_state._block_convertor->filter_map()[i]) { continue; diff --git a/be/src/exec/operator/hashjoin_build_sink.cpp b/be/src/exec/operator/hashjoin_build_sink.cpp index 4c5815c71ab691..3071e5e53225e5 100644 --- a/be/src/exec/operator/hashjoin_build_sink.cpp +++ b/be/src/exec/operator/hashjoin_build_sink.cpp @@ -576,7 +576,9 @@ Status HashJoinBuildSinkLocalState::process_build_block(RuntimeState* state, Blo for (auto& data : block) { data.column = std::move(*data.column).mutate()->convert_column_if_overflow(); if (p._need_finalize_variant_column) { - std::move(*data.column).mutate()->finalize(); + auto mutable_column = IColumn::mutate(std::move(data.column)); + mutable_column->finalize(); + data.column = std::move(mutable_column); } } @@ -830,7 +832,7 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, Block* in_block, bo *local_state._build_expr_call_timer, local_state._build_col_ids)); local_state._build_side_mutable_block = - MutableBlock::build_mutable_block(&tmp_build_block); + MutableBlock::build_mutable_block(std::move(tmp_build_block)); } if (!in_block->empty()) { diff --git a/be/src/exec/operator/hashjoin_build_sink.h b/be/src/exec/operator/hashjoin_build_sink.h index be77ef6cc690bc..3c3faabcdb534e 100644 --- a/be/src/exec/operator/hashjoin_build_sink.h +++ b/be/src/exec/operator/hashjoin_build_sink.h @@ -231,7 +231,7 @@ struct ProcessHashTableBuild { // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. if (_build_raw_ptrs.size() == 1 && null_map && *has_null_key) { - _build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data()); + const_cast(_build_raw_ptrs[0])->replace_column_null_data(null_map->data()); } hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, diff --git a/be/src/exec/operator/hashjoin_probe_operator.cpp b/be/src/exec/operator/hashjoin_probe_operator.cpp index 9b913cc9b23451..ea4b812323a9e6 100644 --- a/be/src/exec/operator/hashjoin_probe_operator.cpp +++ b/be/src/exec/operator/hashjoin_probe_operator.cpp @@ -232,7 +232,8 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, Block* output_bl local_state._join_block.clear_column_data(); - MutableBlock mutable_join_block(&local_state._join_block); + ScopedMutableBlock scoped_mutable_join_block(&local_state._join_block); + auto& mutable_join_block = scoped_mutable_join_block.mutable_block(); Block temp_block; Status st; @@ -313,8 +314,8 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, Block* output_bl state, output_block, eos, &temp_block, !local_state._shared_state->left_semi_direct_return)); // Here make _join_block release the columns' ptr + scoped_mutable_join_block.restore(); local_state._join_block.set_columns(local_state._join_block.clone_empty_columns()); - mutable_join_block.clear(); return Status::OK(); } diff --git a/be/src/exec/operator/join/process_hash_table_probe_impl.h b/be/src/exec/operator/join/process_hash_table_probe_impl.h index 5bfd2ff4e0cbfc..bcc4408906bf54 100644 --- a/be/src/exec/operator/join/process_hash_table_probe_impl.h +++ b/be/src/exec/operator/join/process_hash_table_probe_impl.h @@ -164,7 +164,10 @@ void ProcessHashTableProbe::probe_side_output_column(MutableColumns& for (int i = 0; i < _left_output_slot_flags.size(); ++i) { if (_left_output_slot_flags[i]) { if (_parent_operator->need_finalize_variant_column()) { - std::move(*probe_block.get_by_position(i).column).mutate()->finalize(); + auto mutable_column = + IColumn::mutate(std::move(probe_block.get_by_position(i).column)); + mutable_column->finalize(); + probe_block.get_by_position(i).column = std::move(mutable_column); } } @@ -200,7 +203,8 @@ typename HashTableType::State ProcessHashTableProbe::_init_probe_sid // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. if (_parent->_probe_columns.size() == 1 && null_map) { if (simd::contain_one(null_map, probe_rows)) { - _parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map); + const_cast(_parent->_probe_columns[0]) + ->replace_column_null_data(null_map); } } @@ -650,9 +654,11 @@ Status ProcessHashTableProbe::finalize_block_with_filter(Block* outp ->get_data_column_ptr(); auto& src = source_block->get_by_position(column_id).column; - auto dst = output_block->get_by_position(output_column_id).column->assume_mutable(); + auto dst = IColumn::mutate( + std::move(output_block->get_by_position(output_column_id).column)); dst->clear(); insert_with_indexs(dst, src, container, all_match_one); + output_block->get_by_position(output_column_id).column = std::move(dst); } }; do_lazy_materialize(_right_output_slot_flags, _build_indexs, (int)_right_col_idx, @@ -717,14 +723,17 @@ Status ProcessHashTableProbe::do_mark_join_conjuncts(Block* output_b return Status::OK(); } - auto mark_column_mutable = - output_block->get_by_position(_parent->_mark_column_id).column->assume_mutable(); - auto& mark_column = assert_cast(*mark_column_mutable); - IColumn::Filter& filter = assert_cast(mark_column.get_nested_column()).get_data(); + auto mark_column_mutable = IColumn::mutate( + std::move(output_block->get_by_position(_parent->_mark_column_id).column)); + auto* mark_column = assert_cast(mark_column_mutable.get()); + IColumn::Filter& filter = + assert_cast(mark_column->get_nested_column()).get_data(); + auto& null_map_column = mark_column->get_null_map_column(); + output_block->replace_by_position(_parent->_mark_column_id, std::move(mark_column_mutable)); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_parent->_mark_join_conjuncts, output_block, - mark_column.get_null_map_column(), filter)); + null_map_column, filter)); uint8_t* mark_filter_data = filter.data(); - uint8_t* mark_null_map = mark_column.get_null_map_data().data(); + uint8_t* mark_null_map = mark_column->get_null_map_data().data(); if (is_null_aware_join) { // For null aware anti/semi join, if the equal conjuncts was not matched and the build side has null value, diff --git a/be/src/exec/operator/nested_loop_join_probe_operator.cpp b/be/src/exec/operator/nested_loop_join_probe_operator.cpp index ccc7140c726d07..c0203a74f6f186 100644 --- a/be/src/exec/operator/nested_loop_join_probe_operator.cpp +++ b/be/src/exec/operator/nested_loop_join_probe_operator.cpp @@ -139,10 +139,12 @@ Status NestedLoopJoinProbeLocalState::close(RuntimeState* state) { void NestedLoopJoinProbeLocalState::_update_additional_flags(Block* block) { auto& p = _parent->cast(); if (p._is_mark_join) { - auto mark_column = block->get_by_position(block->columns() - 1).column->assume_mutable(); + auto mark_column = + IColumn::mutate(std::move(block->get_by_position(block->columns() - 1).column)); if (mark_column->size() < block->rows()) { ColumnFilterHelper(*mark_column).resize_fill(block->rows(), 1); } + block->replace_by_position(block->columns() - 1, std::move(mark_column)); } } @@ -160,7 +162,8 @@ void NestedLoopJoinProbeLocalState::_reset_with_next_probe_row() { void process_probe_block(int64_t probe_block_pos, Block& block, const Block& probe_block, size_t probe_side_columns, const Block& build_block, size_t build_side_columns) { - auto dst_columns = block.mutate_columns(); + auto dst_columns_guard = block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); const size_t max_added_rows = build_block.rows(); for (size_t i = 0; i < probe_side_columns; ++i) { const ColumnWithTypeAndName& src_column = probe_block.get_by_position(i); @@ -195,13 +198,13 @@ void process_probe_block(int64_t probe_block_pos, Block& block, const Block& pro max_added_rows); } } - block.set_columns(std::move(dst_columns)); } void process_build_block(int64_t build_block_pos, Block& block, const Block& build_block, size_t build_side_columns, const Block& probe_block, size_t probe_side_columns) { - auto dst_columns = block.mutate_columns(); + auto dst_columns_guard = block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); const size_t max_added_rows = probe_block.rows(); for (size_t i = 0; i < probe_side_columns; ++i) { const ColumnWithTypeAndName& src_column = probe_block.get_by_position(i); @@ -235,7 +238,6 @@ void process_build_block(int64_t build_block_pos, Block& block, const Block& bui build_block_pos, max_added_rows); } } - block.set_columns(std::move(dst_columns)); } void NestedLoopJoinProbeLocalState::_replace_lazy_placeholder_columns(size_t rows) { @@ -259,31 +261,33 @@ Status NestedLoopJoinProbeLocalState::_append_lazy_rows(const IColumn::Filter& f const size_t old_rows = _join_block.rows(); const size_t new_rows = old_rows + selected_rows; - auto dst_columns = _join_block.mutate_columns(); - for (int column_id : p._materialize_column_ids) { - const auto column_idx = cast_set(column_id); - if (column_idx < p._num_probe_side_columns) { - const auto& src_column = probe_block.get_by_position(column_idx); - if (fixed_side_probe) { - append_many_from_source(dst_columns[column_idx], src_column, fixed_side_pos, - selected_rows); - } else { - append_filtered_from_source(dst_columns[column_idx], src_column, filter, - selected_rows); - } - } else { - const auto build_column_idx = column_idx - p._num_probe_side_columns; - const auto& src_column = build_block.get_by_position(build_column_idx); - if (fixed_side_probe) { - append_filtered_from_source(dst_columns[column_idx], src_column, filter, + { + auto dst_columns_guard = _join_block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); + for (int column_id : p._materialize_column_ids) { + const auto column_idx = cast_set(column_id); + if (column_idx < p._num_probe_side_columns) { + const auto& src_column = probe_block.get_by_position(column_idx); + if (fixed_side_probe) { + append_many_from_source(dst_columns[column_idx], src_column, fixed_side_pos, selected_rows); + } else { + append_filtered_from_source(dst_columns[column_idx], src_column, filter, + selected_rows); + } } else { - append_many_from_source(dst_columns[column_idx], src_column, fixed_side_pos, - selected_rows); + const auto build_column_idx = column_idx - p._num_probe_side_columns; + const auto& src_column = build_block.get_by_position(build_column_idx); + if (fixed_side_probe) { + append_filtered_from_source(dst_columns[column_idx], src_column, filter, + selected_rows); + } else { + append_many_from_source(dst_columns[column_idx], src_column, fixed_side_pos, + selected_rows); + } } } } - _join_block.set_columns(std::move(dst_columns)); _replace_lazy_placeholder_columns(new_rows); DCHECK_EQ(_join_block.rows(), new_rows); return Status::OK(); @@ -294,17 +298,19 @@ Status NestedLoopJoinProbeLocalState::_append_lazy_probe_row_with_build_defaults auto& p = _parent->cast(); const size_t new_rows = _join_block.rows() + 1; - auto dst_columns = _join_block.mutate_columns(); - for (int column_id : p._materialize_column_ids) { - const auto column_idx = cast_set(column_id); - if (column_idx < p._num_probe_side_columns) { - const auto& src_column = probe_block.get_by_position(column_idx); - append_many_from_source(dst_columns[column_idx], src_column, probe_row_pos, 1); - } else { - dst_columns[column_idx]->insert_many_defaults(1); + { + auto dst_columns_guard = _join_block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); + for (int column_id : p._materialize_column_ids) { + const auto column_idx = cast_set(column_id); + if (column_idx < p._num_probe_side_columns) { + const auto& src_column = probe_block.get_by_position(column_idx); + append_many_from_source(dst_columns[column_idx], src_column, probe_row_pos, 1); + } else { + dst_columns[column_idx]->insert_many_defaults(1); + } } } - _join_block.set_columns(std::move(dst_columns)); _replace_lazy_placeholder_columns(new_rows); DCHECK_EQ(_join_block.rows(), new_rows); return Status::OK(); @@ -316,19 +322,21 @@ Status NestedLoopJoinProbeLocalState::_append_lazy_mark_probe_row_with_build_def const size_t mark_column_id = p._num_probe_side_columns + p._num_build_side_columns; const size_t new_rows = _join_block.rows() + 1; - auto dst_columns = _join_block.mutate_columns(); - for (int column_id : p._materialize_column_ids) { - const auto column_idx = cast_set(column_id); - if (column_idx < p._num_probe_side_columns) { - const auto& src_column = probe_block.get_by_position(column_idx); - append_many_from_source(dst_columns[column_idx], src_column, probe_row_pos, 1); - } else if (column_idx == mark_column_id) { - append_mark_value(dst_columns[column_idx], mark_value); - } else { - dst_columns[column_idx]->insert_many_defaults(1); + { + auto dst_columns_guard = _join_block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); + for (int column_id : p._materialize_column_ids) { + const auto column_idx = cast_set(column_id); + if (column_idx < p._num_probe_side_columns) { + const auto& src_column = probe_block.get_by_position(column_idx); + append_many_from_source(dst_columns[column_idx], src_column, probe_row_pos, 1); + } else if (column_idx == mark_column_id) { + append_mark_value(dst_columns[column_idx], mark_value); + } else { + dst_columns[column_idx]->insert_many_defaults(1); + } } } - _join_block.set_columns(std::move(dst_columns)); _replace_lazy_placeholder_columns(new_rows); DCHECK_EQ(_join_block.rows(), new_rows); return Status::OK(); @@ -339,18 +347,21 @@ Status NestedLoopJoinProbeLocalState::_append_lazy_build_rows_with_probe_default auto& p = _parent->cast(); const size_t new_rows = _join_block.rows() + selected_rows; - auto dst_columns = _join_block.mutate_columns(); - for (int column_id : p._materialize_column_ids) { - const auto column_idx = cast_set(column_id); - if (column_idx < p._num_probe_side_columns) { - dst_columns[column_idx]->insert_many_defaults(selected_rows); - } else { - const auto build_column_idx = column_idx - p._num_probe_side_columns; - const auto& src_column = build_block.get_by_position(build_column_idx); - append_filtered_from_source(dst_columns[column_idx], src_column, filter, selected_rows); + { + auto dst_columns_guard = _join_block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); + for (int column_id : p._materialize_column_ids) { + const auto column_idx = cast_set(column_id); + if (column_idx < p._num_probe_side_columns) { + dst_columns[column_idx]->insert_many_defaults(selected_rows); + } else { + const auto build_column_idx = column_idx - p._num_probe_side_columns; + const auto& src_column = build_block.get_by_position(build_column_idx); + append_filtered_from_source(dst_columns[column_idx], src_column, filter, + selected_rows); + } } } - _join_block.set_columns(std::move(dst_columns)); _replace_lazy_placeholder_columns(new_rows); DCHECK_EQ(_join_block.rows(), new_rows); return Status::OK(); @@ -981,7 +992,8 @@ template // NOLINTNEXTLINE(readability-function-size,readability-function-cognitive-complexity): existing finalization handles multiple join variants. void NestedLoopJoinProbeLocalState::_finalize_current_phase(Block& block, size_t batch_size) { auto& p = _parent->cast(); - auto dst_columns = block.mutate_columns(); + auto dst_columns_guard = block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); DCHECK_GT(dst_columns.size(), 0); auto column_size = dst_columns[0]->size(); if constexpr (BuildSide) { @@ -1090,12 +1102,12 @@ void NestedLoopJoinProbeLocalState::_finalize_current_phase(Block& block, size_t } } } - block.set_columns(std::move(dst_columns)); } void NestedLoopJoinProbeLocalState::_append_probe_data_with_null(Block& block) const { auto& p = _parent->cast(); - auto dst_columns = block.mutate_columns(); + auto dst_columns_guard = block.mutate_columns_scoped(); + auto& dst_columns = dst_columns_guard.mutable_columns(); DCHECK(p._is_mark_join); for (size_t i = 0; i < p._num_probe_side_columns; ++i) { const ColumnWithTypeAndName& src_column = _child_block->get_by_position(i); @@ -1121,7 +1133,6 @@ void NestedLoopJoinProbeLocalState::_append_probe_data_with_null(Block& block) c } auto& mark_column = *dst_columns[dst_columns.size() - 1]; ColumnFilterHelper(mark_column).resize_fill(mark_column.size() + _probe_side_process_count, 0); - block.set_columns(std::move(dst_columns)); } NestedLoopJoinProbeOperatorX::NestedLoopJoinProbeOperatorX(ObjectPool* pool, const TPlanNode& tnode, diff --git a/be/src/exec/operator/operator.cpp b/be/src/exec/operator/operator.cpp index 96f7933d5d5274..6f7bd22539aaeb 100644 --- a/be/src/exec/operator/operator.cpp +++ b/be/src/exec/operator/operator.cpp @@ -360,10 +360,11 @@ Status OperatorXBase::do_projections(RuntimeState* state, Block* origin_block, } }; - MutableBlock mutable_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_output_row_descriptor); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, *_output_row_descriptor); + auto& mutable_block = scoped_mutable_block.mutable_block(); + auto& mutable_columns = mutable_block.mutable_columns(); if (rows != 0) { - auto& mutable_columns = mutable_block.mutable_columns(); DCHECK_EQ(mutable_columns.size(), local_state->_projections.size()) << debug_string(); for (int i = 0; i < mutable_columns.size(); ++i) { ColumnPtr column_ptr; @@ -379,9 +380,7 @@ Status OperatorXBase::do_projections(RuntimeState* state, Block* origin_block, insert_column_datas(mutable_columns[i], column_ptr, rows); } DCHECK(mutable_block.rows() == rows); - output_block->set_columns(std::move(mutable_columns)); } - local_state->_estimate_memory_usage += bytes_usage; return Status::OK(); diff --git a/be/src/exec/operator/partitioned_aggregation_sink_operator.cpp b/be/src/exec/operator/partitioned_aggregation_sink_operator.cpp index 07c96959d98344..42739b9b2acf77 100644 --- a/be/src/exec/operator/partitioned_aggregation_sink_operator.cpp +++ b/be/src/exec/operator/partitioned_aggregation_sink_operator.cpp @@ -512,8 +512,10 @@ void PartitionedAggSinkLocalState::_reset_tmp_data() { _value_columns.clear(); _key_block.clear_column_data(); _value_block.clear_column_data(); - _key_columns = _key_block.mutate_columns(); - _value_columns = _value_block.mutate_columns(); + // _key_columns/_value_columns own the mutable storage until the next reset. The schema blocks + // are used only as empty reusable owners here, so consuming their columns is intentional. + _key_columns = std::move(_key_block).mutate_columns(); + _value_columns = std::move(_value_block).mutate_columns(); } void PartitionedAggSinkLocalState::_clear_tmp_data() { diff --git a/be/src/exec/operator/repeat_operator.cpp b/be/src/exec/operator/repeat_operator.cpp index 82ffa633056a41..b659052187b17a 100644 --- a/be/src/exec/operator/repeat_operator.cpp +++ b/be/src/exec/operator/repeat_operator.cpp @@ -112,7 +112,9 @@ Status RepeatLocalState::get_repeated_block(Block* input_block, int repeat_id_id size_t input_column_size = input_block->columns(); size_t output_column_size = p._output_slots.size(); DCHECK_LT(input_column_size, output_column_size); - auto m_block = VectorizedUtils::build_mutable_mem_reuse_block(output_block, p._output_slots); + auto scoped_mutable_block = + VectorizedUtils::build_scoped_mutable_mem_reuse_block(output_block, p._output_slots); + auto& m_block = scoped_mutable_block.mutable_block(); auto& output_columns = m_block.mutable_columns(); /* Fill all slots according to child, for example:select tc1,tc2,sum(tc3) from t1 group by grouping sets((tc1),(tc2)); * insert into t1 values(1,2,1),(1,3,1),(2,1,1),(3,1,1); @@ -154,7 +156,6 @@ Status RepeatLocalState::get_repeated_block(Block* input_block, int repeat_id_id RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, output_columns, repeat_id_idx)); DCHECK_EQ(cur_col, output_column_size); - return Status::OK(); } @@ -229,8 +230,9 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, Block* output_block, bo _repeat_id_idx = 0; } } else if (local_state._expr_ctxs.empty()) { - auto m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, _output_slots); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, _output_slots); + auto& m_block = scoped_mutable_block.mutable_block(); auto rows = _child_block.rows(); auto& columns = m_block.mutable_columns(); diff --git a/be/src/exec/operator/schema_scan_operator.cpp b/be/src/exec/operator/schema_scan_operator.cpp index 030e49b54d48c0..3d5922573b90e4 100644 --- a/be/src/exec/operator/schema_scan_operator.cpp +++ b/be/src/exec/operator/schema_scan_operator.cpp @@ -21,6 +21,7 @@ #include +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "exec/operator/operator.h" #include "runtime/runtime_profile.h" @@ -256,10 +257,16 @@ Status SchemaScanOperatorX::get_block(RuntimeState* state, Block* block, bool* e if (src_block.rows()) { // block->check_number_of_rows(); for (int i = 0; i < _slot_num; ++i) { - MutableColumnPtr column_ptr = std::move(*block->get_by_position(i).column).mutate(); - column_ptr->insert_range_from( - *src_block.safe_get_by_position(_slot_offsets[i]).column, 0, - src_block.rows()); + MutableColumnPtr column_ptr = + IColumn::mutate(std::move(block->get_by_position(i).column)); + ColumnPtr src_column = src_block.safe_get_by_position(_slot_offsets[i]) + .column->convert_to_full_column_if_const(); + if (column_ptr->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(column_ptr->is_nullable() == src_column->is_nullable()); + column_ptr->insert_range_from(*src_column, 0, src_block.rows()); + block->replace_by_position(i, std::move(column_ptr)); } DCHECK_EQ(block->columns(), _dest_tuple_desc->slots().size()); RETURN_IF_ERROR(local_state.filter_block(local_state._conjuncts, block)); diff --git a/be/src/exec/operator/set_sink_operator.cpp b/be/src/exec/operator/set_sink_operator.cpp index 608bd8c1b69e58..ec2c717c2cf9f8 100644 --- a/be/src/exec/operator/set_sink_operator.cpp +++ b/be/src/exec/operator/set_sink_operator.cpp @@ -80,7 +80,8 @@ Status SetSinkOperatorX::sink(RuntimeState* state, Block* in_block if (in_block->rows() != 0) { if (local_state._mutable_block.empty()) { auto tmp_build_block = *(in_block->create_same_struct_block(0, false)); - local_state._mutable_block = MutableBlock::build_mutable_block(&tmp_build_block); + local_state._mutable_block = + MutableBlock::build_mutable_block(std::move(tmp_build_block)); } { diff --git a/be/src/exec/operator/set_source_operator.cpp b/be/src/exec/operator/set_source_operator.cpp index a314f411311069..5cc299e7dbb485 100644 --- a/be/src/exec/operator/set_source_operator.cpp +++ b/be/src/exec/operator/set_source_operator.cpp @@ -114,7 +114,7 @@ void SetSourceOperatorX::_create_mutable_cols( for (int i = 0; i < local_state._left_table_data_types.size(); ++i) { if (mem_reuse) { local_state._mutable_cols[i] = - std::move(*output_block->get_by_position(i).column).mutate(); + IColumn::mutate(std::move(output_block->get_by_position(i).column)); } else { local_state._mutable_cols[i] = (local_state._left_table_data_types[i]->create_column()); } @@ -173,6 +173,9 @@ Status SetSourceOperatorX::_get_data_in_hashtable( local_state._left_table_data_types[i], "")); } } else { + for (int i = 0; i < left_col_len; ++i) { + output_block->replace_by_position(i, std::move(local_state._mutable_cols[i])); + } local_state._mutable_cols.clear(); } diff --git a/be/src/exec/operator/streaming_aggregation_operator.cpp b/be/src/exec/operator/streaming_aggregation_operator.cpp index 5744b288a4487e..b3e789e6e0f8a2 100644 --- a/be/src/exec/operator/streaming_aggregation_operator.cpp +++ b/be/src/exec/operator/streaming_aggregation_operator.cpp @@ -330,8 +330,11 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo in_block->get_by_position(result_column_id).column = in_block->get_by_position(result_column_id) .column->convert_to_full_column_if_const(); + auto mutable_column = + IColumn::mutate(std::move(in_block->get_by_position(result_column_id).column)); + mutable_column->replace_float_special_values(); + in_block->get_by_position(result_column_id).column = std::move(mutable_column); key_columns[i] = in_block->get_by_position(result_column_id).column.get(); - key_columns[i]->assume_mutable()->replace_float_special_values(); } } @@ -365,17 +368,26 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo } bool mem_reuse = p._make_nullable_keys.empty() && out_block->mem_reuse(); + if (mem_reuse) { + auto columns_guard = out_block->mutate_columns_scoped(); + MutableColumns& columns = columns_guard.mutable_columns(); + for (int i = 0; i != _aggregate_evaluators.size(); ++i) { + SCOPED_TIMER(_insert_values_to_column_timer); + RETURN_IF_ERROR(_aggregate_evaluators[i]->streaming_agg_serialize_to_column( + in_block, columns[i + key_size], rows, _agg_arena_pool)); + } + for (int i = 0; i < key_size; ++i) { + columns[i]->insert_range_from(*key_columns[i], 0, rows); + } + return Status::OK(); + } + std::vector data_types; MutableColumns value_columns; for (int i = 0; i < _aggregate_evaluators.size(); ++i) { auto data_type = _aggregate_evaluators[i]->function()->get_serialized_type(); - if (mem_reuse) { - value_columns.emplace_back( - std::move(*out_block->get_by_position(i + key_size).column).mutate()); - } else { - value_columns.emplace_back( - _aggregate_evaluators[i]->function()->create_serialize_column()); - } + value_columns.emplace_back( + _aggregate_evaluators[i]->function()->create_serialize_column()); data_types.emplace_back(data_type); } @@ -385,24 +397,16 @@ Status StreamingAggLocalState::_pre_agg_with_serialized_key(doris::Block* in_blo in_block, value_columns[i], rows, _agg_arena_pool)); } - if (!mem_reuse) { - ColumnsWithTypeAndName columns_with_schema; - for (int i = 0; i < key_size; ++i) { - columns_with_schema.emplace_back(key_columns[i]->clone_resized(rows), - _probe_expr_ctxs[i]->root()->data_type(), - _probe_expr_ctxs[i]->root()->expr_name()); - } - for (int i = 0; i < value_columns.size(); ++i) { - columns_with_schema.emplace_back(std::move(value_columns[i]), data_types[i], ""); - } - out_block->swap(Block(columns_with_schema)); - } else { - for (int i = 0; i < key_size; ++i) { - std::move(*out_block->get_by_position(i).column) - .mutate() - ->insert_range_from(*key_columns[i], 0, rows); - } + ColumnsWithTypeAndName columns_with_schema; + for (int i = 0; i < key_size; ++i) { + columns_with_schema.emplace_back(key_columns[i]->clone_resized(rows), + _probe_expr_ctxs[i]->root()->data_type(), + _probe_expr_ctxs[i]->root()->expr_name()); } + for (int i = 0; i < value_columns.size(); ++i) { + columns_with_schema.emplace_back(std::move(value_columns[i]), data_types[i], ""); + } + out_block->swap(Block(columns_with_schema)); } else { bool need_agg = true; if (need_do_sort_limit != 1) { @@ -462,7 +466,7 @@ Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st MutableColumns key_columns; for (int i = 0; i < key_size; ++i) { if (mem_reuse) { - key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); + key_columns.emplace_back(IColumn::mutate(std::move(block->get_by_position(i).column))); } else { key_columns.emplace_back(_probe_expr_ctxs[i]->root()->data_type()->create_column()); } @@ -486,9 +490,8 @@ Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st value_data_types[0] = _aggregate_evaluators[0]->function()->get_serialized_type(); if (mem_reuse) { - value_columns[0] = - std::move(*block->get_by_position(key_size).column) - .mutate(); + value_columns[0] = IColumn::mutate( + std::move(block->get_by_position(key_size).column)); } else { value_columns[0] = _aggregate_evaluators[0] ->function() @@ -590,9 +593,8 @@ Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st value_data_types[i] = _aggregate_evaluators[i]->function()->get_serialized_type(); if (mem_reuse) { - value_columns[i] = - std::move(*block->get_by_position(i + key_size).column) - .mutate(); + value_columns[i] = IColumn::mutate( + std::move(block->get_by_position(i + key_size).column)); } else { value_columns[i] = _aggregate_evaluators[i] ->function() @@ -606,7 +608,16 @@ Status StreamingAggLocalState::_get_results_with_serialized_key(RuntimeState* st }}, _agg_data->method_variant); - if (!mem_reuse) { + if (mem_reuse) { + MutableColumns columns(block->columns()); + for (int i = 0; i < key_size; ++i) { + columns[i] = std::move(key_columns[i]); + } + for (int i = 0; i < agg_size; ++i) { + columns[key_size + i] = std::move(value_columns[i]); + } + block->set_columns(std::move(columns)); + } else { ColumnsWithTypeAndName columns_with_schema; for (int i = 0; i < key_size; ++i) { columns_with_schema.emplace_back(std::move(key_columns[i]), diff --git a/be/src/exec/operator/table_function_operator.cpp b/be/src/exec/operator/table_function_operator.cpp index 09e74f580dd1c2..397a9754620632 100644 --- a/be/src/exec/operator/table_function_operator.cpp +++ b/be/src/exec/operator/table_function_operator.cpp @@ -485,8 +485,9 @@ Status TableFunctionLocalState::get_expanded_block(RuntimeState* state, Block* o } auto& p = _parent->cast(); - MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, p._output_slots); + auto scoped_mutable_block = + VectorizedUtils::build_scoped_mutable_mem_reuse_block(output_block, p._output_slots); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& columns = m_block.mutable_columns(); for (int i = 0; i < p._fn_num; i++) { @@ -560,6 +561,7 @@ Status TableFunctionLocalState::get_expanded_block(RuntimeState* state, Block* o for (auto index : p._useless_slot_indexs) { columns[index]->insert_many_defaults(row_size - columns[index]->size()); } + scoped_mutable_block.restore(); { SCOPED_TIMER(_filter_timer); // 3. eval conjuncts @@ -577,8 +579,9 @@ Status TableFunctionLocalState::_get_expanded_block_for_outer_conjuncts(RuntimeS Block* output_block, bool* eos) { auto& p = _parent->cast(); - MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, p._output_slots); + auto scoped_mutable_block = + VectorizedUtils::build_scoped_mutable_mem_reuse_block(output_block, p._output_slots); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& columns = m_block.mutable_columns(); auto child_slot_count = p._child_slots.size(); for (int i = 0; i < p._fn_num; i++) { @@ -647,7 +650,7 @@ Status TableFunctionLocalState::_get_expanded_block_for_outer_conjuncts(RuntimeS for (auto index : p._useless_slot_indexs) { columns[index]->insert_many_defaults(output_row_count - columns[index]->size()); } - output_block->set_columns(std::move(columns)); + scoped_mutable_block.restore(); /** Handle the outer conjuncts after unnest. Currently, only left outer is supported. @@ -745,8 +748,9 @@ Status TableFunctionLocalState::_get_expanded_block_for_outer_conjuncts(RuntimeS } } if (!null_row_indices.empty()) { - MutableBlock m_block2 = VectorizedUtils::build_mutable_mem_reuse_block( + auto scoped_mutable_block2 = VectorizedUtils::build_scoped_mutable_mem_reuse_block( output_block, p._output_slots); + auto& m_block2 = scoped_mutable_block2.mutable_block(); MutableColumns& columns2 = m_block2.mutable_columns(); for (auto index : p._output_slot_indexs) { auto src_column = _child_block->get_by_position(index).column; @@ -758,7 +762,6 @@ Status TableFunctionLocalState::_get_expanded_block_for_outer_conjuncts(RuntimeS columns2[index]->insert_many_defaults(null_row_indices.size()); } columns2[child_slot_count]->insert_many_defaults(null_row_indices.size()); - output_block->set_columns(std::move(columns2)); } _child_rows_has_output.clear(); _child_block->clear_column_data(_parent->cast() diff --git a/be/src/exec/operator/union_sink_operator.h b/be/src/exec/operator/union_sink_operator.h index 4842ab6b243903..14978ae4526178 100644 --- a/be/src/exec/operator/union_sink_operator.h +++ b/be/src/exec/operator/union_sink_operator.h @@ -157,8 +157,9 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXrows() > 0) { - MutableBlock mblock = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, row_descriptor()); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, row_descriptor()); + auto& mblock = scoped_mutable_block.mutable_block(); Block res; auto& local_state = get_local_state(state); { @@ -173,4 +174,4 @@ class UnionSinkOperatorX MOCK_REMOVE(final) : public DataSinkOperatorXbatch_size(); @@ -177,6 +179,7 @@ Status UnionSourceOperatorX::get_next_const(RuntimeState* state, Block* block) { tmp_block.clear(); } } + scoped_mutable_block.restore(); // some insert query like "insert into string_test select 1, repeat('a', 1024 * 1024);" // the const expr will be in output expr cause the union node return a empty block. so here we diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index 27c66197541f5e..4322842792b9f6 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -164,10 +164,12 @@ Status RowIDFetcher::_merge_rpc_results(const PMultiGetRequest& request, default_values[i] = _fetch_option.desc->slots()[i]->col_default_value(); } } + auto output_columns_guard = output_block->mutate_columns_scoped(); + MutableColumns& output_columns = output_columns_guard.mutable_columns(); for (int i = 0; i < resp.binary_row_data_size(); ++i) { - RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_block( + RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_columns( serdes, resp.binary_row_data(i).data(), resp.binary_row_data(i).size(), - col_uid_to_idx, *output_block, default_values, {})); + col_uid_to_idx, output_columns, default_values, {})); } return Status::OK(); } @@ -190,10 +192,10 @@ Status RowIDFetcher::_merge_rpc_results(const PMultiGetRequest& request, partial_block.dump_types()); } else { for (int i = 0; i < output_block->columns(); ++i) { - output_block->get_by_position(i).column->assume_mutable()->insert_range_from( - *partial_block.get_by_position(i) - .column->convert_to_full_column_if_const() - .get(), + auto column_guard = output_block->mutate_column_scoped(i); + MutableColumnPtr& column = column_guard.mutable_column(); + column->insert_range_from( + *partial_block.get_by_position(i).column->convert_to_full_column_if_const(), 0, partial_block.rows()); } } @@ -368,9 +370,10 @@ struct DorisFormatReadBatch { static void scatter_scan_blocks_to_result_block( const std::vector>& row_id_block_idx, - std::vector& scan_blocks, Block& result_block) { + const std::vector& scan_blocks, Block& result_block) { for (size_t column_id = 0; column_id < result_block.columns(); ++column_id) { - auto dst_col = const_cast(result_block.get_by_position(column_id).column.get()); + auto dst_col_guard = result_block.mutate_column_scoped(column_id); + MutableColumnPtr& dst_col = dst_col_guard.mutable_column(); std::vector scan_src_columns; scan_src_columns.reserve(row_id_block_idx.size()); @@ -1122,6 +1125,8 @@ Status RowIdStorageReader::read_doris_format_row( return Status::InternalError("Tablet {} does not have row store for all columns", tablet->tablet_id()); } + auto result_columns_guard = result_block.mutate_columns_scoped(); + MutableColumns& result_columns = result_columns_guard.mutable_columns(); for (auto row_id : row_ids) { RowLocation loc(rowset_id, segment->id(), cast_set(row_id)); row_store_read_struct.row_store_buffer.clear(); @@ -1132,15 +1137,16 @@ Status RowIdStorageReader::read_doris_format_row( }, lookup_row_data_ms)); - RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_block( + RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_columns( row_store_read_struct.serdes, row_store_read_struct.row_store_buffer.data(), row_store_read_struct.row_store_buffer.size(), - row_store_read_struct.col_uid_to_idx, result_block, + row_store_read_struct.col_uid_to_idx, result_columns, row_store_read_struct.default_values, {})); } } else { for (int x = 0; x < slots.size(); ++x) { - MutableColumnPtr column = result_block.get_by_position(x).column->assume_mutable(); + auto column_guard = result_block.mutate_column_scoped(x); + MutableColumnPtr& column = column_guard.mutable_column(); IteratorKey iterator_key {.tablet_id = tablet_id, .rowset_id = rowset_id, .segment_id = segment_id, diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp index 5f1d248c1e1f4d..c0a79f9f38d9ec 100644 --- a/be/src/exec/scan/file_scanner.cpp +++ b/be/src/exec/scan/file_scanner.cpp @@ -438,8 +438,10 @@ Status FileScanner::_process_runtime_filters_partition_prune(bool& can_filter_al if (!first_column_filled) { // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 // The following process may be tricky and time-consuming, but we have no other way. - _runtime_filter_partition_prune_block.get_by_position(0).column->assume_mutable()->resize( - partition_value_column_size); + auto column = IColumn::mutate( + std::move(_runtime_filter_partition_prune_block.get_by_position(0).column)); + column->resize(partition_value_column_size); + _runtime_filter_partition_prune_block.replace_by_position(0, std::move(column)); } IColumn::Filter result_filter(_runtime_filter_partition_prune_block.rows(), 1); RETURN_IF_ERROR(VExprContext::execute_conjuncts(_runtime_filter_partition_prune_ctxs, nullptr, @@ -773,16 +775,17 @@ Status FileScanner::_convert_to_output_block(Block* block) { // After convert, the column_ptr should be copied into output block. // Can not use block->insert() because it may cause use_count() non-zero bug - MutableBlock mutable_output_block = - VectorizedUtils::build_mutable_mem_reuse_block(block, *_dest_row_desc); + auto scoped_mutable_output_block = + VectorizedUtils::build_scoped_mutable_mem_reuse_block(block, *_dest_row_desc); + auto& mutable_output_block = scoped_mutable_output_block.mutable_block(); auto& mutable_output_columns = mutable_output_block.mutable_columns(); std::vector* skip_bitmaps {nullptr}; + MutableColumnPtr skip_bitmap_column; if (_should_process_skip_bitmap_col()) { - auto* skip_bitmap_nullable_col_ptr = - assert_cast(_src_block_ptr->get_by_position(_skip_bitmap_col_idx) - .column->assume_mutable() - .get()); + skip_bitmap_column = IColumn::mutate( + std::move(_src_block_ptr->get_by_position(_skip_bitmap_col_idx).column)); + auto* skip_bitmap_nullable_col_ptr = assert_cast(skip_bitmap_column.get()); skip_bitmaps = &(assert_cast( skip_bitmap_nullable_col_ptr->get_nested_column_ptr().get()) ->get_data()); @@ -799,6 +802,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { } } } + _src_block_ptr->replace_by_position(_skip_bitmap_col_idx, std::move(skip_bitmap_column)); } // for (auto slot_desc : _output_tuple_desc->slots()) { @@ -865,6 +869,7 @@ Status FileScanner::_convert_to_output_block(Block* block) { mutable_output_columns[j]->insert_range_from(*column_ptr, 0, rows); ctx_idx++; } + scoped_mutable_output_block.restore(); // after do the dest block insert operation, clear _src_block to remove the reference of origin column _src_block_ptr->clear(); diff --git a/be/src/exec/scan/meta_scanner.cpp b/be/src/exec/scan/meta_scanner.cpp index adf1aabe4b8903..52892882f7bcbb 100644 --- a/be/src/exec/scan/meta_scanner.cpp +++ b/be/src/exec/scan/meta_scanner.cpp @@ -112,21 +112,14 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof columns.resize(column_size); for (auto i = 0; i < column_size; i++) { if (mem_reuse) { - columns[i] = block->get_by_position(i).column->assume_mutable(); + columns[i] = IColumn::mutate(std::move(block->get_by_position(i).column)); } else { columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column(); } } // fill block RETURN_IF_ERROR(_fill_block_with_remote_data(columns)); - if (_meta_eos == true) { - if (block->rows() == 0) { - *eof = true; - } - break; - } - // Before really use the Block, must clear other ptr of column in block - // So here need do std::move and clear in `columns` + const bool empty_result = columns.empty() || columns.front()->empty(); if (!mem_reuse) { int column_index = 0; for (const auto slot_desc : _tuple_desc->slots()) { @@ -135,7 +128,13 @@ Status MetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof slot_desc->col_name())); } } else { - columns.clear(); + block->set_columns(std::move(columns)); + } + if (_meta_eos == true) { + if (empty_result) { + *eof = true; + } + break; } VLOG_ROW << "VMetaScanNode output rows: " << block->rows(); } while (block->rows() == 0 && !(*eof)); diff --git a/be/src/exec/scan/scanner.cpp b/be/src/exec/scan/scanner.cpp index ae1ed96e000768..ab76b884ef04fa 100644 --- a/be/src/exec/scan/scanner.cpp +++ b/be/src/exec/scan/scanner.cpp @@ -225,8 +225,9 @@ Status Scanner::_do_projections(Block* origin_block, Block* output_block) { } DCHECK_EQ(rows, input_block.rows()); - MutableBlock mutable_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_output_row_descriptor); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, *_output_row_descriptor); + auto& mutable_block = scoped_mutable_block.mutable_block(); auto& mutable_columns = mutable_block.mutable_columns(); @@ -239,10 +240,10 @@ Status Scanner::_do_projections(Block* origin_block, Block* output_block) { if (mutable_columns[i]->is_nullable() != column_ptr->is_nullable()) { throw Exception(ErrorCode::INTERNAL_ERROR, "Nullable mismatch"); } - mutable_columns[i] = column_ptr->assume_mutable(); + mutable_columns[i] = IColumn::mutate(std::move(column_ptr)); } - output_block->set_columns(std::move(mutable_columns)); + scoped_mutable_block.restore(); // origin columns was moved into output_block, so we need to set origin_block to empty columns auto empty_columns = origin_block->clone_empty_columns(); diff --git a/be/src/exec/scan/scanner.h b/be/src/exec/scan/scanner.h index c14f6ee2048a7b..4f5d511e94b573 100644 --- a/be/src/exec/scan/scanner.h +++ b/be/src/exec/scan/scanner.h @@ -115,8 +115,9 @@ class Scanner { if (_padding_block.empty()) { _padding_block.swap(_origin_block); } else if (_origin_block.rows()) { - RETURN_IF_ERROR( - MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block)); + ScopedMutableBlock scoped_mutable_block(&_padding_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); + RETURN_IF_ERROR(mutable_block.merge(_origin_block)); } return Status::OK(); } diff --git a/be/src/exec/sink/vtablet_block_convertor.cpp b/be/src/exec/sink/vtablet_block_convertor.cpp index b567b599cfa3bf..e59fa923375998 100644 --- a/be/src/exec/sink/vtablet_block_convertor.cpp +++ b/be/src/exec/sink/vtablet_block_convertor.cpp @@ -238,8 +238,8 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B } } - const auto* tmp_column_ptr = check_and_get_column(*orig_column); - const auto& tmp_real_column_ptr = + auto tmp_column_ptr = check_and_get_column(*orig_column); + auto tmp_real_column_ptr = tmp_column_ptr == nullptr ? orig_column : (tmp_column_ptr->get_nested_column_ptr()); const auto* column_string = assert_cast(tmp_real_column_ptr.get()); const auto* null_map = @@ -281,13 +281,22 @@ Status OlapTableBlockConvertor::_internal_validate_column(RuntimeState* state, B {len_column, len_type, "len"}, {nullptr, input_type, "result"}}); RETURN_IF_ERROR(func->execute(nullptr, tmp_block, {0, 1, 2}, 3, row_count)); - column_string = - assert_cast(tmp_block.get_by_position(3).column.get()); - orig_column = - orig_column->is_nullable() - ? ColumnNullable::create(tmp_block.get_by_position(3).column, - tmp_column_ptr->get_null_map_column_ptr()) - : std::move(tmp_block.get_by_position(3).column); + auto result_column = + IColumn::mutate(std::move(tmp_block.get_by_position(3).column)); + if (orig_column->is_nullable()) { + orig_column = ColumnNullable::create( + std::move(result_column), + IColumn::mutate(tmp_column_ptr->get_null_map_column_ptr())); + } else { + orig_column = std::move(result_column); + } + tmp_column_ptr = check_and_get_column(*orig_column); + tmp_real_column_ptr = tmp_column_ptr == nullptr + ? orig_column + : tmp_column_ptr->get_nested_column_ptr(); + column_string = assert_cast(tmp_real_column_ptr.get()); + null_map = tmp_column_ptr == nullptr ? nullptr + : tmp_column_ptr->get_null_map_data().data(); } for (size_t j = 0; j < row_count; ++j) { auto row = rows ? (*rows)[j] : j; diff --git a/be/src/exec/sink/writer/vtablet_writer.cpp b/be/src/exec/sink/writer/vtablet_writer.cpp index c77a94c501585b..0eb98ae48dd711 100644 --- a/be/src/exec/sink/writer/vtablet_writer.cpp +++ b/be/src/exec/sink/writer/vtablet_writer.cpp @@ -1760,10 +1760,12 @@ Status VTabletWriter::_send_new_partition_batch() { // 2. deal batched block // 3. now reuse the column of lval block. cuz write doesn't real adjust it. it generate a new block from that. _row_distribution.clear_batching_stats(); + Defer recover_batching_block([&]() { + _row_distribution._batching_block->set_mutable_columns( + std::move(tmp_block).mutate_columns()); + _row_distribution._batching_block->clear_column_data(); + }); RETURN_IF_ERROR(this->write(_state, tmp_block)); - _row_distribution._batching_block->set_mutable_columns( - tmp_block.mutate_columns()); // Recovery back - _row_distribution._batching_block->clear_column_data(); _row_distribution._deal_batched = false; } return Status::OK(); diff --git a/be/src/exec/sink/writer/vtablet_writer_v2.cpp b/be/src/exec/sink/writer/vtablet_writer_v2.cpp index 17f41063c6a33d..31a2d78819207d 100644 --- a/be/src/exec/sink/writer/vtablet_writer_v2.cpp +++ b/be/src/exec/sink/writer/vtablet_writer_v2.cpp @@ -620,10 +620,12 @@ Status VTabletWriterV2::_send_new_partition_batch() { // 2. deal batched block // 3. now reuse the column of lval block. cuz write doesn't real adjust it. it generate a new block from that. _row_distribution.clear_batching_stats(); + Defer recover_batching_block([&]() { + _row_distribution._batching_block->set_mutable_columns( + std::move(tmp_block).mutate_columns()); + _row_distribution._batching_block->clear_column_data(); + }); RETURN_IF_ERROR(this->write(_state, tmp_block)); - _row_distribution._batching_block->set_mutable_columns( - tmp_block.mutate_columns()); // Recovery back - _row_distribution._batching_block->clear_column_data(); _row_distribution._deal_batched = false; } return Status::OK(); diff --git a/be/src/exec/sort/partition_sorter.cpp b/be/src/exec/sort/partition_sorter.cpp index 64422a202c236f..87b915990d1dfa 100644 --- a/be/src/exec/sort/partition_sorter.cpp +++ b/be/src/exec/sort/partition_sorter.cpp @@ -100,8 +100,9 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ auto& queue = _state->get_queue(); size_t num_columns = _state->unsorted_block()->columns(); - MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_state->unsorted_block()); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, *_state->unsorted_block()); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& merged_columns = m_block.mutable_columns(); size_t merged_rows = 0; @@ -121,9 +122,11 @@ Status PartitionSorter::_read_row_num(Block* output_block, bool* eos, int batch_ if (current->impl->is_last(step) && current->impl->pos == 0) { if (merged_rows != 0) { // return directly for next time's read swap whole block + scoped_mutable_block.restore(); return Status::OK(); } // swap and return block directly when we should get all data from cursor + scoped_mutable_block.restore(); output_block->swap(*current->impl->block); merged_rows += step; _output_total_rows += step; @@ -154,8 +157,9 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch auto& queue = _state->get_queue(); size_t num_columns = _state->unsorted_block()->columns(); - MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, *_state->unsorted_block()); + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( + output_block, *_state->unsorted_block()); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& merged_columns = m_block.mutable_columns(); size_t merged_rows = 0; @@ -178,6 +182,7 @@ Status PartitionSorter::_read_row_rank(Block* output_block, bool* eos, int batch // rank() maybe need check when have get a distinct row // so when the cmp_res is get a distinct row, need check have output all rows num if (_get_enough_data()) { + scoped_mutable_block.restore(); return Status::OK(); } *_previous_row = *current; diff --git a/be/src/exec/sort/sorter.cpp b/be/src/exec/sort/sorter.cpp index 616cc2145a2d16..686cca97e84e1c 100644 --- a/be/src/exec/sort/sorter.cpp +++ b/be/src/exec/sort/sorter.cpp @@ -114,7 +114,9 @@ void MergeSorterState::_merge_sort_read_impl(int batch_size, doris::Block* block size_t num_columns = unsorted_block()->columns(); - MutableBlock m_block = VectorizedUtils::build_mutable_mem_reuse_block(block, *unsorted_block()); + auto scoped_mutable_block = + VectorizedUtils::build_scoped_mutable_mem_reuse_block(block, *unsorted_block()); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& merged_columns = m_block.mutable_columns(); /// Take rows from queue in right order and push to 'merged'. @@ -143,7 +145,6 @@ void MergeSorterState::_merge_sort_read_impl(int batch_size, doris::Block* block } } - block->set_columns(std::move(merged_columns)); *eos = merged_rows == 0; } diff --git a/be/src/exec/sort/vsorted_run_merger.cpp b/be/src/exec/sort/vsorted_run_merger.cpp index ce4440c3178343..8323490031df06 100644 --- a/be/src/exec/sort/vsorted_run_merger.cpp +++ b/be/src/exec/sort/vsorted_run_merger.cpp @@ -150,8 +150,9 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { return Status::OK(); } else { size_t num_columns = _priority_queue.top().impl->block->columns(); - MutableBlock m_block = VectorizedUtils::build_mutable_mem_reuse_block( + auto scoped_mutable_block = VectorizedUtils::build_scoped_mutable_mem_reuse_block( output_block, *_priority_queue.top().impl->block); + auto& m_block = scoped_mutable_block.mutable_block(); MutableColumns& merged_columns = m_block.mutable_columns(); if (num_columns != merged_columns.size()) { @@ -194,11 +195,12 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { current->next(); if (_need_more_data(current)) { do_insert(); + scoped_mutable_block.restore(); return Status::OK(); } } do_insert(); - output_block->set_columns(std::move(merged_columns)); + scoped_mutable_block.restore(); if (merged_rows == 0) { *eos = true; @@ -221,4 +223,4 @@ bool VSortedRunMerger::_need_more_data(MergeSortCursor& current) { } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/aggregate/aggregate_function_foreach.h b/be/src/exprs/aggregate/aggregate_function_foreach.h index 8afd5b93eaaba8..a00dd9b9397944 100644 --- a/be/src/exprs/aggregate/aggregate_function_foreach.h +++ b/be/src/exprs/aggregate/aggregate_function_foreach.h @@ -210,11 +210,19 @@ class AggregateFunctionForEach : public AggregateFunctionNonFinalBase, auto& arr_to = assert_cast(to); auto& offsets_to = arr_to.get_offsets(); - IColumn& elems_to = arr_to.get_data(); + IColumn* elems_to = &arr_to.get_data(); + ColumnNullable* nullable_elems_to = nullptr; + if (!nested_function->get_return_type()->is_nullable()) { + nullable_elems_to = assert_cast(elems_to); + elems_to = nullable_elems_to->get_nested_column_ptr().get(); + } char* nested_state = state.array_of_aggregate_datas; for (size_t i = 0; i < state.dynamic_array_size; ++i) { - nested_function->insert_result_into(nested_state, elems_to); + nested_function->insert_result_into(nested_state, *elems_to); + if (nullable_elems_to != nullptr) { + nullable_elems_to->get_null_map_data().push_back(0); + } nested_state += nested_size_of_data; } diff --git a/be/src/exprs/aggregate/aggregate_function_java_udaf.h b/be/src/exprs/aggregate/aggregate_function_java_udaf.h index 42b3bc87af6d9d..cbd929824d21d6 100644 --- a/be/src/exprs/aggregate/aggregate_function_java_udaf.h +++ b/be/src/exprs/aggregate/aggregate_function_java_udaf.h @@ -187,7 +187,8 @@ struct AggregateJavaUdafData { RETURN_NOT_OK_STATUS_WITH_WARN(Jni::Env::Get(&env), "Java-Udaf get value function"); Block output_block; - output_block.insert(ColumnWithTypeAndName(to.get_ptr(), result_type, "_result_")); + output_block.insert( + ColumnWithTypeAndName(result_type->create_column(), result_type, "_result_")); auto output_table_schema = JniDataBridge::parse_table_schema(&output_block); std::string output_nullable = result_type->is_nullable() ? "true" : "false"; std::map output_params = {{"is_nullable", output_nullable}, @@ -203,7 +204,11 @@ struct AggregateJavaUdafData { .with_arg(output_map) .call(&output_address)); - return JniDataBridge::fill_block(&output_block, {0}, output_address); + RETURN_IF_ERROR(JniDataBridge::fill_block(&output_block, {0}, output_address)); + const auto& result_column = output_block.get_by_position(0).column; + DORIS_CHECK(result_column->size() == 1); + to.insert_from(*result_column, 0); + return Status::OK(); } private: diff --git a/be/src/exprs/aggregate/aggregate_function_null_v2.h b/be/src/exprs/aggregate/aggregate_function_null_v2.h index aa2c9f3bb39792..a3b513d6014116 100644 --- a/be/src/exprs/aggregate/aggregate_function_null_v2.h +++ b/be/src/exprs/aggregate/aggregate_function_null_v2.h @@ -259,8 +259,7 @@ class AggregateFunctionNullBaseInlineV2 : public IAggregateFunctionHelperget_nested_column().assume_mutable().get(); + const IColumn* src_nested_column = &src_nullable_col->get_nested_column(); if (src_nullable_col->has_null()) { for (size_t i = 0; i < num_rows; ++i) { if (!src_null_map_data[i]) { diff --git a/be/src/exprs/aggregate/aggregate_function_sort.h b/be/src/exprs/aggregate/aggregate_function_sort.h index e001cb0c4c419d..2a7530e817fd3b 100644 --- a/be/src/exprs/aggregate/aggregate_function_sort.h +++ b/be/src/exprs/aggregate/aggregate_function_sort.h @@ -46,33 +46,27 @@ namespace doris { struct AggregateFunctionSortData { const SortDescription sort_desc; - Block block; + // The aggregate state is the sole owner of these columns and appends rows in add(), which is + // a hot path. Keep the long-lived state as MutableBlock and only materialize temporary Block + // views for APIs that require immutable Block input. + MutableBlock block; // The construct only support the template compiler, useless AggregateFunctionSortData() : sort_desc() {}; AggregateFunctionSortData(SortDescription sort_desc, const Block& block) : sort_desc(std::move(sort_desc)), block(block.clone_empty()) {} - void merge(const AggregateFunctionSortData& rhs) { - if (block.rows() == 0) { - block = rhs.block; - } else { - for (size_t i = 0; i < block.columns(); i++) { - auto column = block.get_by_position(i).column->assume_mutable(); - auto column_rhs = rhs.block.get_by_position(i).column; - column->insert_range_from(*column_rhs, 0, rhs.block.rows()); - } - } - } + void merge(const AggregateFunctionSortData& rhs) { append_block(rhs, 0, rhs.block.rows()); } void serialize(const RuntimeState* state, BufferWritable& buf) const { PBlock pblock; size_t uncompressed_bytes = 0; size_t compressed_bytes = 0; int64_t compressed_time = 0; - auto st = block.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, - &compressed_bytes, &compressed_time, - segment_v2::CompressionTypePB::NO_COMPRESSION); + auto block_view = to_block_view(); + auto st = block_view.serialize(state->be_exec_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, &compressed_time, + segment_v2::CompressionTypePB::NO_COMPRESSION); if (!st.ok()) { throw doris::Exception(st); } @@ -88,12 +82,14 @@ struct AggregateFunctionSortData { pblock.ParseFromString(data); [[maybe_unused]] size_t uncompressed_size = 0; [[maybe_unused]] int64_t uncompressed_time = 0; - auto st = block.deserialize(pblock, &uncompressed_size, &uncompressed_time); + Block deserialized_block; + auto st = deserialized_block.deserialize(pblock, &uncompressed_size, &uncompressed_time); // If memory allocate failed during deserialize, st is not ok, throw exception here to // stop the query. if (!st.ok()) { throw doris::Exception(st); } + block = MutableBlock(std::move(deserialized_block)); } void add(const IColumn** columns, size_t columns_num, size_t row_num) { @@ -102,14 +98,40 @@ struct AggregateFunctionSortData { block.columns(), columns_num); for (size_t i = 0; i < columns_num; ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - column->insert_from(*columns[i], row_num); + block.get_column_by_position(i)->insert_from(*columns[i], row_num); } } void sort() { + auto block_view = to_block_view(); + auto sorted_block = block_view.clone_empty(); HybridSorter hybrid_sorter; - sort_block(block, block, sort_desc, hybrid_sorter, block.rows()); + sort_block(block_view, sorted_block, sort_desc, hybrid_sorter, block_view.rows()); + block = MutableBlock(std::move(sorted_block)); + } + +private: + void append_block(const AggregateFunctionSortData& rhs, size_t start, size_t length) { + DCHECK_EQ(block.columns(), rhs.block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + DCHECK(block.get_datatype_by_position(i)->equals( + *rhs.block.get_datatype_by_position(i))) + << "lhs type: " << block.get_datatype_by_position(i)->get_name() + << ", rhs type: " << rhs.block.get_datatype_by_position(i)->get_name(); + block.get_column_by_position(i)->insert_range_from(*rhs.block.get_column_by_position(i), + start, length); + } + } + + Block to_block_view() const { + ColumnsWithTypeAndName columns_with_schema; + columns_with_schema.reserve(block.columns()); + for (size_t i = 0; i < block.columns(); ++i) { + columns_with_schema.emplace_back( + static_cast(*block.get_column_by_position(i)).get_ptr(), + block.get_datatype_by_position(i), ""); + } + return {std::move(columns_with_schema)}; } }; @@ -177,7 +199,7 @@ class AggregateFunctionSort final ColumnRawPtrs arguments_nested; for (int i = 0; i < _arguments.size() - _sort_desc.size(); i++) { arguments_nested.emplace_back( - this->data(place).block.get_by_position(i).column.get()); + this->data(place).block.get_column_by_position(i).get()); } _nested_func->add_batch_single_place(arguments_nested[0]->size(), diff --git a/be/src/exprs/function/array/function_array_flatten.cpp b/be/src/exprs/function/array/function_array_flatten.cpp index 03086f37008788..3f76bcfb015e4a 100644 --- a/be/src/exprs/function/array/function_array_flatten.cpp +++ b/be/src/exprs/function/array/function_array_flatten.cpp @@ -55,23 +55,23 @@ class FunctionArrayFlatten : public IFunction { auto src_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto* src_column_array_ptr = - assert_cast(remove_nullable(src_column)->assume_mutable().get()); - ColumnArray* nested_src_column_array_ptr = src_column_array_ptr; + assert_cast(remove_nullable(src_column).get()); + const ColumnArray* nested_src_column_array_ptr = src_column_array_ptr; DataTypePtr src_data_type = block.get_by_position(arguments[0]).type; auto* src_data_type_array = assert_cast(remove_nullable(src_data_type).get()); - auto result_column_offsets = - assert_cast(src_column_array_ptr->get_offsets_column()) - .clone(); + auto result_column_offsets = assert_cast( + src_column_array_ptr->get_offsets_column()) + .clone(); auto* offsets = assert_cast(result_column_offsets.get()) ->get_data() .data(); while (src_data_type_array->get_nested_type()->get_primitive_type() == TYPE_ARRAY) { - nested_src_column_array_ptr = assert_cast( - remove_nullable(src_column_array_ptr->get_data_ptr())->assume_mutable().get()); + nested_src_column_array_ptr = assert_cast( + remove_nullable(src_column_array_ptr->get_data_ptr()).get()); for (size_t i = 0; i < input_rows_count; ++i) { offsets[i] = nested_src_column_array_ptr->get_offsets()[offsets[i] - 1]; diff --git a/be/src/exprs/function/cast/cast_base.h b/be/src/exprs/function/cast/cast_base.h index d759ead1260a72..cc58e29d4acd11 100644 --- a/be/src/exprs/function/cast/cast_base.h +++ b/be/src/exprs/function/cast/cast_base.h @@ -22,6 +22,8 @@ #include "core/assert_cast.h" #include "core/block/block.h" #include "core/call_on_type_index.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_bitmap.h" @@ -98,6 +100,12 @@ constexpr static bool IsBaseCastFromType = IsBaseCastToType || IsStringTypecreate_column(), + ColumnUInt8::create()); +} + namespace CastWrapper { using WrapperType = std::function requires(IsDataTypeNumber) class CastToImpl : public CastToBase { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; - auto serde = remove_nullable(to_type)->get_serde(); - - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); + auto nested_to_type = remove_nullable(to_type); + auto serde = nested_to_type->get_serde(); DataTypeSerDe::FormatOptions format_options; format_options.converted_from_string = true; if constexpr (Mode == CastModeType::NonStrictMode) { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to RETURN_IF_ERROR(serde->from_string_batch(*col_from, *nullable_col_to, format_options)); + block.get_by_position(result).column = std::move(nullable_col_to); } else if constexpr (Mode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows - RETURN_IF_ERROR(serde->from_string_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column(), format_options, null_map)); + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows + RETURN_IF_ERROR(serde->from_string_strict_mode_batch(*col_from, *column_to, + format_options, null_map)); + block.get_by_position(result).column = std::move(column_to); } else { return Status::InternalError("Unsupported cast mode"); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; diff --git a/be/src/exprs/function/cast/cast_to_boolean.h b/be/src/exprs/function/cast/cast_to_boolean.h index a1a63522eb1a3e..7fda47712e582f 100644 --- a/be/src/exprs/function/cast/cast_to_boolean.h +++ b/be/src/exprs/function/cast/cast_to_boolean.h @@ -122,30 +122,29 @@ template class CastToImpl : public CastToBase { public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; - auto serde = remove_nullable(to_type)->get_serde(); - - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); + auto nested_to_type = remove_nullable(to_type); + auto serde = nested_to_type->get_serde(); if constexpr (Mode == CastModeType::NonStrictMode) { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to RETURN_IF_ERROR(serde->from_string_batch(*col_from, *nullable_col_to, {})); + block.get_by_position(result).column = std::move(nullable_col_to); } else if constexpr (Mode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows - RETURN_IF_ERROR(serde->from_string_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column(), {}, null_map)); + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows + RETURN_IF_ERROR( + serde->from_string_strict_mode_batch(*col_from, *column_to, {}, null_map)); + block.get_by_position(result).column = std::move(column_to); } else { return Status::InternalError("Unsupported cast mode"); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; diff --git a/be/src/exprs/function/cast/cast_to_date.h b/be/src/exprs/function/cast/cast_to_date.h index 535de7e3d482aa..009739b0b0bbbf 100644 --- a/be/src/exprs/function/cast/cast_to_date.h +++ b/be/src/exprs/function/cast/cast_to_date.h @@ -47,32 +47,31 @@ template class CastToImpl : public CastToBase { public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; - auto serde = remove_nullable(to_type)->get_serde(); + auto nested_to_type = remove_nullable(to_type); + auto serde = nested_to_type->get_serde(); DataTypeSerDe::FormatOptions options; options.timezone = &context->state()->timezone_obj(); - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); - if constexpr (CastMode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows - RETURN_IF_ERROR(serde->from_string_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column(), options, null_map)); + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows + RETURN_IF_ERROR( + serde->from_string_strict_mode_batch(*col_from, *column_to, options, null_map)); + block.get_by_position(result).column = std::move(column_to); } else { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to RETURN_IF_ERROR(serde->from_string_batch(*col_from, *nullable_col_to, options)); + block.get_by_position(result).column = std::move(nullable_col_to); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; @@ -82,36 +81,35 @@ template class CastToImpl : public CastToBase { public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; + auto nested_to_type = remove_nullable(to_type); auto concrete_serde = std::dynamic_pointer_cast( - remove_nullable(to_type)->get_serde()); - - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); + nested_to_type->get_serde()); // datelike types serde must have template functions for those types. but because of they need to be // template functions, so we cannot make them virtual. that's why we assert_cast `serde` before. if constexpr (CastMode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows if constexpr (IsDataTypeInt) { RETURN_IF_ERROR(concrete_serde->template from_int_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column())); + *col_from, *column_to)); } else if constexpr (IsDataTypeFloat) { RETURN_IF_ERROR(concrete_serde->template from_float_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column())); + *col_from, *column_to)); } else { static_assert(IsDataTypeDecimal); RETURN_IF_ERROR( concrete_serde->template from_decimal_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column())); + *col_from, *column_to)); } + block.get_by_position(result).column = std::move(column_to); } else { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to if constexpr (IsDataTypeInt) { RETURN_IF_ERROR(concrete_serde->template from_int_batch( @@ -124,9 +122,9 @@ class CastToImpl : public CastToBase { RETURN_IF_ERROR(concrete_serde->template from_decimal_batch( *col_from, *nullable_col_to)); } + block.get_by_position(result).column = std::move(nullable_col_to); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; diff --git a/be/src/exprs/function/cast/cast_to_decimal.h b/be/src/exprs/function/cast/cast_to_decimal.h index daec3a53a548d5..0617aca16ea014 100644 --- a/be/src/exprs/function/cast/cast_to_decimal.h +++ b/be/src/exprs/function/cast/cast_to_decimal.h @@ -684,31 +684,30 @@ template requires(IsDataTypeDecimal) class CastToImpl : public CastToBase { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; - auto serde = remove_nullable(to_type)->get_serde(); - - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); + auto nested_to_type = remove_nullable(to_type); + auto serde = nested_to_type->get_serde(); if constexpr (Mode == CastModeType::NonStrictMode) { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to RETURN_IF_ERROR(serde->from_string_batch(*col_from, *nullable_col_to, {})); + block.get_by_position(result).column = std::move(nullable_col_to); } else if constexpr (Mode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows - RETURN_IF_ERROR(serde->from_string_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column(), {}, null_map)); + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows + RETURN_IF_ERROR( + serde->from_string_strict_mode_batch(*col_from, *column_to, {}, null_map)); + block.get_by_position(result).column = std::move(column_to); } else { return Status::InternalError("Unsupported cast mode"); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; diff --git a/be/src/exprs/function/cast/cast_to_ip.h b/be/src/exprs/function/cast/cast_to_ip.h index 81eb1e798edc93..a585261e18b169 100644 --- a/be/src/exprs/function/cast/cast_to_ip.h +++ b/be/src/exprs/function/cast/cast_to_ip.h @@ -51,31 +51,30 @@ template class CastToImpl : public CastToBase { public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count, + uint32_t result, size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) const override { const auto* col_from = assert_cast( block.get_by_position(arguments[0]).column.get()); auto to_type = block.get_by_position(result).type; - auto serde = remove_nullable(to_type)->get_serde(); - - // by default framework, to_type is already unwrapped nullable - MutableColumnPtr column_to = to_type->create_column(); - ColumnNullable::MutablePtr nullable_col_to = ColumnNullable::create( - std::move(column_to), ColumnUInt8::create(input_rows_count, 0)); + auto nested_to_type = remove_nullable(to_type); + auto serde = nested_to_type->get_serde(); if constexpr (Mode == CastModeType::NonStrictMode) { + auto nullable_col_to = create_empty_nullable_column(nested_to_type); // may write nulls to nullable_col_to RETURN_IF_ERROR(serde->from_string_batch(*col_from, *nullable_col_to, {})); + block.get_by_position(result).column = std::move(nullable_col_to); } else if constexpr (Mode == CastModeType::StrictMode) { - // WON'T write nulls to nullable_col_to, just raise errors. null_map is only used to skip invalid rows - RETURN_IF_ERROR(serde->from_string_strict_mode_batch( - *col_from, nullable_col_to->get_nested_column(), {}, null_map)); + MutableColumnPtr column_to = nested_to_type->create_column(); + // WON'T write nulls to the result column, just raise errors. null_map is only used to skip invalid rows + RETURN_IF_ERROR( + serde->from_string_strict_mode_batch(*col_from, *column_to, {}, null_map)); + block.get_by_position(result).column = std::move(column_to); } else { return Status::InternalError("Unsupported cast mode"); } - block.get_by_position(result).column = std::move(nullable_col_to); return Status::OK(); } }; @@ -103,4 +102,4 @@ class CastToImpl : public CastToBase { return Status::OK(); } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/exprs/function/cast/cast_to_string.h b/be/src/exprs/function/cast/cast_to_string.h index 15879104a21076..c38b8ec90f920a 100644 --- a/be/src/exprs/function/cast/cast_to_string.h +++ b/be/src/exprs/function/cast/cast_to_string.h @@ -567,7 +567,14 @@ class CastToStringFunction { auto time_zone = cctz::utc_time_zone(); options.timezone = (context && context->state()) ? &context->state()->timezone_obj() : &time_zone; - type.get_serde()->to_string_batch(col_from, *col_to, options); + ColumnPtr limited_col; + const IColumn* col_to_serialize = &col_from; + if (col_from.size() != input_rows_count) { + DORIS_CHECK(col_from.size() >= input_rows_count); + limited_col = col_from.cut(0, input_rows_count); + col_to_serialize = limited_col.get(); + } + type.get_serde()->to_string_batch(*col_to_serialize, *col_to, options); block.replace_by_position(result, std::move(col_to)); return Status::OK(); diff --git a/be/src/exprs/function/cast/cast_to_variant.h b/be/src/exprs/function/cast/cast_to_variant.h index acc8ed9e7f6492..3aebb66212a44a 100644 --- a/be/src/exprs/function/cast/cast_to_variant.h +++ b/be/src/exprs/function/cast/cast_to_variant.h @@ -29,19 +29,34 @@ inline Status cast_from_variant_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count, const NullMap::value_type* null_map, const DataTypePtr& data_type_to) { - const auto& col_with_type_and_name = block.get_by_position(arguments[0]); - const auto& col_from = col_with_type_and_name.column; + auto& col_with_type_and_name = block.get_by_position(arguments[0]); + auto& col_from = col_with_type_and_name.column; const IColumn* variant_column = col_from.get(); if (const auto* nullable = check_and_get_column(*variant_column)) { variant_column = &nullable->get_nested_column(); } - const auto& variant = assert_cast(*variant_column); - ColumnPtr col_to = data_type_to->create_column(); - if (!variant.is_finalized()) { + if (!assert_cast(*variant_column).is_finalized()) { // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - variant.assume_mutable()->finalize(); + auto mutable_column = IColumn::mutate(std::move(col_with_type_and_name.column)); + if (auto* nullable = check_and_get_column(*mutable_column)) { + const auto& const_nullable = *nullable; + auto nested_column = IColumn::mutate(const_nullable.get_nested_column_ptr()); + assert_cast(*nested_column).finalize(); + ColumnPtr nested_column_ptr = std::move(nested_column); + nullable->change_nested_column(nested_column_ptr); + } else { + assert_cast(*mutable_column).finalize(); + } + col_with_type_and_name.column = std::move(mutable_column); + } + + variant_column = col_with_type_and_name.column.get(); + if (const auto* nullable = check_and_get_column(*variant_column)) { + variant_column = &nullable->get_nested_column(); } + const auto& variant = assert_cast(*variant_column); + ColumnPtr col_to = data_type_to->create_column(); // It's important to convert as many elements as possible in this context. For instance, // if the root of this variant column is a number column, converting it to a number column @@ -152,7 +167,7 @@ struct CastToVariant { auto variant = ColumnVariant::create( variant_type ? variant_type->variant_max_subcolumns_count() : 0, variant_type ? variant_type->enable_doc_mode() : false); - variant->create_root(from_type, col_from->assume_mutable()); + variant->create_root(from_type, IColumn::mutate(col_from)); block.replace_by_position(result, std::move(variant)); return Status::OK(); } @@ -186,4 +201,4 @@ WrapperType create_cast_from_variant_wrapper(const DataTypeVariant& from_type, }; } -} // namespace doris::CastWrapper \ No newline at end of file +} // namespace doris::CastWrapper diff --git a/be/src/exprs/function/function.cpp b/be/src/exprs/function/function.cpp index f1e44bb4c2ac40..590eb63829200d 100644 --- a/be/src/exprs/function/function.cpp +++ b/be/src/exprs/function/function.cpp @@ -67,8 +67,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } if (!mutable_result_null_map_column) { - mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); + mutable_result_null_map_column = (*std::move(result_null_map_column)).mutate(); } NullMap& result_null_map = @@ -80,6 +79,12 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum } } + // Commit merged null map back: result_null_map_column was moved into + // mutable_result_null_map_column when merging 2+ nullable args with nulls. + if (mutable_result_null_map_column) { + result_null_map_column = std::move(mutable_result_null_map_column); + } + if (!result_null_map_column) { if (is_column_const(*src)) { return ColumnConst::create( diff --git a/be/src/exprs/function/function_bitmap.cpp b/be/src/exprs/function/function_bitmap.cpp index 3f2c388efb85bf..35341f297640b0 100644 --- a/be/src/exprs/function/function_bitmap.cpp +++ b/be/src/exprs/function/function_bitmap.cpp @@ -681,11 +681,11 @@ void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, const ColumnNumbers& args, uint32_t result, size_t input_rows_count) { - auto* nullable = assert_cast(src.get()); - ColumnPtr src_not_nullable = nullable->get_nested_column_ptr(); - MutableColumnPtr src_not_nullable_mutable = (*std::move(src_not_nullable)).assume_mutable(); + MutableColumnPtr mutable_src = IColumn::mutate(std::move(src)); + auto* nullable = assert_cast(mutable_src.get()); + auto* src_not_nullable_mutable = &nullable->get_nested_column(); auto* __restrict count_data = - assert_cast(src_not_nullable_mutable.get())->get_data().data(); + assert_cast(src_not_nullable_mutable)->get_data().data(); for (const auto& arg : args) { const ColumnWithTypeAndName& elem = block.get_by_position(arg); @@ -712,7 +712,7 @@ ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block, } } - return src; + return mutable_src; } Status execute_bitmap_op_count_null_to_zero( diff --git a/be/src/exprs/function/function_variant_element.cpp b/be/src/exprs/function/function_variant_element.cpp index e407e595ffd461..cef269c613519f 100644 --- a/be/src/exprs/function/function_variant_element.cpp +++ b/be/src/exprs/function/function_variant_element.cpp @@ -148,8 +148,7 @@ class FunctionVariantElement : public IFunction { const auto& src_sparse_data_values = assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - assert_cast(*target_ptr->get_sparse_column()->assume_mutable()) - .get_offsets(); + assert_cast(target_ptr->get_sparse_column_mutable()).get_offsets(); auto [sparse_data_paths, sparse_data_values] = target_ptr->get_sparse_data_paths_and_values(); StringRef prefix_ref(path.get_path()); @@ -190,7 +189,7 @@ class FunctionVariantElement : public IFunction { sparse_data_offsets.push_back(sparse_data_paths->size()); } target_ptr->get_subcolumns().create_root(root); - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); target_ptr->set_num_rows(src_ptr->size()); } @@ -211,9 +210,9 @@ class FunctionVariantElement : public IFunction { // Ordinary Variant extraction keeps the selected prefix in sparse data, matching the // source branch behavior. Only doc-mode columns keep extracted data in doc_value. auto& extracted_offsets = - assert_cast(*(write_to_doc_value ? target_ptr->get_doc_value_column() - : target_ptr->get_sparse_column()) - ->assume_mutable()) + assert_cast(write_to_doc_value + ? target_ptr->get_doc_value_column_mutable() + : target_ptr->get_sparse_column_mutable()) .get_offsets(); auto [extracted_paths, extracted_values] = write_to_doc_value ? target_ptr->get_doc_value_data_paths_and_values() @@ -251,9 +250,9 @@ class FunctionVariantElement : public IFunction { } target_ptr->get_subcolumns().create_root(root); if (write_to_doc_value) { - target_ptr->get_sparse_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_sparse_column_mutable().resize(src_ptr->size()); } else { - target_ptr->get_doc_value_column()->assume_mutable()->resize(src_ptr->size()); + target_ptr->get_doc_value_column_mutable().resize(src_ptr->size()); } target_ptr->set_num_rows(src_ptr->size()); } @@ -323,7 +322,7 @@ class FunctionVariantElement : public IFunction { if (new_subcolumns.empty() && !nodes.empty()) { CHECK_EQ(nodes.size(), 1); new_subcolumns.create_root(ColumnVariant::Subcolumn { - nodes[0]->data.get_finalized_column_ptr()->assume_mutable(), + IColumn::mutate(nodes[0]->data.get_finalized_column_ptr()), nodes[0]->data.get_least_common_type(), true, true}); auto container = ColumnVariant::create(src.max_subcolumns_count(), src.enable_doc_mode(), @@ -349,12 +348,12 @@ class FunctionVariantElement : public IFunction { } result_col->insert_range_from(*container, 0, container->size()); } - *result = result_col->get_ptr(); // ColumnVariant should be finalized before parsing, finalize maybe modify original column structure - (*result)->assume_mutable()->finalize(); + result_col->finalize(); VLOG_DEBUG << "dump new object " << static_cast(result_col.get())->debug_string() << ", path " << path.get_path(); + *result = std::move(result_col); return Status::OK(); } } diff --git a/be/src/exprs/table_function/python_udtf_function.cpp b/be/src/exprs/table_function/python_udtf_function.cpp index f39ceafd98208c..4bcd8ae46364c5 100644 --- a/be/src/exprs/table_function/python_udtf_function.cpp +++ b/be/src/exprs/table_function/python_udtf_function.cpp @@ -260,8 +260,7 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( if (_return_type->is_nullable()) { nullable_col = assert_cast(array_col_ptr.get()); - array_col = assert_cast( - nullable_col->get_nested_column_ptr()->assume_mutable().get()); + array_col = assert_cast(&nullable_col->get_nested_column()); } else { array_col = assert_cast(array_col_ptr.get()); } @@ -274,8 +273,8 @@ Status PythonUDTFFunction::_convert_list_array_to_array_column( // Use read_column_from_arrow for optimized conversion // This directly converts Arrow ListArray to Doris ColumnArray // No struct unwrapping needed - Python server sends the correct format! - RETURN_IF_ERROR(array_serde->read_column_from_arrow( - array_col->assume_mutable_ref(), list_array.get(), 0, num_input_rows, _timezone_obj)); + RETURN_IF_ERROR(array_serde->read_column_from_arrow(*array_col, list_array.get(), 0, + num_input_rows, _timezone_obj)); // Handle nullable wrapper: all array elements are non-null // (empty arrays [] are non-null, different from NULL) diff --git a/be/src/exprs/table_function/udf_table_function.cpp b/be/src/exprs/table_function/udf_table_function.cpp index 4b6037f7ab1771..414766ef9157c3 100644 --- a/be/src/exprs/table_function/udf_table_function.cpp +++ b/be/src/exprs/table_function/udf_table_function.cpp @@ -123,10 +123,12 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) { .with_arg(output_map) .call(&output_address)); RETURN_IF_ERROR(JniDataBridge::fill_block(block, {_result_column_idx}, output_address)); + _array_result_column = + IColumn::mutate(std::move(block->get_by_position(_result_column_idx).column)); block->erase(_result_column_idx); if (!extract_column_array_info(*_array_result_column, _array_column_detail)) { return Status::NotSupported("column type {} not supported now", - block->get_by_position(_result_column_idx).column->get_name()); + _array_result_column->get_name()); } return Status::OK(); } diff --git a/be/src/exprs/table_function/vexplode.cpp b/be/src/exprs/table_function/vexplode.cpp index 680e5ccff66ed1..0b8556229a4ee3 100644 --- a/be/src/exprs/table_function/vexplode.cpp +++ b/be/src/exprs/table_function/vexplode.cpp @@ -45,7 +45,8 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _detail.output_as_variant = true; _detail.variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -62,9 +63,10 @@ Status VExplodeTableFunction::_process_init_variant(Block* block, int value_colu _detail.nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), - ColumnUInt8::create(0)); - _array_column->assume_mutable()->insert_many_defaults(variant_column.size()); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); + array_column->insert_many_defaults(variant_column.size()); + _array_column = std::move(array_column); _detail.nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/table_function/vexplode_v2.cpp b/be/src/exprs/table_function/vexplode_v2.cpp index b21802690a84b8..62a4ab1d66ae92 100644 --- a/be/src/exprs/table_function/vexplode_v2.cpp +++ b/be/src/exprs/table_function/vexplode_v2.cpp @@ -51,7 +51,8 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co // explode variant array auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column); auto column = column_without_nullable->convert_to_full_column_if_const(); - auto& variant_column = assert_cast(*(column->assume_mutable())); + auto variant_column_ptr = IColumn::mutate(std::move(column)); + auto& variant_column = assert_cast(*variant_column_ptr); variant_column.finalize(); _multi_detail[children_column_idx].output_as_variant = true; _multi_detail[children_column_idx].variant_enable_doc_mode = variant_column.enable_doc_mode(); @@ -68,10 +69,10 @@ Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_co _multi_detail[children_column_idx].nested_type = array_type->get_nested_type(); } else { // null root, use nothing type - _array_columns[children_column_idx] = ColumnNullable::create( - ColumnArray::create(ColumnNothing::create(0)), ColumnUInt8::create(0)); - _array_columns[children_column_idx]->assume_mutable()->insert_many_defaults( - variant_column.size()); + auto array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)), + ColumnUInt8::create(0)); + array_column->insert_many_defaults(variant_column.size()); + _array_columns[children_column_idx] = std::move(array_column); _multi_detail[children_column_idx].nested_type = std::make_shared(); } return Status::OK(); diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h index 382193276cad29..b8e274be82a7bb 100644 --- a/be/src/exprs/vcase_expr.h +++ b/be/src/exprs/vcase_expr.h @@ -217,9 +217,9 @@ class VCaseExpr final : public VExpr { if (!then_columns[i]) { continue; } - auto* __restrict column_raw_data = - assert_cast( - then_columns[i]->assume_mutable().get()) + const auto* __restrict column_raw_data = + assert_cast( + then_columns[i].get()) ->get_data() .data(); if constexpr (std::is_same_v || diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h index 9f65060eba9b0b..e82fa04ba8e5a3 100644 --- a/be/src/exprs/vcompound_pred.h +++ b/be/src/exprs/vcompound_pred.h @@ -180,8 +180,8 @@ class VCompoundPred : public VectorizedFnCall { } ColumnPtr rhs_column = nullptr; - uint8_t* __restrict rhs_data_column = nullptr; - uint8_t* __restrict rhs_null_map = nullptr; + const uint8_t* __restrict rhs_data_column = nullptr; + const uint8_t* __restrict rhs_null_map = nullptr; bool rhs_is_nullable = false; bool rhs_all_true = false; bool rhs_all_false = false; @@ -216,31 +216,36 @@ class VCompoundPred : public VectorizedFnCall { }; auto create_null_map_column = [&](ColumnPtr& null_map_column, - uint8_t* __restrict null_map_data) { + const uint8_t* __restrict null_map_data) { if (null_map_data == nullptr) { null_map_column = ColumnUInt8::create(size, 0); - null_map_data = assert_cast(null_map_column->assume_mutable().get()) - ->get_data() - .data(); + null_map_data = + assert_cast(null_map_column.get())->get_data().data(); } return null_map_data; }; auto vector_vector = [&]() { + MutableColumnPtr mutable_result_column; + uint8_t* __restrict result_data_column = nullptr; + const uint8_t* __restrict other_data_column = rhs_data_column; if (lhs_column->use_count() == 1) { - result_column = lhs_column; + mutable_result_column = IColumn::mutate(std::move(lhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } else if (rhs_column->use_count() == 1) { - result_column = rhs_column; - auto tmp_column = rhs_data_column; - rhs_data_column = lhs_data_column; - lhs_data_column = tmp_column; + mutable_result_column = IColumn::mutate(std::move(rhs_column)); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); + other_data_column = lhs_data_column; } else { - auto col_res = lhs_column->clone_resized(size); - lhs_data_column = assert_cast(col_res.get())->get_data().data(); - result_column = std::move(col_res); + mutable_result_column = lhs_column->clone_resized(size); + result_data_column = + assert_cast(mutable_result_column.get())->get_data().data(); } - do_not_null_pred(lhs_data_column, rhs_data_column, size); + do_not_null_pred(result_data_column, other_data_column, size); + result_column = std::move(mutable_result_column); }; auto vector_vector_null = [&]() { auto col_res = ColumnUInt8::create(size); @@ -347,7 +352,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_not_null_pred(uint8_t* __restrict lhs, uint8_t* __restrict rhs, size_t size) { + void static do_not_null_pred(uint8_t* __restrict lhs, const uint8_t* __restrict rhs, + size_t size) { #ifdef NDEBUG #if defined(__clang__) #pragma clang loop vectorize(enable) @@ -365,8 +371,8 @@ class VCompoundPred : public VectorizedFnCall { } template - void static do_null_pred(uint8_t* __restrict lhs_data, uint8_t* __restrict lhs_null, - uint8_t* __restrict rhs_data, uint8_t* __restrict rhs_null, + void static do_null_pred(const uint8_t* __restrict lhs_data, const uint8_t* __restrict lhs_null, + const uint8_t* __restrict rhs_data, const uint8_t* __restrict rhs_null, uint8_t* __restrict res_data, uint8_t* __restrict res_null, size_t size) { #ifdef NDEBUG @@ -392,22 +398,21 @@ class VCompoundPred : public VectorizedFnCall { [](const VExprSPtr& arg) -> bool { return arg->is_constant(); }); } - std::pair _get_raw_data_and_null_map(ColumnPtr column, - bool has_nullable_column) const { + std::pair _get_raw_data_and_null_map( + const ColumnPtr& column, bool has_nullable_column) const { if (has_nullable_column) { - auto* nullable_column = assert_cast(column->assume_mutable().get()); + const auto* nullable_column = assert_cast(column.get()); auto* data_column = - assert_cast(nullable_column->get_nested_column_ptr().get()) - ->get_data() - .data(); - auto* null_map = - assert_cast(nullable_column->get_null_map_column_ptr().get()) + assert_cast(nullable_column->get_nested_column_ptr().get()) ->get_data() .data(); + auto* null_map = assert_cast( + nullable_column->get_null_map_column_ptr().get()) + ->get_data() + .data(); return std::make_pair(data_column, null_map); } else { - auto* data_column = - assert_cast(column->assume_mutable().get())->get_data().data(); + auto* data_column = assert_cast(column.get())->get_data().data(); return std::make_pair(data_column, nullptr); } } diff --git a/be/src/format/arrow/arrow_stream_reader.cpp b/be/src/format/arrow/arrow_stream_reader.cpp index b91608ee3fafa1..7d496d803a6248 100644 --- a/be/src/format/arrow/arrow_stream_reader.cpp +++ b/be/src/format/arrow/arrow_stream_reader.cpp @@ -94,7 +94,8 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo std::move(res_reader).ValueUnsafe(); // convert arrow batch to block - auto columns = block->mutate_columns(); + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); size_t batch_size = out_batches.size(); for (size_t i = 0; i < batch_size; i++) { arrow::RecordBatch& batch = *out_batches[i]; @@ -105,15 +106,17 @@ Status ArrowStreamReader::_do_get_next_block(Block* block, size_t* read_rows, bo std::string column_name = batch.schema()->field(c)->name(); try { - const ColumnWithTypeAndName& column_with_name = block->safe_get_by_position(c); + const auto& column_name_in_block = columns_guard.get_name_by_position(c); - if (column_with_name.name != column_name) { + if (column_name_in_block != column_name) { return Status::InternalError("Column name mismatch: expected {}, got {}", - column_with_name.name, column_name); + column_name_in_block, column_name); } - RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + RETURN_IF_ERROR( + columns_guard.get_datatype_by_position(c) + ->get_serde() + ->read_column_from_arrow(*columns[c], column, 0, num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: {}", e.what()); } diff --git a/be/src/format/column_type_convert.cpp b/be/src/format/column_type_convert.cpp index cd71ffb5babb33..08fe5c8a4ce794 100644 --- a/be/src/format/column_type_convert.cpp +++ b/be/src/format/column_type_convert.cpp @@ -18,6 +18,7 @@ #include "format/column_type_convert.h" #include "common/cast_set.h" +#include "core/column/column_nullable.h" #include "core/data_type/define_primitive_type.h" namespace doris::converter { @@ -111,19 +112,12 @@ ColumnPtr ColumnTypeConverter::get_column(const DataTypePtr& src_type, ColumnPtr _cached_src_type = dst_type->is_nullable() ? get_data_type_with_default_argument(make_nullable(src_type)) : get_data_type_with_default_argument(remove_nullable(src_type)); - _cached_src_column = remove_nullable(_cached_src_type)->create_column(); + _cached_src_column = _cached_src_type->create_column(); } // remove the old cached data - _cached_src_column->assume_mutable()->clear(); - - if (dst_type->is_nullable()) { - // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will - // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. - // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto* doris_nullable_column = static_cast(dst_column.get()); - return ColumnNullable::create(_cached_src_column, - doris_nullable_column->get_null_map_column_ptr()); - } + auto cached_src_column = IColumn::mutate(std::move(_cached_src_column)); + cached_src_column->clear(); + _cached_src_column = std::move(cached_src_column); return _cached_src_column; } diff --git a/be/src/format/column_type_convert.h b/be/src/format/column_type_convert.h index 04003c098f0d30..700fdd2ac1cff4 100644 --- a/be/src/format/column_type_convert.h +++ b/be/src/format/column_type_convert.h @@ -44,6 +44,20 @@ namespace doris::converter { enum FileFormat { COMMON, ORC, PARQUET }; +// Helper: get the inner (non-nullable) mutable column from an exclusively-owned dst_col. +// - For non-nullable dst_col: returns a raw pointer to the column itself. +// - For nullable dst_col: returns a raw pointer to the nested (non-null) column. +// Must only be called when dst_col has exclusive ownership (use_count == 1). +// Returns IColumn* (raw pointer) to avoid creating a second owning MutableColumnPtr, +// which would violate COW invariant (use_count > 1). +inline IColumn* get_mutable_inner_col(MutableColumnPtr& dst_col) { + if (dst_col->is_nullable()) { + return static_cast(dst_col.get())->get_nested_column_ptr().get(); + } else { + return dst_col.get(); + } +} + template constexpr bool is_decimal_type() { return type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || @@ -109,8 +123,9 @@ class ColumnTypeConverter { /** * Get the column to read data from file with the type from file meta data. * If the converter is not consistent, the returned column is `_cached_src_column`. - * For performance reasons, the null map of `_cached_src_column` is a reference from - * the null map of `dst_column`, so there is no need to convert null map in `convert()`. + * For nullable destination columns, `_cached_src_column` is also nullable and owns its + * temporary null map. The reader fills this source null map first, then copies only the + * newly appended null slice back to the destination column before value conversion. * * According to the hive standard, if certain values fail to be converted(eg. string `row1` to int value), * these values are replaced by nulls. @@ -165,13 +180,13 @@ class IntegerToIntegerConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { if constexpr (sizeof(DstCppType) < sizeof(SrcCppType)) { SrcCppType src_value = src_data[i]; @@ -212,7 +227,7 @@ class NumericToFloatPointConverter : public ColumnTypeConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -223,7 +238,7 @@ class NumericToFloatPointConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcCppType src_value = src_data[i]; if constexpr (is_integer_type()) { @@ -248,11 +263,11 @@ class BooleanToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i] != 0 ? "TRUE" : "FALSE"; string_col.insert_data(value.data(), value.size()); @@ -269,7 +284,7 @@ class NumericToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -279,7 +294,7 @@ class NumericToStringConverter : public ColumnTypeConverter { size_t rows = from_col->size(); size_t start_idx = to_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { if constexpr (SrcPrimitiveType == TYPE_FLOAT || SrcPrimitiveType == TYPE_DOUBLE) { if (fileFormat == FileFormat::ORC && std::isnan(src_data[i])) { @@ -318,11 +333,11 @@ class DecimalToStringConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); for (int i = 0; i < rows; ++i) { std::string value = src_data[i].to_string(_scale); string_col.insert_data(value.data(), value.size()); @@ -339,11 +354,11 @@ class TimeToStringConverter : public ColumnTypeConverter { using SrcCppType = typename PrimitiveTypeTraits::CppType; using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); - auto& string_col = static_cast(*to_col.get()); + auto& string_col = static_cast(*to_col); char buf[50]; for (int i = 0; i < rows; ++i) { int len = (reinterpret_cast(src_data[i])).to_buffer(buf); @@ -571,19 +586,19 @@ class CastStringConverter : public ColumnTypeConverter { } NullMap* null_map = nullptr; - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; if (dst_col->is_nullable()) { auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = nullable->get_nested_column_ptr().get(); null_map = &nullable->get_null_map_data(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } size_t rows = string_col->size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(to_col.get())->get_data(); + auto& data = assert_cast(to_col)->get_data(); CastParameters params; for (int i = 0; i < rows; ++i) { bool can_cast = false; @@ -628,7 +643,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -639,7 +654,7 @@ class DateTimeToNumericConverter : public ColumnTypeConverter { const auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const SrcCppType& src_value = src_data[i]; @@ -680,13 +695,13 @@ class TimeV2Converter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { const auto& src_value = reinterpret_cast(src_data[i]); auto& dst_value = reinterpret_cast(data[start_idx + i]); @@ -718,7 +733,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { using DstDorisType = typename PrimitiveTypeTraits::ColumnType::value_type; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -729,7 +744,7 @@ class NumericToDecimalConverter : public ColumnTypeConverter { auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); auto max_result = DataTypeDecimal::get_max_digits_number(_precision); auto multiplier = DataTypeDecimal::get_scale_multiplier(_scale); @@ -804,13 +819,13 @@ class DecimalToNumericConverter : public ColumnTypeConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); NullMap* null_map = nullptr; if (dst_col->is_nullable()) { @@ -889,13 +904,13 @@ class DecimalToDecimalConverter : public ColumnTypeConverter { bool narrow_integral = (_to_precision - _to_scale) < (_from_precision - _from_scale); ColumnPtr from_col = remove_nullable(src_col); - MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + IColumn* to_col = get_mutable_inner_col(dst_col); size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = static_cast(*to_col.get()).get_data(); + auto& data = static_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { SrcNativeType src_value = src_data[i].value; @@ -983,15 +998,15 @@ class VarBinaryConverter : public ColumnTypeConverter { from_col = &assert_cast(*src_col); } - MutableColumnPtr to_col = nullptr; + IColumn* to_col = nullptr; // nullmap flag seems have been handled in upper level if (dst_col->is_nullable()) { const auto* nullable = assert_cast(dst_col.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); + to_col = const_cast(nullable)->get_nested_column_ptr().get(); } else { - to_col = dst_col->assume_mutable(); + to_col = dst_col.get(); } - auto* to_dst_column = assert_cast(to_col.get()); + auto* to_dst_column = assert_cast(to_col); for (size_t i = 0; i < from_col->size(); ++i) { auto string_ref = from_col->get_data_at(i); diff --git a/be/src/format/count_reader.h b/be/src/format/count_reader.h index 4b6e55337b23ac..0c4cd791e8542b 100644 --- a/be/src/format/count_reader.h +++ b/be/src/format/count_reader.h @@ -58,11 +58,11 @@ class CountReader : public GenericReader { auto rows = std::min(_remaining_rows, static_cast(_batch_size)); _remaining_rows -= rows; - auto mutate_columns = block->mutate_columns(); + auto mutable_columns_guard = block->mutate_columns_scoped(); + auto& mutate_columns = mutable_columns_guard.mutable_columns(); for (auto& col : mutate_columns) { col->resize(rows); } - block->set_columns(std::move(mutate_columns)); *read_rows = rows; *eof = (_remaining_rows == 0); diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp index 539132c7c9f003..266f569acbe9ae 100644 --- a/be/src/format/csv/csv_reader.cpp +++ b/be/src/format/csv/csv_reader.cpp @@ -65,6 +65,19 @@ enum class FileCachePolicy : uint8_t; namespace doris { +namespace { + +size_t columns_byte_size(const std::vector& columns) { + size_t bytes = 0; + for (const auto& column : columns) { + DCHECK(column.get() != nullptr); + bytes += column->byte_size(); + } + return bytes; +} + +} // namespace + void EncloseCsvTextFieldSplitter::do_split(const Slice& line, std::vector* splitted_values) { const char* data = line.data; const auto& column_sep_positions = _text_line_reader_ctx->column_sep_positions(); @@ -430,14 +443,16 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) RETURN_IF_ERROR(_validate_line(Slice(ptr, size), &success)); ++rows; } - auto mutate_columns = block->mutate_columns(); + auto mutable_columns_guard = block->mutate_columns_scoped(); + auto& mutate_columns = mutable_columns_guard.mutable_columns(); for (auto& col : mutate_columns) { col->resize(rows); } - block->set_columns(std::move(mutate_columns)); } else { - auto columns = block->mutate_columns(); - while (rows < batch_size && !_line_reader_eof && (block->bytes() < max_block_bytes)) { + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); + while (rows < batch_size && !_line_reader_eof && + (columns_byte_size(columns) < max_block_bytes)) { const uint8_t* ptr = nullptr; size_t size = 0; RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx)); @@ -457,7 +472,7 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) } if (size == 0) { if (!_line_reader_eof && _state->is_read_csv_empty_line_as_null()) { - RETURN_IF_ERROR(_fill_empty_line(block, columns, &rows)); + RETURN_IF_ERROR(_fill_empty_line(columns, &rows)); } // Read empty line, continue continue; @@ -467,9 +482,8 @@ Status CsvReader::_do_get_next_block(Block* block, size_t* read_rows, bool* eof) if (!success) { continue; } - RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), block, columns, &rows)); + RETURN_IF_ERROR(_fill_dest_columns(Slice(ptr, size), columns, &rows)); } - block->set_columns(std::move(columns)); } *eof = (rows == 0); @@ -719,8 +733,8 @@ Status CsvReader::_deserialize_one_cell(DataTypeSerDeSPtr serde, IColumn& column return serde->deserialize_one_cell_from_csv(column, slice, _options); } -Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows) { +Status CsvReader::_fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows) { bool is_success = false; RETURN_IF_ERROR(_line_split_to_values(line, &is_success)); @@ -738,10 +752,7 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. - col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } if (_use_nullable_string_opt[i]) { @@ -758,15 +769,11 @@ Status CsvReader::_fill_dest_columns(const Slice& line, Block* block, return Status::OK(); } -Status CsvReader::_fill_empty_line(Block* block, std::vector& columns, - size_t* rows) { +Status CsvReader::_fill_empty_line(std::vector& columns, size_t* rows) { for (int i = 0; i < _file_slot_descs.size(); ++i) { IColumn* col_ptr = columns[i].get(); if (!_is_load) { - // block is a Block*, and get_by_position returns a ColumnPtr, - // which is a const pointer. Therefore, using const_cast is permissible. - col_ptr = const_cast( - block->get_by_position(_file_slot_idx_map[i]).column.get()); + col_ptr = columns[_file_slot_idx_map[i]].get(); } auto& null_column = assert_cast(*col_ptr); null_column.insert_data(nullptr, 0); diff --git a/be/src/format/csv/csv_reader.h b/be/src/format/csv/csv_reader.h index 077f089e5e9a18..f619ce4d4a85e5 100644 --- a/be/src/format/csv/csv_reader.h +++ b/be/src/format/csv/csv_reader.h @@ -232,9 +232,9 @@ class CsvReader : public TableFormatReader { private: Status _create_decompressor(); Status _create_file_reader(bool need_schema); - Status _fill_dest_columns(const Slice& line, Block* block, - std::vector& columns, size_t* rows); - Status _fill_empty_line(Block* block, std::vector& columns, size_t* rows); + Status _fill_dest_columns(const Slice& line, std::vector& columns, + size_t* rows); + Status _fill_empty_line(std::vector& columns, size_t* rows); Status _line_split_to_values(const Slice& line, bool* success); void _split_line(const Slice& line); void _init_system_properties(); diff --git a/be/src/format/jni/jni_data_bridge.cpp b/be/src/format/jni/jni_data_bridge.cpp index 7f66e3cefc5cf5..4d42574075a662 100644 --- a/be/src/format/jni/jni_data_bridge.cpp +++ b/be/src/format/jni/jni_data_bridge.cpp @@ -105,24 +105,27 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0 return Status::InternalError("Unsupported type {} in java side", data_type->get_name()); } + auto mutable_doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column; - if (doris_column->is_nullable()) { - auto* nullable_column = - reinterpret_cast(doris_column->assume_mutable().get()); + if (mutable_doris_column->is_nullable()) { + auto* nullable_column = assert_cast(mutable_doris_column.get()); data_column = nullable_column->get_nested_column_ptr(); NullMap& null_map = nullable_column->get_null_map_data(); size_t origin_size = null_map.size(); null_map.resize(origin_size + num_rows); memcpy(null_map.data() + origin_size, static_cast(null_map_ptr), num_rows); } else { - data_column = doris_column->assume_mutable(); + data_column = mutable_doris_column->get_ptr(); } // Date and DateTime are deprecated and not supported. + Status status = Status::OK(); switch (logical_type) { -#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ - case TYPE_INDEX: \ - return _fill_fixed_length_column( \ - data_column, reinterpret_cast(address.next_meta_as_ptr()), num_rows); +#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ + case TYPE_INDEX: { \ + auto* data = reinterpret_cast(address.next_meta_as_ptr()); \ + status = _fill_fixed_length_column(data_column, data, num_rows); \ + break; \ + } FOR_FIXED_LENGTH_TYPES(DISPATCH) #undef DISPATCH case PrimitiveType::TYPE_STRING: @@ -130,19 +133,27 @@ Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_co case PrimitiveType::TYPE_CHAR: [[fallthrough]]; case PrimitiveType::TYPE_VARCHAR: - return _fill_string_column(address, data_column, num_rows); + status = _fill_string_column(address, data_column, num_rows); + break; case PrimitiveType::TYPE_ARRAY: - return _fill_array_column(address, data_column, data_type, num_rows); + status = _fill_array_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_MAP: - return _fill_map_column(address, data_column, data_type, num_rows); + status = _fill_map_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_STRUCT: - return _fill_struct_column(address, data_column, data_type, num_rows); + status = _fill_struct_column(address, data_column, data_type, num_rows); + break; case PrimitiveType::TYPE_VARBINARY: - return _fill_varbinary_column(address, data_column, num_rows); + status = _fill_varbinary_column(address, data_column, num_rows); + break; default: - return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name()); + status = Status::InvalidArgument("Unsupported type {} in jni scanner", + data_type->get_name()); + break; } - return Status::OK(); + doris_column = std::move(mutable_doris_column); + return status; } Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index da141437fcf200..90a4bd65b22813 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -452,17 +452,38 @@ Status NewJsonReader::_get_range_params() { return Status::OK(); } -static Status ignore_malformed_json_append_null(Block& block) { - for (auto& column : block.get_columns()) { - if (!column->is_nullable()) [[unlikely]] { +Status json_reader_detail::append_null_for_malformed_json(Block& block) { + for (int i = 0; i < block.columns(); ++i) { + auto& column_with_type = block.get_by_position(i); + if (!column_with_type.column->is_nullable()) [[unlikely]] { return Status::DataQualityError("malformed json, but the column `{}` is not nullable.", - column->get_name()); + column_with_type.column->get_name()); } - static_cast(column->assume_mutable().get())->insert_default(); + auto column = IColumn::mutate(std::move(column_with_type.column)); + assert_cast(column.get())->insert_default(); + column_with_type.column = std::move(column); } return Status::OK(); } +void json_reader_detail::truncate_block_to_rows(Block& block, size_t num_rows) { + for (int i = 0; i < block.columns(); ++i) { + auto& column_with_type = block.get_by_position(i); + auto column = IColumn::mutate(std::move(column_with_type.column)); + if (column->size() > num_rows) { + column->pop_back(column->size() - num_rows); + } + column_with_type.column = std::move(column); + } +} + +void json_reader_detail::pop_back_last_inserted_value(Block& block, size_t column_index) { + auto& column = block.get_by_position(column_index).column; + auto mutable_column = IColumn::mutate(std::move(column)); + mutable_column->pop_back(1); + column = std::move(mutable_column); +} + Status NewJsonReader::_open_file_reader(bool need_schema) { int64_t start_offset = _range.start_offset; if (start_offset != 0) { @@ -678,12 +699,7 @@ Status NewJsonReader::_handle_simdjson_error(simdjson::simdjson_error& error, Bl error.what()); _counter->num_rows_filtered++; // Before continuing to process other rows, we need to first clean the fail parsed row. - for (int i = 0; i < block.columns(); ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - if (column->size() > num_rows) { - column->pop_back(column->size() - num_rows); - } - } + json_reader_detail::truncate_block_to_rows(block, num_rows); RETURN_IF_ERROR(_state->append_error_msg_to_file( [&]() -> std::string { @@ -714,7 +730,7 @@ Status NewJsonReader::_simdjson_handle_simple_json(RuntimeState* /*state*/, Bloc if (_is_load) { return Status::OK(); } else if (_openx_json_ignore_malformed) { - RETURN_IF_ERROR(ignore_malformed_json_append_null(block)); + RETURN_IF_ERROR(json_reader_detail::append_null_for_malformed_json(block)); return Status::OK(); } } @@ -934,12 +950,7 @@ Status NewJsonReader::_simdjson_handle_nested_complex_json( if (!st.ok()) { RETURN_IF_ERROR(_append_error_msg(nullptr, st.to_string(), "", nullptr)); // Before continuing to process other rows, we need to first clean the fail parsed row. - for (int i = 0; i < block.columns(); ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - if (column->size() > num_rows) { - column->pop_back(column->size() - num_rows); - } - } + json_reader_detail::truncate_block_to_rows(block, num_rows); continue; } if (!valid) { @@ -1009,7 +1020,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val if (_is_hive_table) { //Since value can only be traversed once, // we can only insert the original value first, then delete it, and then reinsert the new value - block.get_by_position(column_index).column->assume_mutable()->pop_back(1); + json_reader_detail::pop_back_last_inserted_value(block, column_index); } else { continue; } @@ -1075,14 +1086,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val "partial update, missing key column: {}", slot_desc->col_name(), valid)); // remove this line in block - for (size_t index = 0; index < block.columns(); ++index) { - auto column = block.get_by_position(index).column->assume_mutable(); - if (column->size() != cur_row_count) { - DCHECK(column->size() == cur_row_count + 1); - column->pop_back(1); - DCHECK(column->size() == cur_row_count); - } - } + json_reader_detail::truncate_block_to_rows(block, cur_row_count); return Status::OK(); } _set_skip_bitmap_mark(slot_desc, column_ptr, block, cur_row_count, valid); @@ -1542,10 +1546,8 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath( // there is no valid value in json line but has filled with default value before // so remove this line in block std::string col_names; - for (int i = 0; i < block.columns(); ++i) { - auto column = block.get_by_position(i).column->assume_mutable(); - column->pop_back(1); - } + DCHECK(block.rows() > 0); + json_reader_detail::truncate_block_to_rows(block, block.rows() - 1); for (auto* slot_desc : slot_descs) { col_names.append(slot_desc->col_name() + ", "); } diff --git a/be/src/format/json/new_json_reader.h b/be/src/format/json/new_json_reader.h index e74607a0e6de56..b975433c34f0f8 100644 --- a/be/src/format/json/new_json_reader.h +++ b/be/src/format/json/new_json_reader.h @@ -62,6 +62,12 @@ struct ScannerCounter; class Block; class IColumn; +namespace json_reader_detail { +Status append_null_for_malformed_json(Block& block); +void truncate_block_to_rows(Block& block, size_t num_rows); +void pop_back_last_inserted_value(Block& block, size_t column_index); +} // namespace json_reader_detail + /// JSON-specific initialization context. /// Extends ReaderInitContext with default value context (unique to JSON reader). struct JsonInitContext final : public ReaderInitContext { diff --git a/be/src/format/lance/lance_rust_reader.cpp b/be/src/format/lance/lance_rust_reader.cpp index 166bbd52dcc519..092ce7211e90e5 100644 --- a/be/src/format/lance/lance_rust_reader.cpp +++ b/be/src/format/lance/lance_rust_reader.cpp @@ -230,6 +230,8 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool const auto num_columns = record_batch->num_columns(); // Convert Arrow columns to Doris Block columns (same pattern as PaimonCppReader) + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); @@ -238,11 +240,13 @@ Status LanceRustReader::_do_get_next_block(Block* block, size_t* read_rows, bool continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; try { - RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + RETURN_IF_ERROR(columns_guard.get_datatype_by_position(block_pos) + ->get_serde() + ->read_column_from_arrow(*columns[block_pos], + record_batch->column(c).get(), 0, + num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert Lance arrow to block: {}", e.what()); } diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp index 06ffe6302dac60..70c4d4affaf44e 100644 --- a/be/src/format/orc/vorc_reader.cpp +++ b/be/src/format/orc/vorc_reader.cpp @@ -116,6 +116,40 @@ namespace doris { // TODO: we need to determine it by test. static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE = std::numeric_limits::max(); static constexpr char EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = ""; + +static void fill_orc_null_map(ColumnNullable* nullable_column, const orc::ColumnVectorBatch* cvb, + size_t num_values) { + NullMap& map_data_column = nullable_column->get_null_map_data(); + const auto origin_size = map_data_column.size(); + map_data_column.resize(origin_size + num_values); + if (cvb->hasNulls) { + const auto* cvb_nulls = cvb->notNull.data(); + for (int i = 0; i < num_values; ++i) { + map_data_column[origin_size + i] = !cvb_nulls[i]; + } + } else { + memset(map_data_column.data() + origin_size, 0, num_values); + } +} + +static void align_orc_null_map(const ColumnPtr& src_column, ColumnNullable* dst_nullable_column, + size_t src_null_map_start, size_t new_rows) { + auto& dst_null_map = dst_nullable_column->get_null_map_column(); + const size_t old_rows = dst_nullable_column->get_nested_column().size(); + const size_t expected_rows = old_rows + new_rows; + if (dst_null_map.size() == expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_rows); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} // Because HIVE 0.11 & 0.12 does not support precision and scale for decimal // The decimal type of orc file produced by HIVE 0.11 & 0.12 are DECIMAL(0,0) // We should set a default precision and scale for these orc files. @@ -2018,13 +2052,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle key column: if still missing, fill with default values if (key_is_missing) { // Fill key column with default values (nulls or empty values) - auto mutable_key_column = doris_key_column->assume_mutable(); + auto mutable_key_column = IColumn::mutate(std::move(doris_key_column)); if (mutable_key_column->is_nullable()) { auto* nullable_column = static_cast(mutable_key_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_key_column->insert_many_defaults(element_size); } + doris_key_column = std::move(mutable_key_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2035,13 +2070,14 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, // Handle value column: if still missing, fill with default values if (value_is_missing) { // Fill value column with default values (nulls or empty values) - auto mutable_value_column = doris_value_column->assume_mutable(); + auto mutable_value_column = IColumn::mutate(std::move(doris_value_column)); if (mutable_value_column->is_nullable()) { auto* nullable_column = static_cast(mutable_value_column.get()); nullable_column->insert_many_defaults(element_size); } else { mutable_value_column->insert_many_defaults(element_size); } + doris_value_column = std::move(mutable_value_column); } else { // Normal processing: convert ORC column to Doris column RETURN_IF_ERROR(_orc_column_to_doris_column( @@ -2106,8 +2142,10 @@ Status OrcReader::_fill_doris_data_column(const std::string& col_name, "Child field of '{}' is not nullable, but is missing in orc file", col_name); } - reinterpret_cast(doris_field->assume_mutable().get()) + auto mutable_field = IColumn::mutate(std::move(doris_field)); + reinterpret_cast(mutable_field.get()) ->insert_many_defaults(num_values); + doris_field = std::move(mutable_field); } for (auto read_field : read_fields) { @@ -2172,45 +2210,64 @@ Status OrcReader::_orc_column_to_doris_column( resolved_column = converter->get_column(src_type, doris_column, data_type); resolved_type = converter->get_type(); - if (resolved_column->is_nullable()) { + MutableColumnPtr mutable_resolved_column; + if (converter->is_consistent()) { + resolved_column.reset(); + mutable_resolved_column = IColumn::mutate(std::move(doris_column)); + } else { + mutable_resolved_column = IColumn::mutate(std::move(resolved_column)); + } + + size_t src_null_map_start = 0; + if (mutable_resolved_column->is_nullable()) { SCOPED_RAW_TIMER(&_statistics.decode_null_map_time); auto* nullable_column = - reinterpret_cast(resolved_column->assume_mutable().get()); + reinterpret_cast(mutable_resolved_column.get()); data_column = nullable_column->get_nested_column_ptr(); - - NullMap& map_data_column = nullable_column->get_null_map_data(); - auto origin_size = map_data_column.size(); - map_data_column.resize(origin_size + num_values); - if (cvb->hasNulls) { - const auto* cvb_nulls = cvb->notNull.data(); - for (int i = 0; i < num_values; ++i) { - map_data_column[origin_size + i] = !cvb_nulls[i]; - } - } else { - memset(map_data_column.data() + origin_size, 0, num_values); - } + src_null_map_start = nullable_column->get_null_map_column().size(); + fill_orc_null_map(nullable_column, cvb, num_values); } else { if (cvb->hasNulls) { return Status::InternalError("Not nullable column {} has null values in orc file", col_name); } - data_column = resolved_column->assume_mutable(); + data_column = std::move(mutable_resolved_column); } RETURN_IF_ERROR(_fill_doris_data_column( col_name, data_column, remove_nullable(resolved_type), root_node, orc_column_type, cvb, num_values)); - // resolve schema change + + if (mutable_resolved_column) { + data_column.reset(); + resolved_column = std::move(mutable_resolved_column); + } else { + resolved_column = std::move(data_column); + } + + if (converter->is_consistent()) { + doris_column = std::move(resolved_column); + return Status::OK(); + } + + doris_column = IColumn::mutate(std::move(doris_column)); auto converted_column = doris_column->assume_mutable(); + if (converted_column->is_nullable()) { + const size_t new_rows = remove_nullable(resolved_column)->size(); + align_orc_null_map(resolved_column, + reinterpret_cast(converted_column.get()), + src_null_map_start, new_rows); + } return converter->convert(resolved_column, converted_column); } else { - auto mutable_column = doris_column->assume_mutable(); + auto mutable_column = IColumn::mutate(std::move(doris_column)); if (mutable_column->is_nullable()) { auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(num_values); } else { mutable_column->insert_many_defaults(num_values); } + doris_column = std::move(mutable_column); } return Status::OK(); @@ -2628,9 +2685,7 @@ Status OrcReader::_get_next_block_impl(Block* block, size_t* read_rows, bool* eo } if (can_filter_all) { - for (auto& col : columns_to_filter) { - std::move(*block->get_by_position(col).column).assume_mutable()->clear(); - } + block->clear_column_data(columns_to_filter); Block::erase_useless_column(block, column_to_keep); return _convert_dict_cols_to_string_cols(block, &batch_vec); } @@ -2802,7 +2857,8 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s if (_lazy_read_ctx.resize_first_column) { // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 // The following process may be tricky and time-consuming, but we have no other way. - block->get_by_position(0).column->assume_mutable()->resize(size); + auto column_guard = block->mutate_column_scoped(0); + column_guard.mutable_column()->resize(size); } // transactional hive orc delete row @@ -2829,26 +2885,25 @@ Status OrcReader::filter(orc::ColumnVectorBatch& data, uint16_t* sel, uint16_t s if (_lazy_read_ctx.resize_first_column) { // We have to clean the first column to insert right data. - block->get_by_position(0).column->assume_mutable()->clear(); + block->clear_column_data(std::vector {0}); } if (can_filter_all) { + std::vector columns_to_clear; + columns_to_clear.reserve(table_col_names.size() + + _lazy_read_ctx.predicate_partition_columns.size() + + _lazy_read_ctx.predicate_missing_columns.size()); for (auto& col : table_col_names) { // clean block to read predicate columns and acid columns - block->get_by_position((*_col_name_to_block_idx)[col]) - .column->assume_mutable() - ->clear(); + columns_to_clear.emplace_back((*_col_name_to_block_idx)[col]); } for (auto& col : _lazy_read_ctx.predicate_partition_columns) { - block->get_by_position((*_col_name_to_block_idx)[col.first]) - .column->assume_mutable() - ->clear(); + columns_to_clear.emplace_back((*_col_name_to_block_idx)[col.first]); } for (auto& col : _lazy_read_ctx.predicate_missing_columns) { - block->get_by_position((*_col_name_to_block_idx)[col.first]) - .column->assume_mutable() - ->clear(); + columns_to_clear.emplace_back((*_col_name_to_block_idx)[col.first]); } + block->clear_column_data(columns_to_clear); Block::erase_useless_column(block, origin_column_num); RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block, nullptr)); } diff --git a/be/src/format/orc/vorc_reader.h b/be/src/format/orc/vorc_reader.h index 91c8ffa4a0684c..6d9f74ae4a0ace 100644 --- a/be/src/format/orc/vorc_reader.h +++ b/be/src/format/orc/vorc_reader.h @@ -226,7 +226,8 @@ class OrcReader : public TableFormatReader, public RowPositionProvider { if (col_pos < 0) { return Status::InternalError("Column {} not found in block", col_name); } - auto col = block->get_by_position(col_pos).column->assume_mutable(); + auto column_guard = block->mutate_column_scoped(col_pos); + auto& col = column_guard.mutable_column(); const auto& row_ids = this->current_batch_row_positions(); RETURN_IF_ERROR( _row_id_column_iterator->read_by_rowids(row_ids.data(), row_ids.size(), col)); diff --git a/be/src/format/parquet/byte_stream_split_decoder.cpp b/be/src/format/parquet/byte_stream_split_decoder.cpp index a2674d0995c433..30f0958d9c8cf5 100644 --- a/be/src/format/parquet/byte_stream_split_decoder.cpp +++ b/be/src/format/parquet/byte_stream_split_decoder.cpp @@ -19,6 +19,7 @@ #include +#include "core/column/column_fixed_length_object.h" #include "util/byte_stream_split.h" namespace doris { @@ -45,7 +46,13 @@ Status ByteStreamSplitDecoder::_decode_values(MutableColumnPtr& doris_column, _offset, non_null_size, _data->size); } - size_t primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + size_t primitive_length = _type_length; + if (const auto* fixed_length_column = + check_and_get_column(*doris_column)) { + DCHECK_EQ(fixed_length_column->item_size(), _type_length); + } else { + primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + } size_t data_index = doris_column->size() * primitive_length; size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) * (_type_length / primitive_length); diff --git a/be/src/format/parquet/delta_bit_pack_decoder.h b/be/src/format/parquet/delta_bit_pack_decoder.h index 52d45ea2297b33..6257e4f214a182 100644 --- a/be/src/format/parquet/delta_bit_pack_decoder.h +++ b/be/src/format/parquet/delta_bit_pack_decoder.h @@ -30,6 +30,8 @@ #include #include "common/status.h" +#include "core/column/column_fixed_length_object.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "format/parquet/decoder.h" #include "format/parquet/fix_length_plain_decoder.h" @@ -84,11 +86,21 @@ class DeltaDecoder : public Decoder { Status decode_fixed_byte_array(const std::vector& decoded_vals, MutableColumnPtr& doris_column, DataTypePtr& data_type, ColumnSelectVector& select_vector) { - auto& column_data = reinterpret_cast(*doris_column).get_data(); - size_t data_index = column_data.size(); - column_data.resize(data_index + _type_length * (select_vector.num_values() - - select_vector.num_filtered())); - auto* data = column_data.data(); + const size_t result_size = select_vector.num_values() - select_vector.num_filtered(); + size_t data_index = 0; + uint8_t* data = nullptr; + if (auto* fixed_length_column = + check_and_get_column(*doris_column)) { + DCHECK_EQ(fixed_length_column->item_size(), _type_length); + data_index = fixed_length_column->size() * _type_length; + fixed_length_column->resize(fixed_length_column->size() + result_size); + data = fixed_length_column->get_data().data(); + } else { + auto& column_data = assert_cast(*doris_column).get_data(); + data_index = column_data.size(); + column_data.resize(data_index + _type_length * result_size); + data = reinterpret_cast(column_data.data()); + } ColumnSelectVector::DataReadType read_type; int value_idx = 0; while (size_t run_length = select_vector.get_next_run(&read_type)) { diff --git a/be/src/format/parquet/fix_length_dict_decoder.hpp b/be/src/format/parquet/fix_length_dict_decoder.hpp index c0f0dd967a7dec..aef4e7e6a19bb5 100644 --- a/be/src/format/parquet/fix_length_dict_decoder.hpp +++ b/be/src/format/parquet/fix_length_dict_decoder.hpp @@ -18,6 +18,7 @@ #pragma once #include "core/column/column_dictionary.h" +#include "core/column/column_fixed_length_object.h" #include "core/column/column_nullable.h" #include "core/data_type/data_type_nullable.h" #include "format/parquet/decoder.h" @@ -107,7 +108,13 @@ class FixLengthDictDecoder final : public BaseDictDecoder { template Status _decode_fixed_values(MutableColumnPtr& doris_column, DataTypePtr& data_type, ColumnSelectVector& select_vector) { - size_t primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + size_t primitive_length = _type_length; + if (const auto* fixed_length_column = + check_and_get_column(*doris_column)) { + DCHECK_EQ(fixed_length_column->item_size(), _type_length); + } else { + primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + } size_t data_index = doris_column->size() * primitive_length; size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) * (_type_length / primitive_length); diff --git a/be/src/format/parquet/fix_length_plain_decoder.h b/be/src/format/parquet/fix_length_plain_decoder.h index 1628b8c6d05c98..0c0c47197fac21 100644 --- a/be/src/format/parquet/fix_length_plain_decoder.h +++ b/be/src/format/parquet/fix_length_plain_decoder.h @@ -20,6 +20,7 @@ #include #include "common/status.h" +#include "core/column/column_fixed_length_object.h" #include "core/data_type/data_type.h" #include "format/parquet/decoder.h" #include "format/parquet/parquet_common.h" @@ -46,7 +47,13 @@ class FixLengthPlainDecoder final : public Decoder { return Status::IOError("Out-of-bounds access in parquet data decoder"); } - size_t primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + size_t primitive_length = _type_length; + if (const auto* fixed_length_column = + check_and_get_column(*doris_column)) { + DCHECK_EQ(fixed_length_column->item_size(), _type_length); + } else { + primitive_length = remove_nullable(data_type)->get_size_of_value_in_memory(); + } size_t data_index = doris_column->size() * primitive_length; size_t scale_size = (select_vector.num_values() - select_vector.num_filtered()) * (_type_length / primitive_length); diff --git a/be/src/format/parquet/parquet_column_convert.cpp b/be/src/format/parquet/parquet_column_convert.cpp index 940e95bd973306..8f8a6cc34fcb96 100644 --- a/be/src/format/parquet/parquet_column_convert.cpp +++ b/be/src/format/parquet/parquet_column_convert.cpp @@ -21,7 +21,9 @@ #include #include "common/cast_set.h" +#include "core/column/column_fixed_length_object.h" #include "core/column/column_nullable.h" +#include "core/data_type/data_type_fixed_length_object.h" #include "core/data_type/data_type_nullable.h" #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" @@ -106,28 +108,38 @@ ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s _cached_src_physical_type = std::make_shared(); break; case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: - _cached_src_physical_type = std::make_shared(); + _cached_src_physical_type = std::make_shared(); break; case tparquet::Type::type::INT96: _cached_src_physical_type = std::make_shared(); break; } - _cached_src_physical_column = _cached_src_physical_type->create_column(); + const bool is_fixed_length_byte_array = + src_physical_type == tparquet::Type::type::FIXED_LEN_BYTE_ARRAY; if (dst_logical_type->is_nullable()) { + MutableColumnPtr nested_physical_column; + if (is_fixed_length_byte_array) { + nested_physical_column = ColumnFixedLengthObject::create( + _convert_params->field_schema->parquet_schema.type_length); + } else { + nested_physical_column = _cached_src_physical_type->create_column(); + } + _cached_src_physical_column = ColumnNullable::create(std::move(nested_physical_column), + ColumnUInt8::create()); _cached_src_physical_type = make_nullable(_cached_src_physical_type); + } else { + if (is_fixed_length_byte_array) { + _cached_src_physical_column = ColumnFixedLengthObject::create( + _convert_params->field_schema->parquet_schema.type_length); + } else { + _cached_src_physical_column = _cached_src_physical_type->create_column(); + } } } // remove the old cached data - _cached_src_physical_column->assume_mutable()->clear(); - - if (dst_logical_type->is_nullable()) { - // In order to share null map between parquet converted src column and dst column to avoid copying. It is very tricky that will - // call mutable function `doris_nullable_column->get_null_map_column_ptr()` which will set `_need_update_has_null = true`. - // Because some operations such as agg will call `has_null()` to set `_need_update_has_null = false`. - auto* doris_nullable_column = assert_cast(dst_logical_column.get()); - return ColumnNullable::create(_cached_src_physical_column, - doris_nullable_column->get_null_map_column_ptr()); - } + auto cached_src_physical_column = IColumn::mutate(std::move(_cached_src_physical_column)); + cached_src_physical_column->clear(); + _cached_src_physical_column = std::move(cached_src_physical_column); return _cached_src_physical_column; } diff --git a/be/src/format/parquet/parquet_column_convert.h b/be/src/format/parquet/parquet_column_convert.h index f56ad295bab968..9206ea285ac101 100644 --- a/be/src/format/parquet/parquet_column_convert.h +++ b/be/src/format/parquet/parquet_column_convert.h @@ -25,7 +25,9 @@ #include #include "common/cast_set.h" +#include "core/column/column_fixed_length_object.h" #include "core/column/column_varbinary.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type_factory.hpp" #include "core/data_type/primitive_type.h" #include "core/extended_types.h" @@ -194,6 +196,84 @@ struct ConvertParams { } }; +inline IColumn* get_mutable_inner_column(ColumnPtr& column) { + column = IColumn::mutate(std::move(column)); + auto mutable_column = column->assume_mutable(); + if (mutable_column->is_nullable()) { + return &assert_cast(mutable_column.get())->get_nested_column(); + } + return mutable_column.get(); +} + +inline size_t get_mutable_inner_column_size(const ColumnPtr& column) { + if (column->is_nullable()) { + const auto* nullable = assert_cast(column.get()); + return nullable->get_nested_column().size(); + } + return column->size(); +} + +inline size_t get_null_map_size_or_inner_column_size(const ColumnPtr& column) { + if (column->is_nullable()) { + const auto* nullable = assert_cast(column.get()); + return nullable->get_null_map_column().size(); + } + return column->size(); +} + +inline size_t get_appended_null_map_start(const ColumnPtr& column, size_t new_rows) { + if (!column->is_nullable()) { + return 0; + } + const auto* nullable = assert_cast(column.get()); + const size_t null_map_size = nullable->get_null_map_column().size(); + DCHECK_GE(null_map_size, new_rows); + return null_map_size - new_rows; +} + +inline void align_null_map(ColumnPtr& src_column, ColumnPtr& dst_column, size_t old_null_map_size, + size_t new_rows, size_t src_null_map_start = 0) { + if (!dst_column->is_nullable()) { + return; + } + + dst_column = IColumn::mutate(std::move(dst_column)); + auto* dst_nullable = assert_cast(dst_column->assume_mutable().get()); + auto& dst_null_map = dst_nullable->get_null_map_column(); + const size_t expected_rows = old_null_map_size + new_rows; + if (dst_null_map.size() == expected_rows) { + return; + } + DCHECK_EQ(dst_null_map.size(), old_null_map_size); + if (src_column->is_nullable()) { + const auto* src_nullable = assert_cast(src_column.get()); + DCHECK_GE(src_nullable->get_null_map_column().size(), src_null_map_start + new_rows); + dst_null_map.insert_range_from(src_nullable->get_null_map_column(), src_null_map_start, + new_rows); + } else { + dst_null_map.insert_many_vals(0, new_rows); + } +} + +struct FixedLengthPhysicalData { + const uint8_t* data = nullptr; + size_t byte_size = 0; + size_t rows = 0; +}; + +inline FixedLengthPhysicalData get_fixed_length_physical_data(const IColumn& column, + size_t type_length) { + if (const auto* fixed_length_column = check_and_get_column(column)) { + DCHECK_EQ(fixed_length_column->item_size(), type_length); + return {fixed_length_column->get_data().data(), fixed_length_column->byte_size(), + fixed_length_column->size()}; + } + + const auto& uint8_column = assert_cast(column); + DCHECK_EQ(uint8_column.size() % type_length, 0); + return {uint8_column.get_data().data(), uint8_column.size(), uint8_column.size() / type_length}; +} + /** * Convert parquet physical column to logical column * In parquet document(https://github.com/apache/parquet-format/blob/master/LogicalTypes.md), @@ -213,11 +293,12 @@ struct ConvertParams { * Ultimate performance optimization: * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; - * 3. Null map is share among all processes, no additional copies and conversions will be introduced in null map; + * 3. Null maps are owned by each temporary nullable column, and only appended null slices are + * copied between conversion stages; * 4. Only create one physical column in physical conversion, and reused in each loop; * 5. Only create one logical column in logical conversion, and reused in each loop; - * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnUInt8 instead of ColumnString, so the underlying decoder has no process to decode string - * and use memory copy to read the data as a whole, and the conversion has no need to resolve the Offsets in ColumnString. + * 6. FIXED_LENGTH_BYTE_ARRAY is read as ColumnFixedLengthObject instead of ColumnString, so + * the decoder can copy fixed-size values as a whole while keeping nullable row counts valid. */ class PhysicalToLogicalConverter { protected: @@ -254,26 +335,46 @@ class PhysicalToLogicalConverter { PrimitiveType::TYPE_INT, dst_logical_type->is_nullable()); } if (is_consistent() && _logical_converter->is_consistent()) { + dst_logical_col = std::move(src_physical_col); + return Status::OK(); + } + if (_logical_converter->is_consistent()) { + const size_t old_rows = get_mutable_inner_column_size(dst_logical_col); + const size_t old_null_map_size = + get_null_map_size_or_inner_column_size(dst_logical_col); + RETURN_IF_ERROR(physical_convert(src_physical_col, dst_logical_col)); + const size_t new_rows = get_mutable_inner_column_size(dst_logical_col) - old_rows; + align_null_map(src_physical_col, dst_logical_col, old_null_map_size, new_rows, + get_appended_null_map_start(src_physical_col, new_rows)); return Status::OK(); } + ColumnPtr src_logical_column; if (is_consistent()) { - if (dst_logical_type->is_nullable()) { - auto doris_nullable_column = - assert_cast(dst_logical_col.get()); - src_logical_column = - ColumnNullable::create(_cached_src_physical_column, - doris_nullable_column->get_null_map_column_ptr()); - } else { - src_logical_column = _cached_src_physical_column; - } + src_logical_column = src_physical_col; } else { src_logical_column = _logical_converter->get_column(src_logical_type, dst_logical_col, dst_logical_type); } + const size_t src_old_rows = get_mutable_inner_column_size(src_logical_column); + const size_t src_old_null_map_size = + get_null_map_size_or_inner_column_size(src_logical_column); RETURN_IF_ERROR(physical_convert(src_physical_col, src_logical_column)); + const size_t src_new_rows = + get_mutable_inner_column_size(src_logical_column) - src_old_rows; + align_null_map(src_physical_col, src_logical_column, src_old_null_map_size, src_new_rows, + get_appended_null_map_start(src_physical_col, src_new_rows)); + + dst_logical_col = IColumn::mutate(std::move(dst_logical_col)); + const size_t dst_old_rows = get_mutable_inner_column_size(dst_logical_col); + const size_t dst_old_null_map_size = + get_null_map_size_or_inner_column_size(dst_logical_col); auto converted_column = dst_logical_col->assume_mutable(); - return _logical_converter->convert(src_logical_column, converted_column); + RETURN_IF_ERROR(_logical_converter->convert(src_logical_column, converted_column)); + const size_t dst_new_rows = get_mutable_inner_column_size(dst_logical_col) - dst_old_rows; + align_null_map(src_logical_column, dst_logical_col, dst_old_null_map_size, dst_new_rows, + get_appended_null_map_start(src_logical_column, dst_new_rows)); + return Status::OK(); } virtual ColumnPtr get_physical_column(tparquet::Type::type src_physical_type, @@ -283,6 +384,11 @@ class PhysicalToLogicalConverter { DataTypePtr& get_physical_type() { return _cached_src_physical_type; } + bool read_directly_into_dst_logical_column() { + return !_convert_params->is_type_compatibility && is_consistent() && + _logical_converter->is_consistent(); + } + virtual bool is_consistent() { return false; } virtual bool support() { return true; } @@ -319,14 +425,14 @@ class LittleIntPhysicalConverter : public PhysicalToLogicalConverter { using DstCppType = typename PrimitiveTypeTraits::CppType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); size_t rows = from_col->size(); // always comes from tparquet::Type::INT32 auto& src_data = assert_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; ++i) { data[start_idx + i] = static_cast(src_data[i]); } @@ -378,13 +484,13 @@ class UnsignedIntegerConverter : public PhysicalToLogicalConverter { using DstColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); auto& src_data = assert_cast(from_col.get())->get_data(); size_t rows = src_data.size(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); - auto& data = assert_cast(*to_col.get()).get_data(); + auto& data = assert_cast(*to_col).get_data(); for (int i = 0; i < rows; i++) { StorageCppType src_value = src_data[i]; @@ -405,18 +511,18 @@ class FixedSizeBinaryConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); - auto* src_data = assert_cast(from_col.get()); - size_t length = src_data->size(); - size_t num_values = length / _type_length; - auto& string_col = static_cast(*to_col.get()); + const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); + size_t length = src_data.byte_size; + size_t num_values = src_data.rows; + auto& string_col = static_cast(*to_col); auto& offsets = string_col.get_offsets(); auto& chars = string_col.get_chars(); size_t origin_size = chars.size(); chars.resize(origin_size + length); - memcpy(chars.data() + origin_size, src_data->get_data().data(), length); + memcpy(chars.data() + origin_size, src_data.data, length); origin_size = offsets.size(); offsets.resize(origin_size + num_values); @@ -441,16 +547,15 @@ class Float16PhysicalConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr from_col = remove_nullable(src_physical_col); - MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); - const auto* src_data = assert_cast(from_col.get()); - size_t length = src_data->size(); - size_t num_values = length / _type_length; - auto* to_float_column = assert_cast(to_col.get()); + const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); + size_t num_values = src_data.rows; + auto* to_float_column = assert_cast(to_col); size_t start_idx = to_float_column->size(); to_float_column->resize(start_idx + num_values); auto& to_float_column_data = to_float_column->get_data(); - const auto* ptr = src_data->get_data().data(); + const auto* ptr = src_data.data; for (int i = 0; i < num_values; ++i) { size_t offset = i * _type_length; const auto* data_ptr = ptr + offset; @@ -520,26 +625,13 @@ class UUIDVarBinaryConverter : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { DCHECK(!is_column_const(*src_physical_col)) << src_physical_col->dump_structure(); DCHECK(!is_column_const(*src_logical_column)) << src_logical_column->dump_structure(); - const ColumnUInt8* uint8_col = nullptr; - if (is_column_nullable(*src_physical_col)) { - const auto& nullable = assert_cast(src_physical_col.get()); - uint8_col = &assert_cast(nullable->get_nested_column()); - } else { - uint8_col = &assert_cast(*src_physical_col); - } + const ColumnPtr from_col = remove_nullable(src_physical_col); + const auto src_data = get_fixed_length_physical_data(*from_col, _type_length); - MutableColumnPtr to_col = nullptr; - // nullmap flag seems have been handled in upper level - if (src_logical_column->is_nullable()) { - const auto* nullable = assert_cast(src_logical_column.get()); - to_col = nullable->get_nested_column_ptr()->assume_mutable(); - } else { - to_col = src_logical_column->assume_mutable(); - } - auto* to_varbinary_column = assert_cast(to_col.get()); - size_t length = uint8_col->size(); - size_t num_values = length / _type_length; - const auto* ptr = uint8_col->get_data().data(); + IColumn* to_col = get_mutable_inner_column(src_logical_column); + auto* to_varbinary_column = assert_cast(to_col); + size_t num_values = src_data.rows; + const auto* ptr = src_data.data; for (int i = 0; i < num_values; ++i) { auto offset = i * _type_length; @@ -561,7 +653,7 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); #define M(FixedTypeLength, ValueCopyType) \ case FixedTypeLength: \ @@ -612,13 +704,14 @@ class FixedSizeToDecimal : public PhysicalToLogicalConverter { } template - Status _convert_internal(ColumnPtr& src_col, MutableColumnPtr& dst_col) { - size_t rows = src_col->size() / fixed_type_length; - auto* buf = static_cast(src_col.get())->get_data().data(); + Status _convert_internal(ColumnPtr& src_col, IColumn* dst_col) { + const auto src_data = get_fixed_length_physical_data(*src_col, fixed_type_length); + size_t rows = src_data.rows; + const auto* buf = src_data.data; size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); size_t offset = 0; for (int i = 0; i < rows; i++) { // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -645,7 +738,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto buf = static_cast(src_col.get())->get_chars().data(); @@ -653,7 +746,7 @@ class StringToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast*>(dst_col.get())->get_data(); + auto& data = static_cast*>(dst_col)->get_data(); for (int i = 0; i < rows; i++) { size_t len = offset[i] - offset[i - 1]; // When Decimal in parquet is stored in byte arrays, binary and fixed, @@ -678,7 +771,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { using ValueCopyType = typename DecimalType::NativeType; ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); auto* src_data = @@ -686,7 +779,7 @@ class NumberToDecimal : public PhysicalToLogicalConverter { size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto* data = static_cast*>(dst_col.get())->get_data().data(); + auto* data = static_cast*>(dst_col)->get_data().data(); for (int i = 0; i < rows; i++) { ValueCopyType value; @@ -706,14 +799,14 @@ class NumberToDecimal : public PhysicalToLogicalConverter { class Int32ToDate : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->reserve(start_idx + rows); auto& src_data = static_cast(src_col.get())->get_data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); date_day_offset_dict& date_dict = date_day_offset_dict::get(); for (int i = 0; i < rows; i++) { @@ -727,14 +820,14 @@ class Int32ToDate : public PhysicalToLogicalConverter { struct Int64ToTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); auto src_data = static_cast(src_col.get())->get_data().data(); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { int64_t x = src_data[i]; @@ -760,14 +853,14 @@ struct Int64ToTimestamp : public PhysicalToLogicalConverter { struct Int64ToTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); const auto& src_data = assert_cast(src_col.get())->get_data(); - auto& dest_data = assert_cast(dst_col.get())->get_data(); + auto& dest_data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { @@ -784,14 +877,14 @@ struct Int64ToTimestampTz : public PhysicalToLogicalConverter { struct Int96toTimestamp : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); auto& src_data = static_cast(src_col.get())->get_data(); auto ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = static_cast(dst_col.get())->get_data(); + auto& data = static_cast(dst_col)->get_data(); for (int i = 0; i < rows; i++) { ParquetInt96 src_cell_data = ParquetInt96_data[i]; @@ -818,14 +911,14 @@ struct Int96toTimestamp : public PhysicalToLogicalConverter { struct Int96toTimestampTz : public PhysicalToLogicalConverter { Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override { ColumnPtr src_col = remove_nullable(src_physical_col); - MutableColumnPtr dst_col = remove_nullable(src_logical_column)->assume_mutable(); + IColumn* dst_col = get_mutable_inner_column(src_logical_column); size_t rows = src_col->size() / sizeof(ParquetInt96); const auto& src_data = assert_cast(src_col.get())->get_data(); auto* ParquetInt96_data = (ParquetInt96*)src_data.data(); size_t start_idx = dst_col->size(); dst_col->resize(start_idx + rows); - auto& data = assert_cast(dst_col.get())->get_data(); + auto& data = assert_cast(dst_col)->get_data(); static const cctz::time_zone UTC = cctz::utc_time_zone(); for (int i = 0; i < rows; i++) { diff --git a/be/src/format/parquet/vparquet_column_reader.cpp b/be/src/format/parquet/vparquet_column_reader.cpp index ba7d42a5aed84e..1deffec6a04633 100644 --- a/be/src/format/parquet/vparquet_column_reader.cpp +++ b/be/src/format/parquet/vparquet_column_reader.cpp @@ -328,12 +328,11 @@ Status ScalarColumnReader::_read_values(size_t num_ MutableColumnPtr data_column; std::vector null_map; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. - auto* nullable_column = - assert_cast(const_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); @@ -411,12 +410,11 @@ Status ScalarColumnReader::_read_nested_column( // Handle nullable columns MutableColumnPtr data_column; NullMap* map_data_column = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { SCOPED_RAW_TIMER(&_decode_null_map_time); - // doris_column either originates from a mutable block in vparquet_group_reader - // or is a newly created ColumnPtr, and therefore can be modified. - auto* nullable_column = - const_cast(assert_cast(doris_column.get())); + auto mutable_column = doris_column->assume_mutable(); + auto* nullable_column = assert_cast(mutable_column.get()); data_column = nullable_column->get_nested_column_ptr(); map_data_column = &(nullable_column->get_null_map_data()); } else { @@ -550,6 +548,10 @@ Status ScalarColumnReader::read_column_data( ColumnPtr resolved_column = _converter->get_physical_column(_field_schema->physical_type, _field_schema->data_type, doris_column, type, is_dict_filter); + if (_converter->read_directly_into_dst_logical_column()) { + DCHECK_EQ(resolved_column.get(), doris_column.get()); + resolved_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); _def_levels.clear(); @@ -658,6 +660,7 @@ Status ArrayColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -713,6 +716,7 @@ Status MapColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -789,6 +793,7 @@ Status StructColumnReader::read_column_data( int64_t real_column_size) { MutableColumnPtr data_column; NullMap* null_map_ptr = nullptr; + doris_column = IColumn::mutate(std::move(doris_column)); if (doris_column->is_nullable()) { auto mutable_column = doris_column->assume_mutable(); auto* nullable_column = assert_cast(mutable_column.get()); @@ -986,6 +991,7 @@ Status StructColumnReader::read_column_data( auto& doris_field = doris_struct.get_column_ptr(idx); auto& doris_type = doris_struct_type->get_element(idx); DCHECK(doris_type->is_nullable()); + doris_field = IColumn::mutate(std::move(doris_field)); auto mutable_column = doris_field->assume_mutable(); auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(missing_column_sz); diff --git a/be/src/format/parquet/vparquet_column_reader.h b/be/src/format/parquet/vparquet_column_reader.h index 9d9fd2280c88f8..8673361eb46dd6 100644 --- a/be/src/format/parquet/vparquet_column_reader.h +++ b/be/src/format/parquet/vparquet_column_reader.h @@ -482,6 +482,7 @@ class SkipReadingReader : public ParquetColumnReader { // Simulate reading without actually reading data // Fill with default/null values based on column type + doris_column = IColumn::mutate(std::move(doris_column)); MutableColumnPtr data_column = doris_column->assume_mutable(); if (real_column_size > 0) { diff --git a/be/src/format/parquet/vparquet_group_reader.cpp b/be/src/format/parquet/vparquet_group_reader.cpp index 7d910e49203062..a346fa91585d6e 100644 --- a/be/src/format/parquet/vparquet_group_reader.cpp +++ b/be/src/format/parquet/vparquet_group_reader.cpp @@ -35,6 +35,7 @@ #include "core/assert_cast.h" #include "core/block/block.h" #include "core/block/column_with_type_and_name.h" +#include "core/column/column.h" #include "core/column/column_const.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" @@ -415,9 +416,7 @@ Status RowGroupReader::next_batch(Block* block, size_t batch_size, size_t* read_ } if (can_filter_all) { - for (auto& col : columns_to_filter) { - std::move(*block->get_by_position(col).column).assume_mutable()->clear(); - } + block->clear_column_data(columns_to_filter); Block::erase_useless_column(block, column_to_keep); RETURN_IF_ERROR(_convert_dict_cols_to_string_cols(block)); return Status::OK(); @@ -668,7 +667,8 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (_lazy_read_ctx.resize_first_column) { // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 // The following process may be tricky and time-consuming, but we have no other way. - block->get_by_position(0).column->assume_mutable()->resize(pre_read_rows); + auto column_guard = block->mutate_column_scoped(0); + column_guard.mutable_column()->resize(pre_read_rows); } result_filter.assign(pre_read_rows, static_cast(1)); std::vector filters; @@ -693,7 +693,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (_lazy_read_ctx.resize_first_column) { // We have to clean the first column to insert right data. - block->get_by_position(0).column->assume_mutable()->clear(); + block->clear_column_data(std::vector {0}); } } @@ -703,22 +703,27 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (filter_map_ptr->filter_all()) { { SCOPED_RAW_TIMER(&_predicate_filter_time); + std::vector columns_to_clear; + columns_to_clear.reserve(_lazy_read_ctx.predicate_columns.first.size() + + _lazy_read_ctx.predicate_partition_columns.size() + + _lazy_read_ctx.predicate_missing_columns.size()); for (const auto& col : _lazy_read_ctx.predicate_columns.first) { // clean block to read predicate columns uint32_t block_pos = 0; RETURN_IF_ERROR(_get_block_column_pos(*block, col, &block_pos)); - block->get_by_position(block_pos).column->assume_mutable()->clear(); + columns_to_clear.emplace_back(block_pos); } for (const auto& col : _lazy_read_ctx.predicate_partition_columns) { uint32_t block_pos = 0; RETURN_IF_ERROR(_get_block_column_pos(*block, col.first, &block_pos)); - block->get_by_position(block_pos).column->assume_mutable()->clear(); + columns_to_clear.emplace_back(block_pos); } for (const auto& col : _lazy_read_ctx.predicate_missing_columns) { uint32_t block_pos = 0; RETURN_IF_ERROR(_get_block_column_pos(*block, col.first, &block_pos)); - block->get_by_position(block_pos).column->assume_mutable()->clear(); + columns_to_clear.emplace_back(block_pos); } + block->clear_column_data(columns_to_clear); RETURN_IF_ERROR(_table_format_reader->clear_synthesized_columns(block)); RETURN_IF_ERROR(_table_format_reader->clear_generated_columns(block)); Block::erase_useless_column(block, origin_column_num); @@ -893,7 +898,8 @@ Status RowGroupReader::_fill_missing_columns( RETURN_IF_ERROR(_get_block_column_pos(*block, kv.first, &block_pos)); if (kv.second == nullptr) { // no default column, fill with null - auto mutable_column = block->get_by_position(block_pos).column->assume_mutable(); + auto column_guard = block->mutate_column_scoped(block_pos); + auto& mutable_column = column_guard.mutable_column(); auto* nullable_column = assert_cast(mutable_column.get()); nullable_column->insert_many_defaults(rows); } else { diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp index 2565b254338321..8485d9e9d2a173 100644 --- a/be/src/format/parquet/vparquet_reader.cpp +++ b/be/src/format/parquet/vparquet_reader.cpp @@ -297,6 +297,9 @@ Status ParquetReader::close() { void ParquetReader::_close_internal() { if (!_closed) { + _current_group_reader.reset(); + _tracing_file_reader.reset(); + _file_reader.reset(); _closed = true; } } diff --git a/be/src/format/parquet/vparquet_reader.h b/be/src/format/parquet/vparquet_reader.h index 68979bf9e4f027..cb6a1d21c7335c 100644 --- a/be/src/format/parquet/vparquet_reader.h +++ b/be/src/format/parquet/vparquet_reader.h @@ -187,7 +187,8 @@ class ParquetReader : public TableFormatReader { if (col_pos < 0) { return Status::InternalError("Column {} not found in block", col_name); } - auto col = block->get_by_position(col_pos).column->assume_mutable(); + auto column_guard = block->mutate_column_scoped(col_pos); + auto& col = column_guard.mutable_column(); const auto& row_ids = this->current_batch_row_positions(); RETURN_IF_ERROR( _row_id_column_iterator->read_by_rowids(row_ids.data(), row_ids.size(), col)); diff --git a/be/src/format/table/equality_delete.cpp b/be/src/format/table/equality_delete.cpp index 82deb7bd59c20a..dc94d8151f2048 100644 --- a/be/src/format/table/equality_delete.cpp +++ b/be/src/format/table/equality_delete.cpp @@ -68,9 +68,8 @@ Status SimpleEqualityDelete::filter_data_block( const NullMap& null_map = reinterpret_cast(column_and_type.column.get()) ->get_null_map_data(); - _hybrid_set->find_batch_nullable( - remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, - *_single_filter); + _hybrid_set->find_batch_nullable(*remove_nullable(column_and_type.column), rows, null_map, + *_single_filter); if (_hybrid_set->contain_null()) { auto* filter_data = _single_filter->data(); for (size_t i = 0; i < rows; ++i) { @@ -78,8 +77,7 @@ Status SimpleEqualityDelete::filter_data_block( } } } else { - _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, - *_single_filter); + _hybrid_set->find_batch(*column_and_type.column, rows, *_single_filter); } // should reverse _filter auto* filter_data = filter.data(); diff --git a/be/src/format/table/es/es_http_reader.cpp b/be/src/format/table/es/es_http_reader.cpp index 24cc4410b63ed1..0efb3c19e75add 100644 --- a/be/src/format/table/es/es_http_reader.cpp +++ b/be/src/format/table/es/es_http_reader.cpp @@ -147,11 +147,8 @@ Status EsHttpReader::_do_get_next_block(Block* block, size_t* read_rows, bool* e return Status::OK(); } - auto column_size = _tuple_desc->slots().size(); - std::vector columns(column_size); - for (size_t i = 0; i < column_size; i++) { - columns[i] = block->get_by_position(i).column->assume_mutable(); - } + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); size_t rows_before = columns[0]->size(); const int batch_size = _state->batch_size(); diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h index 42c80c9b7d4ddc..50f29095e257af 100644 --- a/be/src/format/table/iceberg_reader_mixin.h +++ b/be/src/format/table/iceberg_reader_mixin.h @@ -159,8 +159,9 @@ class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper { DORIS_CHECK(col_pos >= 0); if (_row_lineage_columns.first_row_id >= 0) { - auto col = block->get_by_position(col_pos).column->assume_mutable(); - auto* nullable_column = assert_cast(col.get()); + auto column_guard = block->mutate_column_scoped(col_pos); + auto* nullable_column = + assert_cast(column_guard.mutable_column().get()); auto& null_map = nullable_column->get_null_map_data(); auto& data = assert_cast(*nullable_column->get_nested_column_ptr()).get_data(); @@ -180,8 +181,9 @@ class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper { DORIS_CHECK(col_pos >= 0); if (_row_lineage_columns.last_updated_sequence_number >= 0) { - auto col = block->get_by_position(col_pos).column->assume_mutable(); - auto* nullable_column = assert_cast(col.get()); + auto column_guard = block->mutate_column_scoped(col_pos); + auto* nullable_column = + assert_cast(column_guard.mutable_column().get()); auto& null_map = nullable_column->get_null_map_data(); auto& data = assert_cast(*nullable_column->get_nested_column_ptr()).get_data(); @@ -552,7 +554,8 @@ Status IcebergReaderMixin::_equality_delete_base( return st; } if (read_rows > 0) { - MutableBlock mutable_block(&eq_file_block); + ScopedMutableBlock scoped_mutable_block(&eq_file_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); RETURN_IF_ERROR(mutable_block.merge(tmp_block)); } } @@ -586,13 +589,12 @@ Status IcebergReaderMixin::_expand_block_if_need(Block* block) { auto block_names = block->get_names(); names.insert(block_names.begin(), block_names.end()); for (auto& col : _expand_columns) { - col.column->assume_mutable()->clear(); if (names.contains(col.name)) { return Status::InternalError("Wrong expand column '{}'", col.name); } names.insert(col.name); (*this->col_name_to_block_idx_ref())[col.name] = static_cast(block->columns()); - block->insert(col); + block->insert({col.type->create_column(), col.type, col.name}); } return Status::OK(); } diff --git a/be/src/format/table/paimon_cpp_reader.cpp b/be/src/format/table/paimon_cpp_reader.cpp index 4925bbb3e7a9bd..c7454f043c9062 100644 --- a/be/src/format/table/paimon_cpp_reader.cpp +++ b/be/src/format/table/paimon_cpp_reader.cpp @@ -74,11 +74,11 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool auto rows = std::min(_remaining_table_level_row_count, (int64_t)_state->query_options().batch_size); _remaining_table_level_row_count -= rows; - auto mutate_columns = block->mutate_columns(); + auto mutable_columns_guard = block->mutate_columns_scoped(); + auto& mutate_columns = mutable_columns_guard.mutable_columns(); for (auto& col : mutate_columns) { col->resize(rows); } - block->set_columns(std::move(mutate_columns)); *read_rows = rows; *eof = false; if (_remaining_table_level_row_count == 0) { @@ -117,6 +117,8 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool auto record_batch = std::move(import_result).ValueUnsafe(); const auto num_rows = static_cast(record_batch->num_rows()); const auto num_columns = record_batch->num_columns(); + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); for (int c = 0; c < num_columns; ++c) { const auto& field = record_batch->schema()->field(c); if (field->name() == VALUE_KIND_FIELD) { @@ -128,11 +130,13 @@ Status PaimonCppReader::_do_get_next_block(Block* block, size_t* read_rows, bool // Skip columns that are not in the block (e.g., partition columns handled elsewhere) continue; } - const ColumnWithTypeAndName& column_with_name = block->get_by_position(it->second); + const auto block_pos = it->second; try { - RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), record_batch->column(c).get(), 0, - num_rows, _ctzz)); + RETURN_IF_ERROR(columns_guard.get_datatype_by_position(block_pos) + ->get_serde() + ->read_column_from_arrow(*columns[block_pos], + record_batch->column(c).get(), 0, + num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError("Failed to convert from arrow to block: {}", e.what()); } diff --git a/be/src/format/table/paimon_jni_reader.cpp b/be/src/format/table/paimon_jni_reader.cpp index f50e59deb7ee3d..c2d43406504b73 100644 --- a/be/src/format/table/paimon_jni_reader.cpp +++ b/be/src/format/table/paimon_jni_reader.cpp @@ -105,11 +105,11 @@ Status PaimonJniReader::_do_get_next_block(Block* block, size_t* read_rows, bool auto rows = std::min(_remaining_table_level_row_count, (int64_t)_state->query_options().batch_size); _remaining_table_level_row_count -= rows; - auto mutate_columns = block->mutate_columns(); + auto mutable_columns_guard = block->mutate_columns_scoped(); + auto& mutate_columns = mutable_columns_guard.mutable_columns(); for (auto& col : mutate_columns) { col->resize(rows); } - block->set_columns(std::move(mutate_columns)); *read_rows = rows; if (_remaining_table_level_row_count == 0) { *eof = true; diff --git a/be/src/format/table/parquet_metadata_reader.cpp b/be/src/format/table/parquet_metadata_reader.cpp index 7df80f673cb602..c1e9b3372e261d 100644 --- a/be/src/format/table/parquet_metadata_reader.cpp +++ b/be/src/format/table/parquet_metadata_reader.cpp @@ -29,6 +29,7 @@ #include #include "core/block/block.h" +#include "core/column/column.h" #include "core/column/column_map.h" #include "core/column/column_nullable.h" #include "core/data_type/data_type_nullable.h" @@ -807,32 +808,31 @@ Status ParquetMetadataReader::_do_get_next_block(Block* block, size_t* read_rows // Scanner may call multiple times; we surface data once and mark eof on the next call. // When reusing a Block, wipe row data but keep column structure intact. - bool mem_reuse = block->mem_reuse(); - std::vector columns(_slots.size()); - if (mem_reuse) { - block->clear_column_data(); - for (size_t i = 0; i < _slots.size(); ++i) { - columns[i] = block->get_by_position(i).column->assume_mutable(); - } - } else { + const bool mem_reuse = block->mem_reuse(); + size_t produced = 0; + if (!mem_reuse) { + std::vector columns(_slots.size()); for (size_t i = 0; i < _slots.size(); ++i) { columns[i] = _slots[i]->get_empty_mutable_column(); } - } - size_t rows_before = block->rows(); - RETURN_IF_ERROR(_build_rows(columns)); - - if (!mem_reuse) { + RETURN_IF_ERROR(_build_rows(columns)); for (size_t i = 0; i < _slots.size(); ++i) { block->insert(ColumnWithTypeAndName( std::move(columns[i]), _slots[i]->get_data_type_ptr(), _slots[i]->col_name())); } + produced = block->rows(); } else { - columns.clear(); + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); + for (size_t i = 0; i < _slots.size(); ++i) { + columns[i]->clear(); + } + + RETURN_IF_ERROR(_build_rows(columns)); + produced = columns[0]->size(); } - size_t produced = block->rows() - rows_before; *read_rows = produced; _eof = true; *eof = (produced == 0); diff --git a/be/src/format/table/remote_doris_reader.cpp b/be/src/format/table/remote_doris_reader.cpp index 5280b655a63ef8..0e2184d65b62f5 100644 --- a/be/src/format/table/remote_doris_reader.cpp +++ b/be/src/format/table/remote_doris_reader.cpp @@ -72,20 +72,24 @@ Status RemoteDorisReader::_do_get_next_block(Block* block, size_t* read_rows, bo auto batch = chunk.data; auto num_rows = batch->num_rows(); auto num_columns = batch->num_columns(); + const auto block_structure = block->dump_structure(); + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); for (int c = 0; c < num_columns; ++c) { arrow::Array* column = batch->column(c).get(); std::string column_name = batch->schema()->field(c)->name(); if (!_col_name_to_block_idx->contains(column_name)) { return Status::InternalError("column {} not found in block {}", column_name, - block->dump_structure()); + block_structure); } try { - const ColumnWithTypeAndName& column_with_name = - block->get_by_position((*_col_name_to_block_idx)[column_name]); - RETURN_IF_ERROR(column_with_name.type->get_serde()->read_column_from_arrow( - column_with_name.column->assume_mutable_ref(), column, 0, num_rows, _ctzz)); + auto block_pos = (*_col_name_to_block_idx)[column_name]; + RETURN_IF_ERROR(columns_guard.get_datatype_by_position(block_pos) + ->get_serde() + ->read_column_from_arrow(*columns[block_pos], column, 0, + num_rows, _ctzz)); } catch (Exception& e) { return Status::InternalError( "Failed to convert from arrow to block, column_name: {}, e: {}", column_name, diff --git a/be/src/format/table/table_format_reader.h b/be/src/format/table/table_format_reader.h index 23587ab1095700..ed5b414aa44dc1 100644 --- a/be/src/format/table/table_format_reader.h +++ b/be/src/format/table/table_format_reader.h @@ -67,8 +67,9 @@ class TableFormatReader : public GenericReader { if (it == _fill_partition_values.end()) { continue; } - auto col_ptr = block->get_by_position((*_fill_col_name_to_block_idx)[col_name]) - .column->assume_mutable(); + auto column_guard = + block->mutate_column_scoped((*_fill_col_name_to_block_idx)[col_name]); + auto& col_ptr = column_guard.mutable_column(); const auto& [value, slot_desc] = it->second; auto text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); @@ -101,9 +102,9 @@ class TableFormatReader : public GenericReader { VExprContextSPtr ctx = (it != _fill_missing_defaults.end()) ? it->second : nullptr; if (ctx == nullptr) { - auto mutable_column = - block->get_by_position((*_fill_col_name_to_block_idx)[col_name]) - .column->assume_mutable(); + auto column_guard = + block->mutate_column_scoped((*_fill_col_name_to_block_idx)[col_name]); + auto& mutable_column = column_guard.mutable_column(); auto* nullable_column = static_cast(mutable_column.get()); nullable_column->insert_many_defaults(rows); } else { @@ -147,7 +148,7 @@ class TableFormatReader : public GenericReader { if (col_pos < 0) { continue; } - block->get_by_position(static_cast(col_pos)).column->assume_mutable()->clear(); + block->clear_column_data(std::vector {static_cast(col_pos)}); } return Status::OK(); } @@ -212,7 +213,7 @@ class TableFormatReader : public GenericReader { if (col_pos < 0) { continue; } - block->get_by_position(static_cast(col_pos)).column->assume_mutable()->clear(); + block->clear_column_data(std::vector {static_cast(col_pos)}); } return Status::OK(); } diff --git a/be/src/format/transformer/merge_partitioner.cpp b/be/src/format/transformer/merge_partitioner.cpp index f486c85476f0fc..89cf830d6bba53 100644 --- a/be/src/format/transformer/merge_partitioner.cpp +++ b/be/src/format/transformer/merge_partitioner.cpp @@ -210,7 +210,8 @@ Status MergePartitioner::do_partitioning(RuntimeState* state, Block* block) cons block->replace_by_position_if_const(col_idx); } - MutableColumns mutable_columns = block->mutate_columns(); + auto mutable_columns_guard = block->mutate_columns_scoped(); + MutableColumns& mutable_columns = mutable_columns_guard.mutable_columns(); MutableColumnPtr& op_mut = mutable_columns[op_idx]; ColumnInt8* op_values_col = nullptr; if (auto* nullable_col = check_and_get_column(op_mut.get())) { @@ -220,7 +221,6 @@ Status MergePartitioner::do_partitioning(RuntimeState* state, Block* block) cons op_values_col = check_and_get_column(op_mut.get()); } if (op_values_col == nullptr) { - block->set_columns(std::move(mutable_columns)); return Status::InternalError("Merge operation column must be tinyint"); } auto& op_values = op_values_col->get_data(); @@ -252,7 +252,6 @@ Status MergePartitioner::do_partitioning(RuntimeState* state, Block* block) cons _insert_random ? _next_rr_channel() : insert_hashes[row]; _channel_ids.push_back(insert_channel); } - block->set_columns(std::move(mutable_columns)); } return Status::OK(); diff --git a/be/src/information_schema/schema_active_queries_scanner.cpp b/be/src/information_schema/schema_active_queries_scanner.cpp index 00f0c5b5de763e..bceac0347b517f 100644 --- a/be/src/information_schema/schema_active_queries_scanner.cpp +++ b/be/src/information_schema/schema_active_queries_scanner.cpp @@ -131,7 +131,8 @@ Status SchemaActiveQueriesScanner::get_next_block_internal(Block* block, bool* e } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_active_query_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_authentication_integrations_scanner.cpp b/be/src/information_schema/schema_authentication_integrations_scanner.cpp index 95359b58264d15..37b9e6811baebb 100644 --- a/be/src/information_schema/schema_authentication_integrations_scanner.cpp +++ b/be/src/information_schema/schema_authentication_integrations_scanner.cpp @@ -134,7 +134,8 @@ Status SchemaAuthenticationIntegrationsScanner::get_next_block_internal(Block* b } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_authentication_integrations_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_backend_active_tasks.cpp b/be/src/information_schema/schema_backend_active_tasks.cpp index b41f116b7550af..5849a5ab438e9c 100644 --- a/be/src/information_schema/schema_backend_active_tasks.cpp +++ b/be/src/information_schema/schema_backend_active_tasks.cpp @@ -87,7 +87,8 @@ Status SchemaBackendActiveTasksScanner::get_next_block_internal(Block* block, bo } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_task_stats_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; @@ -95,4 +96,4 @@ Status SchemaBackendActiveTasksScanner::get_next_block_internal(Block* block, bo return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp index 3c7b1ec0bc5c9a..201262584f1304 100644 --- a/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp +++ b/be/src/information_schema/schema_backend_kerberos_ticket_cache.cpp @@ -83,7 +83,8 @@ Status SchemaBackendKerberosTicketCacheScanner::get_next_block_internal(Block* b } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_info_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp index fec899c252a933..d00c93a4c4eec9 100644 --- a/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp +++ b/be/src/information_schema/schema_catalog_meta_cache_stats_scanner.cpp @@ -143,7 +143,8 @@ Status SchemaCatalogMetaCacheStatsScanner::get_next_block_internal(Block* block, } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_database_properties_scanner.cpp b/be/src/information_schema/schema_database_properties_scanner.cpp index c73dd9301e056d..dcb4810f171bdb 100644 --- a/be/src/information_schema/schema_database_properties_scanner.cpp +++ b/be/src/information_schema/schema_database_properties_scanner.cpp @@ -147,7 +147,8 @@ Status SchemaDatabasePropertiesScanner::get_next_block_internal(Block* block, bo return Status::OK(); } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_dbproperties_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; if (!check_and_mark_eos(eos)) { diff --git a/be/src/information_schema/schema_file_cache_statistics.cpp b/be/src/information_schema/schema_file_cache_statistics.cpp index 0b69766bbeeae9..c8f1243c300ee2 100644 --- a/be/src/information_schema/schema_file_cache_statistics.cpp +++ b/be/src/information_schema/schema_file_cache_statistics.cpp @@ -75,7 +75,8 @@ Status SchemaFileCacheStatisticsScanner::get_next_block_internal(Block* block, b } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_stats_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_partitions_scanner.cpp b/be/src/information_schema/schema_partitions_scanner.cpp index 834fd928f7126e..4939fb2970247f 100644 --- a/be/src/information_schema/schema_partitions_scanner.cpp +++ b/be/src/information_schema/schema_partitions_scanner.cpp @@ -208,7 +208,8 @@ Status SchemaPartitionsScanner::get_next_block_internal(Block* block, bool* eos) } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_partitions_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_role_mappings_scanner.cpp b/be/src/information_schema/schema_role_mappings_scanner.cpp index 31e58e6cbe9fb5..99e5211fbd88a0 100644 --- a/be/src/information_schema/schema_role_mappings_scanner.cpp +++ b/be/src/information_schema/schema_role_mappings_scanner.cpp @@ -132,7 +132,8 @@ Status SchemaRoleMappingsScanner::get_next_block_internal(Block* block, bool* eo } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_role_mappings_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_scanner.cpp b/be/src/information_schema/schema_scanner.cpp index c056617e3972e0..b2b38c83d66410 100644 --- a/be/src/information_schema/schema_scanner.cpp +++ b/be/src/information_schema/schema_scanner.cpp @@ -96,6 +96,24 @@ namespace doris { class ObjectPool; +namespace { + +void insert_column_range(ColumnWithTypeAndName* dst, const ColumnWithTypeAndName& src, size_t start, + size_t length) { + DORIS_CHECK(dst->column.get() != nullptr); + DORIS_CHECK(src.column.get() != nullptr); + MutableColumnPtr dst_column = IColumn::mutate(std::move(dst->column)); + ColumnPtr src_column = src.column->convert_to_full_column_if_const(); + if (dst_column->is_nullable() && !src_column->is_nullable()) { + src_column = make_nullable(src_column); + } + DORIS_CHECK(dst_column->is_nullable() == src_column->is_nullable()); + dst_column->insert_range_from(*src_column, start, length); + dst->column = std::move(dst_column); +} + +} // namespace + SchemaScanner::SchemaScanner(const std::vector& columns, TSchemaTableType::type type) : _is_init(false), _columns(columns), _schema_table_type(type) {} @@ -116,10 +134,8 @@ Status SchemaScanner::get_next_block(RuntimeState* state, Block* block, bool* eo DCHECK(_async_thread_running == false); RETURN_IF_ERROR(_scanner_status.status()); for (size_t i = 0; i < block->columns(); i++) { - std::move(*block->get_by_position(i).column) - .mutate() - ->insert_range_from(*_data_block->get_by_position(i).column, 0, - _data_block->rows()); + insert_column_range(&block->get_by_position(i), _data_block->get_by_position(i), 0, + _data_block->rows()); } _data_block->clear_column_data(); *eos = _eos; @@ -298,11 +314,10 @@ void SchemaScanner::_init_block(Block* src_block) { Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, const std::vector& datas) { const ColumnDesc& col_desc = _columns[pos]; - MutableColumnPtr column_ptr; - column_ptr = std::move(*block->get_by_position(pos).column).assume_mutable(); + MutableColumnPtr column_ptr = IColumn::mutate(std::move(block->get_by_position(pos).column)); IColumn* col_ptr = column_ptr.get(); - auto* nullable_column = reinterpret_cast(col_ptr); + auto* nullable_column = assert_cast(col_ptr); // Resize in advance to improve insertion efficiency. size_t fill_num = datas.size(); @@ -443,6 +458,7 @@ Status SchemaScanner::fill_dest_column_for_range(Block* block, size_t pos, } } } + block->replace_by_position(pos, std::move(column_ptr)); return Status::OK(); } @@ -457,8 +473,8 @@ std::string SchemaScanner::get_db_from_full_name(const std::string& full_name) { Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* block, PrimitiveType type) { MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable(); - auto* nullable_column = reinterpret_cast(mutable_col_ptr.get()); + mutable_col_ptr = IColumn::mutate(std::move(block->get_by_position(col_index).column)); + auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); switch (type) { @@ -513,6 +529,7 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, Block* bloc } } nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); return Status::OK(); } diff --git a/be/src/information_schema/schema_scanner_helper.cpp b/be/src/information_schema/schema_scanner_helper.cpp index 9ec2cdcd7cbaa2..7907dc264b66fd 100644 --- a/be/src/information_schema/schema_scanner_helper.cpp +++ b/be/src/information_schema/schema_scanner_helper.cpp @@ -19,6 +19,7 @@ #include "cctz/time_zone.h" #include "core/block/block.h" +#include "core/column/column_nullable.h" #include "core/data_type/data_type_factory.hpp" #include "core/data_type/primitive_type.h" #include "core/string_ref.h" @@ -31,29 +32,31 @@ namespace doris { void SchemaScannerHelper::insert_string_value(int col_index, std::string_view str_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_data(str_val.data(), str_val.size()); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, const std::vector& datas, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp, const cctz::time_zone& ctz, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); @@ -64,41 +67,46 @@ void SchemaScannerHelper::insert_datetime_value(int col_index, int64_t timestamp auto data = datas[0]; assert_cast(col_ptr)->insert_data(reinterpret_cast(data), 0); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_bool_value(int col_index, bool bool_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(bool_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int32_value(int col_index, int32_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_int64_value(int col_index, int64_t int_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(int_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } void SchemaScannerHelper::insert_double_value(int col_index, double double_val, Block* block) { - MutableColumnPtr mutable_col_ptr; - mutable_col_ptr = block->get_by_position(col_index).column->assume_mutable(); + MutableColumnPtr mutable_col_ptr = + IColumn::mutate(std::move(block->get_by_position(col_index).column)); auto* nullable_column = assert_cast(mutable_col_ptr.get()); IColumn* col_ptr = &nullable_column->get_nested_column(); assert_cast(col_ptr)->insert_value(double_val); nullable_column->push_false_to_nullmap(1); + block->replace_by_position(col_index, std::move(mutable_col_ptr)); } } // namespace doris diff --git a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp index 2a223c144ba5fa..f52d5399bc4df7 100644 --- a/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp +++ b/be/src/information_schema/schema_sql_block_rule_status_scanner.cpp @@ -167,7 +167,8 @@ Status SchemaSqlBlockRuleStatusScanner::get_next_block_internal(Block* block, bo } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR( mblock.add_rows(_sql_block_rule_status_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_table_options_scanner.cpp b/be/src/information_schema/schema_table_options_scanner.cpp index 096f0860bfc3bd..e102853429b868 100644 --- a/be/src/information_schema/schema_table_options_scanner.cpp +++ b/be/src/information_schema/schema_table_options_scanner.cpp @@ -165,7 +165,8 @@ Status SchemaTableOptionsScanner::get_next_block_internal(Block* block, bool* eo } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_tableoptions_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_table_properties_scanner.cpp b/be/src/information_schema/schema_table_properties_scanner.cpp index 0affe500b35f7b..cd6680be7601a4 100644 --- a/be/src/information_schema/schema_table_properties_scanner.cpp +++ b/be/src/information_schema/schema_table_properties_scanner.cpp @@ -159,7 +159,8 @@ Status SchemaTablePropertiesScanner::get_next_block_internal(Block* block, bool* } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_tableproperties_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp index c2c5ceab41ceb2..5259b7b8db145b 100644 --- a/be/src/information_schema/schema_table_stream_consumption_scanner.cpp +++ b/be/src/information_schema/schema_table_stream_consumption_scanner.cpp @@ -129,7 +129,8 @@ Status SchemaTableStreamConsumptionScanner::get_next_block_internal(Block* block } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR( mblock.add_rows(_table_stream_consumption_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; @@ -138,4 +139,4 @@ Status SchemaTableStreamConsumptionScanner::get_next_block_internal(Block* block return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_table_streams_scanner.cpp b/be/src/information_schema/schema_table_streams_scanner.cpp index 288d4e56c9a876..0c9697341ae929 100644 --- a/be/src/information_schema/schema_table_streams_scanner.cpp +++ b/be/src/information_schema/schema_table_streams_scanner.cpp @@ -130,7 +130,8 @@ Status SchemaTableStreamsScanner::get_next_block_internal(Block* block, bool* eo } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_table_streams_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; @@ -138,4 +139,4 @@ Status SchemaTableStreamsScanner::get_next_block_internal(Block* block, bool* eo return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_view_dependency_scanner.cpp b/be/src/information_schema/schema_view_dependency_scanner.cpp index 1aa6ce614312f7..2a7f684b0e1f2b 100644 --- a/be/src/information_schema/schema_view_dependency_scanner.cpp +++ b/be/src/information_schema/schema_view_dependency_scanner.cpp @@ -131,7 +131,8 @@ Status SchemaViewDependencyScanner::get_next_block_internal(Block* block, bool* } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_view_dependency_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_workload_group_privileges.cpp b/be/src/information_schema/schema_workload_group_privileges.cpp index d0dab55965c3d1..627344036abc82 100644 --- a/be/src/information_schema/schema_workload_group_privileges.cpp +++ b/be/src/information_schema/schema_workload_group_privileges.cpp @@ -125,7 +125,8 @@ Status SchemaWorkloadGroupPrivilegesScanner::get_next_block_internal(Block* bloc } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR( mblock.add_rows(_workload_groups_privs_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp index 175b1dbd080e81..ada0151d14b190 100644 --- a/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp +++ b/be/src/information_schema/schema_workload_group_resource_usage_scanner.cpp @@ -78,7 +78,8 @@ Status SchemaBackendWorkloadGroupResourceUsage::get_next_block_internal(Block* b } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; @@ -86,4 +87,4 @@ Status SchemaBackendWorkloadGroupResourceUsage::get_next_block_internal(Block* b return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/information_schema/schema_workload_groups_scanner.cpp b/be/src/information_schema/schema_workload_groups_scanner.cpp index 5ad1b744e975e6..7375809b45e538 100644 --- a/be/src/information_schema/schema_workload_groups_scanner.cpp +++ b/be/src/information_schema/schema_workload_groups_scanner.cpp @@ -137,7 +137,8 @@ Status SchemaWorkloadGroupsScanner::get_next_block_internal(Block* block, bool* } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_workload_groups_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp index 040b747bb435c4..eb82b26b8769f6 100644 --- a/be/src/information_schema/schema_workload_sched_policy_scanner.cpp +++ b/be/src/information_schema/schema_workload_sched_policy_scanner.cpp @@ -127,7 +127,8 @@ Status SchemaWorkloadSchedulePolicyScanner::get_next_block_internal(Block* block } int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); - MutableBlock mblock = MutableBlock::build_mutable_block(block); + ScopedMutableBlock scoped_mblock(block); + auto& mblock = scoped_mblock.mutable_block(); RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); _row_idx += current_batch_rows; diff --git a/be/src/load/memtable/memtable.cpp b/be/src/load/memtable/memtable.cpp index 588d8543d7b4b4..9f5970a70b1568 100644 --- a/be/src/load/memtable/memtable.cpp +++ b/be/src/load/memtable/memtable.cpp @@ -201,9 +201,10 @@ Status MemTable::insert(const Block* input_block, const DorisVector& r if (_is_first_insertion) { _is_first_insertion = false; auto clone_block = input_block->clone_without_columns(&_column_offset); - _input_mutable_block = MutableBlock::build_mutable_block(&clone_block); + _input_mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); _vec_row_comparator->set_block(&_input_mutable_block); - _output_mutable_block = MutableBlock::build_mutable_block(&clone_block); + clone_block = input_block->clone_without_columns(&_column_offset); + _output_mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); if (_tablet_schema->has_sequence_col()) { if (_partial_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS) { // for unique key fixed partial update, sequence column index in block @@ -390,9 +391,9 @@ Status MemTable::_sort_by_cluster_keys() { _stat.sort_times++; // sort all rows Block in_block = _output_mutable_block.to_block(); - MutableBlock mutable_block = MutableBlock::build_mutable_block(&in_block); auto clone_block = in_block.clone_without_columns(); - _output_mutable_block = MutableBlock::build_mutable_block(&clone_block); + MutableBlock mutable_block = MutableBlock::build_mutable_block(std::move(in_block)); + _output_mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); DorisVector> row_in_blocks; row_in_blocks.reserve(mutable_block.rows()); @@ -453,12 +454,11 @@ void MemTable::_sort_one_column(DorisVector>& row_in } template -void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, - int row_pos) { +void MemTable::_finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos) { // move key columns for (size_t i = 0; i < _tablet_schema->num_key_columns(); ++i) { - _output_mutable_block.get_column_by_position(i)->insert_from(*block_data[i].column.get(), - row->_row_pos); + _output_mutable_block.get_column_by_position(i)->insert_from( + *mutable_block.get_column_by_position(i), row->_row_pos); } if (row->has_init_agg()) { // get value columns from agg_places @@ -490,7 +490,7 @@ void MemTable::_finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& // move columns for rows do not need agg for (size_t i = _tablet_schema->num_key_columns(); i < _num_columns; ++i) { _output_mutable_block.get_column_by_position(i)->insert_from( - *block_data[i].column.get(), row->_row_pos); + *mutable_block.get_column_by_position(i), row->_row_pos); } } if constexpr (!is_final) { @@ -525,9 +525,9 @@ void MemTable::_aggregate() { SCOPED_RAW_TIMER(&_stat.agg_ns); _stat.agg_times++; Block in_block = _input_mutable_block.to_block(); - MutableBlock mutable_block = MutableBlock::build_mutable_block(&in_block); + std::unique_ptr empty_input_block = in_block.create_same_struct_block(0); + MutableBlock mutable_block = MutableBlock::build_mutable_block(std::move(in_block)); _vec_row_comparator->set_block(&mutable_block); - auto& block_data = in_block.get_columns_with_type_and_name(); DorisVector> temp_row_in_blocks; temp_row_in_blocks.reserve(_last_sorted_pos); //only init agg if needed @@ -558,7 +558,7 @@ void MemTable::_aggregate() { if (!temp_row_in_blocks.empty()) { // The rows from the previous batch of _row_in_blocks have been merged into temp_row_in_blocks, // now call finalize to write the aggregation results into _output_mutable_block. - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } temp_row_in_blocks.push_back(cur_row_ptr); @@ -567,15 +567,15 @@ void MemTable::_aggregate() { } if (!temp_row_in_blocks.empty()) { // finalize the last low - _finalize_one_row(temp_row_in_blocks.back().get(), block_data, row_pos); + _finalize_one_row(temp_row_in_blocks.back().get(), mutable_block, row_pos); } } else { DCHECK(_delete_sign_col_idx != -1); if (_seq_col_idx_in_block == -1) { - _aggregate_for_flexible_partial_update_without_seq_col( - block_data, mutable_block, temp_row_in_blocks); + _aggregate_for_flexible_partial_update_without_seq_col(mutable_block, + temp_row_in_blocks); } else { - _aggregate_for_flexible_partial_update_with_seq_col(block_data, mutable_block, + _aggregate_for_flexible_partial_update_with_seq_col(mutable_block, temp_row_in_blocks); } } @@ -583,8 +583,7 @@ void MemTable::_aggregate() { // if is not final, we collect the agg results to input_block and then continue to insert _input_mutable_block.swap(_output_mutable_block); //TODO(weixang):opt here. - std::unique_ptr empty_input_block = in_block.create_same_struct_block(0); - _output_mutable_block = MutableBlock::build_mutable_block(empty_input_block.get()); + _output_mutable_block = MutableBlock::build_mutable_block(std::move(*empty_input_block)); _output_mutable_block.clear_column_data(); *_row_in_blocks = temp_row_in_blocks; _last_sorted_pos = _row_in_blocks->size(); @@ -593,8 +592,7 @@ void MemTable::_aggregate() { template void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { std::shared_ptr prev_row {nullptr}; int row_pos = -1; auto& skip_bitmaps = @@ -609,12 +607,12 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( auto finalize_rows = [&]() { if (row_with_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_with_delete_sign); - _finalize_one_row(row_with_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_with_delete_sign.get(), mutable_block, ++row_pos); row_with_delete_sign = nullptr; } if (row_without_delete_sign != nullptr) { temp_row_in_blocks.push_back(row_without_delete_sign); - _finalize_one_row(row_without_delete_sign.get(), block_data, ++row_pos); + _finalize_one_row(row_without_delete_sign.get(), mutable_block, ++row_pos); row_without_delete_sign = nullptr; } // _arena.clear(); @@ -670,15 +668,14 @@ void MemTable::_aggregate_for_flexible_partial_update_without_seq_col( template void MemTable::_aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, - DorisVector>& temp_row_in_blocks) { + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks) { // For flexible partial update, when table has sequence column, we don't do any aggregation // in memtable. These duplicate rows will be aggregated in VerticalSegmentWriter int row_pos = -1; for (const auto& row_ptr : *_row_in_blocks) { RowInBlock* row = row_ptr.get(); temp_row_in_blocks.push_back(row_ptr); - _finalize_one_row(row, block_data, ++row_pos); + _finalize_one_row(row, mutable_block, ++row_pos); } } diff --git a/be/src/load/memtable/memtable.h b/be/src/load/memtable/memtable.h index 42f96dd4f5f769..ad20667527fed1 100644 --- a/be/src/load/memtable/memtable.h +++ b/be/src/load/memtable/memtable.h @@ -262,7 +262,7 @@ class MemTable { void _sort_one_column(DorisVector>& row_in_blocks, Tie& tie, std::function cmp); template - void _finalize_one_row(RowInBlock* row, const ColumnsWithTypeAndName& block_data, int row_pos); + void _finalize_one_row(RowInBlock* row, MutableBlock& mutable_block, int row_pos); void _init_row_for_agg(RowInBlock* row, MutableBlock& mutable_block); void _clear_row_agg(RowInBlock* row); @@ -271,12 +271,12 @@ class MemTable { template void _aggregate_for_flexible_partial_update_without_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); template void _aggregate_for_flexible_partial_update_with_seq_col( - const ColumnsWithTypeAndName& block_data, MutableBlock& mutable_block, + MutableBlock& mutable_block, DorisVector>& temp_row_in_blocks); Status _put_into_output(Block& in_block); diff --git a/be/src/runtime/query_cache/query_cache.cpp b/be/src/runtime/query_cache/query_cache.cpp index d79acfa7ef788d..28610d44808686 100644 --- a/be/src/runtime/query_cache/query_cache.cpp +++ b/be/src/runtime/query_cache/query_cache.cpp @@ -17,6 +17,8 @@ #include "runtime/query_cache/query_cache.h" +#include "common/logging.h" + namespace doris { std::vector* QueryCacheHandle::get_cache_slot_orders() { @@ -43,7 +45,10 @@ void QueryCache::insert(const CacheKey& key, int64_t version, CacheResult& res, CacheResult cache_result; for (auto& block_data : res) { cache_result.emplace_back(Block::create_unique())->swap(block_data->clone_empty()); - (void)MutableBlock(cache_result.back().get()).merge(*block_data); + ScopedMutableBlock scoped_mutable_block(cache_result.back().get()); + auto& mutable_block = scoped_mutable_block.mutable_block(); + auto st = mutable_block.merge(*block_data); + DORIS_CHECK(st.ok()); } auto cache_value_ptr = std::make_unique(version, std::move(cache_result), slot_orders); diff --git a/be/src/runtime/result_block_buffer.cpp b/be/src/runtime/result_block_buffer.cpp index ba7f135ce762d5..d644eb03b6184d 100644 --- a/be/src/runtime/result_block_buffer.cpp +++ b/be/src/runtime/result_block_buffer.cpp @@ -214,9 +214,11 @@ Status ResultBlockBuffer::add_batch(RuntimeState* state, (batch_size + _last_batch_bytes) <= config::thrift_max_message_size) { if constexpr (std::is_same_v) { auto last_block = _result_batch_queue.back(); + auto mutable_columns_guard = last_block->mutate_columns_scoped(); + auto& mutable_columns = mutable_columns_guard.mutable_columns(); for (size_t i = 0; i < last_block->columns(); i++) { - last_block->mutate_columns()[i]->insert_range_from( - *result->get_by_position(i).column, 0, num_rows); + mutable_columns[i]->insert_range_from(*result->get_by_position(i).column, 0, + num_rows); } } else { std::vector& back_rows = diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index af34a3fe1d4cfc..433cabb777defe 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -54,6 +54,7 @@ #include "storage/segment/column_reader.h" #include "storage/tablet/tablet_schema.h" #include "storage/utils.h" +#include "util/defer_op.h" #include "util/jsonb/serialize.h" #include "util/lru_cache.h" #include "util/simd/bits.h" @@ -498,96 +499,99 @@ Status PointQueryExecutor::_lookup_row_key() { Status PointQueryExecutor::_lookup_row_data() { // 3. get values SCOPED_TIMER(&_profile_metrics.lookup_data_ns); - for (size_t i = 0; i < _row_read_ctxs.size(); ++i) { - if (_row_read_ctxs[i]._cached_row_data.valid()) { - RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_block( - _reusable->get_data_type_serdes(), - _row_read_ctxs[i]._cached_row_data.data().data, - _row_read_ctxs[i]._cached_row_data.data().size, _reusable->get_col_uid_to_idx(), - *_result_block, _reusable->get_col_default_values(), - _reusable->include_col_uids())); - continue; - } - if (!_row_read_ctxs[i]._row_location.has_value()) { - continue; - } - std::string value; - // fill block by row store - if (_reusable->rs_column_uid() != -1) { - bool use_row_cache = !config::disable_storage_row_cache; - RETURN_IF_ERROR(_tablet->lookup_row_data( - _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(), - *(_row_read_ctxs[i]._rowset_ptr), _profile_metrics.read_stats, value, - use_row_cache)); - // serilize value to block, currently only jsonb row formt - RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_block( - _reusable->get_data_type_serdes(), value.data(), value.size(), - _reusable->get_col_uid_to_idx(), *_result_block, - _reusable->get_col_default_values(), _reusable->include_col_uids())); - } - if (!_reusable->missing_col_uids().empty()) { - if (!_reusable->runtime_state()->enable_short_circuit_query_access_column_store()) { - std::string missing_columns; - for (int cid : _reusable->missing_col_uids()) { - missing_columns += _tablet->tablet_schema()->column_by_uid(cid).name() + ","; - } - return Status::InternalError( - "Not support column store, set store_row_column=true or row_store_columns " - "in table " - "properties, missing columns: " + - missing_columns + " should be added to row store"); + { + auto result_columns_guard = _result_block->mutate_columns_scoped(); + MutableColumns& result_columns = result_columns_guard.mutable_columns(); + for (size_t i = 0; i < _row_read_ctxs.size(); ++i) { + if (_row_read_ctxs[i]._cached_row_data.valid()) { + RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_columns( + _reusable->get_data_type_serdes(), + _row_read_ctxs[i]._cached_row_data.data().data, + _row_read_ctxs[i]._cached_row_data.data().size, + _reusable->get_col_uid_to_idx(), result_columns, + _reusable->get_col_default_values(), _reusable->include_col_uids())); + continue; + } + if (!_row_read_ctxs[i]._row_location.has_value()) { + continue; } - // fill missing columns by column store - RowLocation row_loc = _row_read_ctxs[i]._row_location.value(); - BetaRowsetSharedPtr rowset = - std::static_pointer_cast(_tablet->get_rowset(row_loc.rowset_id)); - SegmentCacheHandle segment_cache; - { - SCOPED_TIMER(&_profile_metrics.load_segment_data_stage_ns); - RETURN_IF_ERROR( - SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); + std::string value; + // fill block by row store + if (_reusable->rs_column_uid() != -1) { + bool use_row_cache = !config::disable_storage_row_cache; + RETURN_IF_ERROR(_tablet->lookup_row_data( + _row_read_ctxs[i]._primary_key, _row_read_ctxs[i]._row_location.value(), + *(_row_read_ctxs[i]._rowset_ptr), _profile_metrics.read_stats, value, + use_row_cache)); + // serialize value to block, currently only jsonb row format + RETURN_IF_ERROR(JsonbSerializeUtil::jsonb_to_columns( + _reusable->get_data_type_serdes(), value.data(), value.size(), + _reusable->get_col_uid_to_idx(), result_columns, + _reusable->get_col_default_values(), _reusable->include_col_uids())); } - // find segment - auto it = std::find_if(segment_cache.get_segments().cbegin(), - segment_cache.get_segments().cend(), - [&](const segment_v2::SegmentSharedPtr& seg) { - return seg->id() == row_loc.segment_id; - }); - const auto& segment = *it; - for (int cid : _reusable->missing_col_uids()) { - int pos = _reusable->get_col_uid_to_idx().at(cid); - std::vector row_ids { - static_cast(row_loc.row_id)}; - MutableColumnPtr column = - _result_block->get_by_position(pos).column->assume_mutable(); - std::unique_ptr iter; - SlotDescriptor* slot = _reusable->tuple_desc()->slots()[pos]; - StorageReadOptions storage_read_options; - storage_read_options.stats = &_read_stats; - storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; - RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, - row_ids, column, - storage_read_options, iter)); - if (_tablet->tablet_schema() - ->column_by_uid(slot->col_unique_id()) - .has_char_type()) { - column->shrink_padding_chars(); + if (!_reusable->missing_col_uids().empty()) { + if (!_reusable->runtime_state()->enable_short_circuit_query_access_column_store()) { + std::string missing_columns; + for (int cid : _reusable->missing_col_uids()) { + missing_columns += + _tablet->tablet_schema()->column_by_uid(cid).name() + ","; + } + return Status::InternalError( + "Not support column store, set store_row_column=true or " + "row_store_columns in table properties, missing columns: " + + missing_columns + " should be added to row store"); + } + // fill missing columns by column store + RowLocation row_loc = _row_read_ctxs[i]._row_location.value(); + BetaRowsetSharedPtr rowset = std::static_pointer_cast( + _tablet->get_rowset(row_loc.rowset_id)); + SegmentCacheHandle segment_cache; + { + SCOPED_TIMER(&_profile_metrics.load_segment_data_stage_ns); + RETURN_IF_ERROR( + SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); + } + // find segment + auto it = std::find_if(segment_cache.get_segments().cbegin(), + segment_cache.get_segments().cend(), + [&](const segment_v2::SegmentSharedPtr& seg) { + return seg->id() == row_loc.segment_id; + }); + const auto& segment = *it; + for (int cid : _reusable->missing_col_uids()) { + int pos = _reusable->get_col_uid_to_idx().at(cid); + std::vector row_ids { + static_cast(row_loc.row_id)}; + auto& column = result_columns[pos]; + std::unique_ptr iter; + SlotDescriptor* slot = _reusable->tuple_desc()->slots()[pos]; + StorageReadOptions storage_read_options; + storage_read_options.stats = &_read_stats; + storage_read_options.io_ctx.reader_type = ReaderType::READER_QUERY; + auto st = segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, + row_ids, column, storage_read_options, + iter); + if (st.ok() && _tablet->tablet_schema() + ->column_by_uid(slot->col_unique_id()) + .has_char_type()) { + column->shrink_padding_chars(); + } + RETURN_IF_ERROR(st); } } } - } - if (_result_block->columns() > _reusable->include_col_uids().size()) { - // Padding rows for some columns that no need to output to mysql client - // eg. SELECT k1,v1,v2 FROM TABLE WHERE k1 = 1, k1 is not in output slots, tuple as bellow - // TupleDescriptor{id=1, tbl=table_with_column_group} - // SlotDescriptor{id=8, col=v1, colUniqueId=1 ...} - // SlotDescriptor{id=9, col=v2, colUniqueId=2 ...} - // thus missing in include_col_uids and missing_col_uids - for (size_t i = 0; i < _result_block->columns(); ++i) { - auto column = _result_block->get_by_position(i).column; - int padding_rows = _row_hits - cast_set(column->size()); - if (padding_rows > 0) { - column->assume_mutable()->insert_many_defaults(padding_rows); + if (result_columns.size() > _reusable->include_col_uids().size()) { + // Padding rows for some columns that no need to output to mysql client + // eg. SELECT k1,v1,v2 FROM TABLE WHERE k1 = 1, k1 is not in output slots, tuple as bellow + // TupleDescriptor{id=1, tbl=table_with_column_group} + // SlotDescriptor{id=8, col=v1, colUniqueId=1 ...} + // SlotDescriptor{id=9, col=v2, colUniqueId=2 ...} + // thus missing in include_col_uids and missing_col_uids + for (auto& column : result_columns) { + int padding_rows = _row_hits - cast_set(column->size()); + if (padding_rows > 0) { + column->insert_many_defaults(padding_rows); + } } } } diff --git a/be/src/storage/iterator/block_reader.cpp b/be/src/storage/iterator/block_reader.cpp index e50ca8a9c831b1..c9af7fed63bf2d 100644 --- a/be/src/storage/iterator/block_reader.cpp +++ b/be/src/storage/iterator/block_reader.cpp @@ -167,8 +167,8 @@ Status BlockReader::_init_agg_state(const ReaderParams& read_params) { return Status::OK(); } - _stored_data_columns = - _next_row.block->create_same_struct_block(batch_max_rows())->mutate_columns(); + auto stored_block = _next_row.block->create_same_struct_block(batch_max_rows()); + _stored_data_columns = std::move(*stored_block).mutate_columns(); _stored_has_null_tag.resize(_stored_data_columns.size()); _stored_has_variable_length_tag.resize(_stored_data_columns.size()); @@ -344,7 +344,8 @@ Status BlockReader::_replace_key_next_block(Block* block, bool* eof) { } auto target_block_row = 0; - auto target_columns = block->mutate_columns(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); // currently seq mapping only support mor table // so this will not be executed for the time being if (UNLIKELY(_reader_context.record_rowids)) { @@ -476,7 +477,8 @@ Status BlockReader::_agg_key_next_block(Block* block, bool* eof) { auto target_block_row = 0; auto merged_row = 0; - auto target_columns = block->mutate_columns(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); RETURN_IF_ERROR(_insert_data_normal(target_columns)); target_block_row++; _append_agg_data(target_columns); @@ -520,7 +522,6 @@ Status BlockReader::_agg_key_next_block(Block* block, bool* eof) { _agg_data_counters.push_back(_last_agg_data_counter); _last_agg_data_counter = 0; _update_agg_data(target_columns); - block->set_columns(std::move(target_columns)); _merged_rows += merged_row; return Status::OK(); @@ -533,7 +534,8 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { } auto target_block_row = 0; - auto target_columns = block->mutate_columns(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); if (UNLIKELY(_reader_context.record_rowids)) { _block_row_locations.resize(batch_max_rows()); } @@ -580,9 +582,10 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { LOG(WARNING) << "tablet_id: " << tablet()->tablet_id() << " delete sign idx " << delete_sign_idx << " not invalid, skip filter delete in base compaction"; + target_columns_guard.restore(); return Status::OK(); } - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); reinterpret_cast(delete_filter_column.get())->resize(target_block_row); auto* __restrict filter_data = @@ -603,18 +606,17 @@ Status BlockReader::_unique_key_next_block(Block* block, bool* eof) { } } auto target_columns_size = target_columns.size(); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; - block->set_columns(std::move(target_columns)); + target_columns_guard.restore(); block->insert(column_with_type_and_name); RETURN_IF_ERROR(Block::filter_block(block, target_columns_size, target_columns_size)); _stats.rows_del_filtered += target_block_row - block->rows(); if (UNLIKELY(_reader_context.record_rowids)) { DCHECK_EQ(_block_row_locations.size(), block->rows() + delete_count); } - } else { - block->set_columns(std::move(target_columns)); } return Status::OK(); } diff --git a/be/src/storage/iterator/vcollect_iterator.cpp b/be/src/storage/iterator/vcollect_iterator.cpp index b26e5c5047a5fd..1c8954168310a0 100644 --- a/be/src/storage/iterator/vcollect_iterator.cpp +++ b/be/src/storage/iterator/vcollect_iterator.cpp @@ -292,7 +292,7 @@ Status VCollectIterator::_topn_next(Block* block) { } } } - MutableBlock mutable_block = MutableBlock::build_mutable_block(&clone_block); + MutableBlock mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); const std::vector* sort_columns = _reader->_reader_context.read_orderby_key_columns; for (auto column_idx : *sort_columns) { @@ -413,7 +413,7 @@ Status VCollectIterator::_topn_next(Block* block) { << mutable_block.rows() << " rows"; Block tmp_block = mutable_block.to_block(); clone_block = tmp_block.clone_empty(); - mutable_block = MutableBlock::build_mutable_block(&clone_block); + mutable_block = MutableBlock::build_mutable_block(std::move(clone_block)); for (auto it = sorted_row_pos.begin(); it != sorted_row_pos.end(); it++) { mutable_block.add_row(&tmp_block, cast_set(*it)); } @@ -843,8 +843,6 @@ bool VCollectIterator::Level1Iterator::collected_enough_rows(const MutableColumn Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { SCOPED_RAW_TIMER(&_reader->_stats.collect_iterator_merge_next_timer); int target_block_row = 0; - auto target_columns = block->mutate_columns(); - size_t column_count = target_columns.size(); IteratorRowRef cur_row = _ref; IteratorRowRef pre_row_ref = _ref; @@ -852,6 +850,9 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { for (size_t i = block->columns(); i < cur_row.block->columns(); ++i) { block->insert(cur_row.block->get_by_position(i).clone_empty()); } + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); + size_t column_count = target_columns.size(); auto batch_size = _reader->batch_max_rows(); if (UNLIKELY(_reader->_reader_context.record_rowids)) { @@ -883,7 +884,6 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { if (UNLIKELY(_reader->_reader_context.record_rowids)) { _block_row_locations.resize(target_block_row); } - block->set_columns(std::move(target_columns)); return res; } @@ -900,7 +900,6 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { continuous_row_in_block); } } - block->set_columns(std::move(target_columns)); return Status::OK(); } if (continuous_row_in_block == 0) { @@ -932,7 +931,6 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { if (UNLIKELY(_reader->_reader_context.record_rowids)) { _block_row_locations.resize(target_block_row); } - block->set_columns(std::move(target_columns)); return Status::OK(); } } while (true); diff --git a/be/src/storage/iterator/vertical_block_reader.cpp b/be/src/storage/iterator/vertical_block_reader.cpp index aa90c83ccb0a3d..13a8c0fdecb0f8 100644 --- a/be/src/storage/iterator/vertical_block_reader.cpp +++ b/be/src/storage/iterator/vertical_block_reader.cpp @@ -186,8 +186,8 @@ void VerticalBlockReader::_init_agg_state(const ReaderParams& read_params) { return; } DCHECK(_return_columns.size() == _next_row.block->columns()); - _stored_data_columns = - _next_row.block->create_same_struct_block(_reader_context.batch_size)->mutate_columns(); + auto stored_block = _next_row.block->create_same_struct_block(_reader_context.batch_size); + _stored_data_columns = std::move(*stored_block).mutate_columns(); _stored_has_null_tag.resize(_stored_data_columns.size()); _stored_has_variable_length_tag.resize(_stored_data_columns.size()); @@ -398,7 +398,8 @@ Status VerticalBlockReader::_agg_key_next_block(Block* block, bool* eof) { return Status::OK(); } int target_block_row = 0; - auto target_columns = block->mutate_columns(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); // copy first row get from collect_iter in init _append_agg_data(target_columns); @@ -430,8 +431,6 @@ Status VerticalBlockReader::_agg_key_next_block(Block* block, bool* eof) { _agg_data_counters.push_back(_last_agg_data_counter); _last_agg_data_counter = 0; _update_agg_data(target_columns); - block->set_columns(std::move(target_columns)); - return Status::OK(); } @@ -483,12 +482,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { // delete sign column must store in last column of the block int delete_sign_idx = block->columns() - 1; DCHECK(delete_sign_idx > 0); - auto target_columns = block->mutate_columns(); - MutableColumnPtr delete_filter_column = (*std::move(_delete_filter_column)).mutate(); - reinterpret_cast(delete_filter_column.get())->resize(block_rows); - - auto* __restrict filter_data = - reinterpret_cast(delete_filter_column.get())->get_data().data(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); + auto delete_filter_column = IColumn::mutate(std::move(_delete_filter_column)); + auto* delete_filter_data_column = + reinterpret_cast(delete_filter_column.get()); + delete_filter_data_column->resize(block_rows); + + auto* __restrict filter_data = delete_filter_data_column->get_data().data(); auto* __restrict delete_data = reinterpret_cast(target_columns[delete_sign_idx].get()) ->get_data() @@ -517,12 +518,14 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { row_source_idx++; } + const auto column_to_keep = target_columns.size(); + target_columns_guard.restore(); + _delete_filter_column = std::move(delete_filter_column); ColumnWithTypeAndName column_with_type_and_name {_delete_filter_column, std::make_shared(), "__DORIS_COMPACTION_FILTER__"}; block->insert(column_with_type_and_name); - RETURN_IF_ERROR( - Block::filter_block(block, target_columns.size(), target_columns.size())); + RETURN_IF_ERROR(Block::filter_block(block, column_to_keep, column_to_keep)); _stats.rows_del_filtered += block_rows - block->rows(); if (UNLIKELY(_reader_context.record_rowids)) { DCHECK_EQ(_block_row_locations.size(), block->rows() + delete_count); @@ -548,7 +551,8 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { } // Value column processing - use batch optimization - auto target_columns = block->mutate_columns(); + auto target_columns_guard = block->mutate_columns_scoped(); + auto& target_columns = target_columns_guard.mutable_columns(); const size_t column_count = block->columns(); // Try to use batch optimization for value column compaction @@ -589,7 +593,6 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { dst_offset += batch.count; } - block->set_columns(std::move(target_columns)); return Status::OK(); } } @@ -617,7 +620,6 @@ Status VerticalBlockReader::_unique_key_next_block(Block* block, bool* eof) { }); ++target_block_row; } while (target_block_row < _reader_context.batch_size); - block->set_columns(std::move(target_columns)); return Status::OK(); } diff --git a/be/src/storage/iterator/vgeneric_iterators.cpp b/be/src/storage/iterator/vgeneric_iterators.cpp index 8d3b1bb50ee2f3..17729857a324bd 100644 --- a/be/src/storage/iterator/vgeneric_iterators.cpp +++ b/be/src/storage/iterator/vgeneric_iterators.cpp @@ -63,7 +63,8 @@ Status VStatisticsIterator::next_batch(Block* block) { DCHECK(block->columns() == _column_iterators.size()); if (_output_rows < _target_rows) { block->clear_column_data(); - auto columns = block->mutate_columns(); + auto columns_guard = block->mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); size_t size = _push_down_agg_type_opt == TPushAggOp::MINMAX ? 2 @@ -86,7 +87,6 @@ Status VStatisticsIterator::next_batch(Block* block) { } } } - block->set_columns(std::move(columns)); _output_rows += size; return Status::OK(); } diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp index 05d648d4819b15..a9861aa6ce9171 100644 --- a/be/src/storage/partial_update_info.cpp +++ b/be/src/storage/partial_update_info.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "common/consts.h" #include "common/logging.h" @@ -39,6 +40,18 @@ #include "storage/utils.h" namespace doris { +namespace { + +ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + +} // namespace + Status PartialUpdateInfo::init(int64_t tablet_id, int64_t txn_id, const TabletSchema& tablet_schema, UniqueKeyUpdateModePB unique_key_update_mode, PartialUpdateNewRowPolicyPB policy, @@ -326,7 +339,12 @@ Status FixedReadPlan::read_columns_by_plan( } } bool has_row_column = tablet_schema.has_row_store_for_all_columns(); - auto mutable_columns = block.mutate_columns(); + std::optional mutable_columns_guard; + MutableColumns* mutable_columns = nullptr; + if (!has_row_column) { + mutable_columns_guard.emplace(block); + mutable_columns = &mutable_columns_guard->mutable_columns(); + } uint32_t read_idx = 0; for (const auto& [rowset_id, segment_row_mappings] : plan) { for (const auto& [segment_id, mappings] : segment_row_mappings) { @@ -349,10 +367,11 @@ Status FixedReadPlan::read_columns_by_plan( } continue; } - for (size_t cid = 0; cid < mutable_columns.size(); ++cid) { + for (size_t cid = 0; cid < mutable_columns->size(); ++cid) { TabletColumn tablet_column = tablet_schema.column(cids_to_read[cid]); - auto st = doris::BaseTablet::fetch_value_by_rowids( - rowset_iter->second, segment_id, rids, tablet_column, mutable_columns[cid]); + auto st = doris::BaseTablet::fetch_value_by_rowids(rowset_iter->second, segment_id, + rids, tablet_column, + (*mutable_columns)[cid]); // set read value to output block if (!st.ok()) { LOG(WARNING) << "failed to fetch value"; @@ -361,7 +380,6 @@ Status FixedReadPlan::read_columns_by_plan( } } } - block.set_columns(std::move(mutable_columns)); return Status::OK(); } @@ -371,7 +389,8 @@ Status FixedReadPlan::fill_missing_columns( const TabletSchema& tablet_schema, Block& full_block, const std::vector& use_default_or_null_flag, bool has_default_or_nullable, uint32_t segment_start_pos, const Block* block) const { - auto mutable_full_columns = full_block.mutate_columns(); + auto mutable_full_columns_guard = full_block.mutate_columns_scoped(); + auto& mutable_full_columns = mutable_full_columns_guard.mutable_columns(); // create old value columns DCHECK(historical_context.partial_update_info != nullptr); DCHECK(historical_context.tablet_schema != nullptr); @@ -403,7 +422,8 @@ Status FixedReadPlan::fill_missing_columns( RETURN_IF_ERROR(BaseTablet::generate_default_value_block(tablet_schema, missing_cids, partial_update_info.default_values, old_value_block, default_value_block)); - auto mutable_default_value_columns = default_value_block.mutate_columns(); + auto mutable_default_value_columns_guard = default_value_block.mutate_columns_scoped(); + auto& mutable_default_value_columns = mutable_default_value_columns_guard.mutable_columns(); // fill all missing value from mutable_old_columns, need to consider default value and null value for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { @@ -461,7 +481,6 @@ Status FixedReadPlan::fill_missing_columns( } } } - full_block.set_columns(std::move(mutable_full_columns)); return Status::OK(); } @@ -482,7 +501,8 @@ Status FlexibleReadPlan::read_columns_by_plan( const TabletSchema& tablet_schema, const std::map& rsid_to_rowset, Block& old_value_block, std::map>* read_index) const { - auto mutable_columns = old_value_block.mutate_columns(); + auto mutable_columns_guard = old_value_block.mutate_columns_scoped(); + auto& mutable_columns = mutable_columns_guard.mutable_columns(); // cid -> next rid to fill in block std::map next_read_idx; @@ -513,7 +533,6 @@ Status FlexibleReadPlan::read_columns_by_plan( } } // !!!ATTENTION!!!: columns in block may have different size because every row has different columns to update - old_value_block.set_columns(std::move(mutable_columns)); return Status::OK(); } @@ -551,7 +570,8 @@ Status FlexibleReadPlan::fill_non_primary_key_columns( const std::vector& use_default_or_null_flag, bool has_default_or_nullable, uint32_t segment_start_pos, uint32_t block_start_pos, const Block* block, std::vector* skip_bitmaps) const { - auto mutable_full_columns = full_block.mutate_columns(); + auto mutable_full_columns_guard = full_block.mutate_columns_scoped(); + auto& mutable_full_columns = mutable_full_columns_guard.mutable_columns(); DCHECK(historical_context.partial_update_info != nullptr); // missing_cids are all non sort key columns' cids @@ -570,7 +590,6 @@ Status FlexibleReadPlan::fill_non_primary_key_columns( old_value_block, mutable_full_columns, use_default_or_null_flag, has_default_or_nullable, segment_start_pos, block_start_pos, block, skip_bitmaps)); } - full_block.set_columns(std::move(mutable_full_columns)); return Status::OK(); } @@ -806,8 +825,7 @@ void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, i ->get_data() .back(); const auto& new_row_skip_bitmap = - assert_cast( - src_block->get_by_position(cid).column->assume_mutable().get()) + assert_cast(src_block->get_by_position(cid).column.get()) ->get_data()[rid]; cur_skip_bitmap &= new_row_skip_bitmap; continue; @@ -952,15 +970,13 @@ Status BlockAggregator::aggregate_for_sequence_column( DCHECK_EQ(block->columns(), _tablet_schema.num_columns()); // the process logic here is the same as MemTable::_aggregate_for_flexible_partial_update_without_seq_col() // after this function, there will be at most 2 rows for a specified key - std::vector* skip_bitmaps = &( - assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filtered_block = _tablet_schema.create_block(); - MutableBlock output_block = MutableBlock::build_mutable_block(&filtered_block); + MutableBlock output_block = MutableBlock::build_mutable_block(std::move(filtered_block)); int same_key_rows {0}; std::string previous_key {}; @@ -1025,11 +1041,9 @@ Status BlockAggregator::aggregate_for_insert_after_delete( // there will be at most 2 rows for a specified key in block when control flow reaches here // after this function, there will not be duplicate rows in block - std::vector* skip_bitmaps = &( - assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) - .column->assume_mutable() - .get()) - ->get_data()); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(block, _tablet_schema.skip_bitmap_col_idx()) + ->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); auto filter_column = ColumnUInt8::create(num_rows, 1); diff --git a/be/src/storage/schema_change/schema_change.cpp b/be/src/storage/schema_change/schema_change.cpp index 009231f2da7b01..9cd6cd702a9900 100644 --- a/be/src/storage/schema_change/schema_change.cpp +++ b/be/src/storage/schema_change/schema_change.cpp @@ -169,14 +169,18 @@ class MultiBlockMerger { if (i == rows - 1 || _cmp.compare(row_refs[i], row_refs[i + 1])) { for (int j = 0; j < key_number; j++) { - finalized_block.get_by_position(j).column->assume_mutable()->insert_from( - *row_ref.get_column(j), row_ref.position); + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); + column->insert_from(*row_ref.get_column(j), row_ref.position); + column_ptr = std::move(column); } for (int j = key_number; j < columns; j++) { + auto& column_ptr = finalized_block.get_by_position(j).column; + auto column = column_ptr->assume_mutable(); agg_functions[j - key_number]->insert_result_into( - agg_places[j - key_number], - finalized_block.get_by_position(j).column->assume_mutable_ref()); + agg_places[j - key_number], *column); + column_ptr = std::move(column); agg_functions[j - key_number]->reset(agg_places[j - key_number]); } @@ -222,12 +226,14 @@ class MultiBlockMerger { int limit = std::min(ALTER_TABLE_BATCH_SIZE, rows - i); for (int idx = 0; idx < columns; idx++) { - auto column = finalized_block.get_by_position(idx).column->assume_mutable(); + auto& column_ptr = finalized_block.get_by_position(idx).column; + auto column = column_ptr->assume_mutable(); for (int j = 0; j < limit; j++) { auto row_ref = pushed_row_refs[i + j]; column->insert_from(*row_ref.get_column(idx), row_ref.position); } + column_ptr = std::move(column); } RETURN_IF_ERROR(rowset_writer->add_block(&finalized_block)); finalized_block.clear_column_data(); @@ -379,6 +385,7 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { column = column->convert_to_predicate_column_if_dictionary(); column->insert_duplicate_fields(value, row_num); } + new_block->get_by_position(idx).column = std::move(column); } else { // same type, just swap column swap_idx_list.emplace_back(_schema_mapping[idx].ref_column_idx, idx); @@ -395,21 +402,20 @@ Status BlockChanger::change_block(Block* ref_block, Block* new_block) const { if (ref_col_nullable != new_col_nullable) { // not nullable to nullable if (new_col_nullable) { - auto* new_nullable_col = - assert_cast(new_col->assume_mutable().get()); + auto mutable_new_col = new_col->assume_mutable(); + auto* new_nullable_col = assert_cast(mutable_new_col.get()); new_nullable_col->change_nested_column(ref_col); new_nullable_col->get_null_map_data().resize_fill(ref_col->size()); + new_col = std::move(mutable_new_col); } else { // nullable to not nullable: // suppose column `c_phone` is originally varchar(16) NOT NULL, // then do schema change `alter table test modify column c_phone int not null`, // the cast expr of schema change is `CastExpr(CAST String to Nullable(Int32))`, // so need to handle nullable to not nullable here - auto* ref_nullable_col = - assert_cast(ref_col->assume_mutable().get()); - - new_col = ref_nullable_col->get_nested_column_ptr(); + const auto& ref_nullable_col = assert_cast(*ref_col); + new_col = ref_nullable_col.get_nested_column_ptr(); } } else { new_block->get_by_position(it.second).column = diff --git a/be/src/storage/segment/column_reader.cpp b/be/src/storage/segment/column_reader.cpp index 630a60d6f9f8ef..02c59ce6cb62b1 100644 --- a/be/src/storage/segment/column_reader.cpp +++ b/be/src/storage/segment/column_reader.cpp @@ -77,6 +77,7 @@ #include "util/bitmap.h" #include "util/block_compression.h" #include "util/concurrency_stats.h" +#include "util/defer_op.h" #include "util/rle_encoding.h" // for RleDecoder #include "util/slice.h" @@ -996,7 +997,8 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* auto& column_map = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); - auto column_offsets_ptr = column_map.get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(column_offsets_ptr); }}; bool offsets_has_null = false; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offsets_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); @@ -1008,10 +1010,12 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start - 1]); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto key_ptr = column_map.get_keys().assume_mutable(); - auto val_ptr = column_map.get_values().assume_mutable(); if (num_items > 0) { + auto key_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto val_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(key_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(val_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual key/value data, fill with defaults key_ptr->insert_many_defaults(num_items); @@ -1024,9 +1028,6 @@ Status MapFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bool* RETURN_IF_ERROR(_val_iterator->next_batch(&num_read, val_ptr, &val_has_null)); DCHECK(num_read == num_items); } - - column_map.get_keys_ptr() = std::move(key_ptr); - column_map.get_values_ptr() = std::move(val_ptr); } if (dst->is_nullable()) { @@ -1081,9 +1082,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t return Status::OK(); } // resolve ColumnMap and nullable wrapper - const auto& column_map = assert_cast( + auto& column_map = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); - auto offsets_ptr = column_map.get_offsets_column().assume_mutable(); + auto offsets_ptr = IColumn::mutate(std::move(column_map.get_offsets_ptr())); + Defer defer_offsets {[&] { column_map.get_offsets_ptr() = std::move(offsets_ptr); }}; auto& offsets = static_cast(*offsets_ptr); size_t base = offsets.get_data().empty() ? 0 : offsets.get_data().back(); @@ -1167,8 +1169,10 @@ Status MapFileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t } // 6. read key/value elements for non-empty sizes - auto keys_ptr = column_map.get_keys().assume_mutable(); - auto vals_ptr = column_map.get_values().assume_mutable(); + auto keys_ptr = IColumn::mutate(std::move(column_map.get_keys_ptr())); + auto vals_ptr = IColumn::mutate(std::move(column_map.get_values_ptr())); + Defer defer_keys {[&] { column_map.get_keys_ptr() = std::move(keys_ptr); }}; + Defer defer_values {[&] { column_map.get_values_ptr() = std::move(vals_ptr); }}; size_t this_run = sizes[0]; auto start_idx = starts_data[0]; @@ -1413,12 +1417,13 @@ Status StructFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, bo dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); for (size_t i = 0; i < column_struct.tuple_size(); i++) { size_t num_read = *n; - auto sub_column_ptr = column_struct.get_column(i).assume_mutable(); + auto sub_column_ptr = IColumn::mutate(std::move(column_struct.get_column_ptr(i))); + Defer defer_sub_column { + [&] { column_struct.get_column_ptr(i) = std::move(sub_column_ptr); }}; bool column_has_null = false; RETURN_IF_ERROR( _sub_column_iterators[i]->next_batch(&num_read, sub_column_ptr, &column_has_null)); DCHECK(num_read == *n); - column_struct.get_column_ptr(i) = std::move(sub_column_ptr); } if (dst->is_nullable()) { @@ -1773,11 +1778,12 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo return Status::OK(); } - const auto& column_array = assert_cast( + auto& column_array = assert_cast( dst->is_nullable() ? static_cast(*dst).get_nested_column() : *dst); bool offsets_has_null = false; - auto column_offsets_ptr = column_array.get_offsets_column().assume_mutable(); + auto column_offsets_ptr = IColumn::mutate(std::move(column_array.get_offsets_ptr())); + Defer defer_offsets {[&] { column_array.get_offsets_ptr() = std::move(column_offsets_ptr); }}; ssize_t start = column_offsets_ptr->size(); RETURN_IF_ERROR(_offset_iterator->next_batch(n, column_offsets_ptr, &offsets_has_null)); if (*n == 0) { @@ -1787,8 +1793,9 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, MutableColumnPtr& dst, boo RETURN_IF_ERROR(_offset_iterator->_calculate_offsets(start, column_offsets)); size_t num_items = column_offsets.get_data().back() - column_offsets.get_data()[start - 1]; // -1 is valid - auto column_items_ptr = column_array.get_data().assume_mutable(); if (num_items > 0) { + auto column_items_ptr = IColumn::mutate(std::move(column_array.get_data_ptr())); + Defer defer_items {[&] { column_array.get_data_ptr() = std::move(column_items_ptr); }}; if (read_offset_only()) { // OFFSET_ONLY mode: skip reading actual item data, fill with defaults column_items_ptr->insert_many_defaults(num_items); diff --git a/be/src/storage/segment/historical_row_retriever.cpp b/be/src/storage/segment/historical_row_retriever.cpp index 45ed91b281c9e9..ec8c8f9e466f34 100644 --- a/be/src/storage/segment/historical_row_retriever.cpp +++ b/be/src/storage/segment/historical_row_retriever.cpp @@ -198,29 +198,30 @@ Status PrimaryKeyModelRowRetriever::build_before_block(Block* before_block, old_value_block, &read_index, false, nullptr)); - auto mutable_before_columns = before_block->mutate_columns(); - // Fill each row in before_block. - for (uint32_t idx = 0; idx < num_rows; ++idx) { - auto it = read_index.find(idx); - if (it == read_index.end()) { - // No historical row, fill BEFORE with NULL. - for (size_t i = 0; i < value_cids.size(); ++i) { - auto* nullable_column = - assert_cast(mutable_before_columns[i].get()); - nullable_column->insert_many_defaults(1); + { + auto mutable_before_columns_guard = before_block->mutate_columns_scoped(); + auto& mutable_before_columns = mutable_before_columns_guard.mutable_columns(); + // Fill each row in before_block. + for (uint32_t idx = 0; idx < num_rows; ++idx) { + auto it = read_index.find(idx); + if (it == read_index.end()) { + // No historical row, fill BEFORE with NULL. + for (size_t i = 0; i < value_cids.size(); ++i) { + auto* nullable_column = + assert_cast(mutable_before_columns[i].get()); + nullable_column->insert_many_defaults(1); + } + continue; } - continue; - } - uint32_t pos_in_old_block = it->second; - for (size_t i = 0; i < value_cids.size(); ++i) { - insert_value_to_nullable_column(mutable_before_columns[i].get(), - *old_value_block.get_by_position(i).column, - pos_in_old_block); + uint32_t pos_in_old_block = it->second; + for (size_t i = 0; i < value_cids.size(); ++i) { + insert_value_to_nullable_column(mutable_before_columns[i].get(), + *old_value_block.get_by_position(i).column, + pos_in_old_block); + } } } - - before_block->set_columns(std::move(mutable_before_columns)); return Status::OK(); } diff --git a/be/src/storage/segment/row_binlog_segment_writer.cpp b/be/src/storage/segment/row_binlog_segment_writer.cpp index 11dafa3a78a4b2..ccfc53d9fe9223 100644 --- a/be/src/storage/segment/row_binlog_segment_writer.cpp +++ b/be/src/storage/segment/row_binlog_segment_writer.cpp @@ -297,50 +297,55 @@ Status RowBinlogSegmentWriter::_fill_binlog_columns(size_t num_rows, std::vector binlog_cids = {_binlog_col_start_id, _binlog_col_start_id + 1, _binlog_col_start_id + 2}; Block binlog_prefix_block = _tablet_schema->create_block_by_cids(binlog_cids); - MutableColumns binlog_prefix_columns = binlog_prefix_block.mutate_columns(); - // we can't get correct lsn number before commit, because we can't get the version before commit, - // but we can fill auto-inc lsn to ensure the order first, then fill version when read single rowset. - IColumn* lsn_col_ptr = binlog_prefix_columns[0].get(); - CHECK(_lsn_ids->size() >= num_rows) << _lsn_ids->size() << " vs " << num_rows; - for (int i = 0; i < num_rows; i++) { - assert_cast(lsn_col_ptr) - ->insert_value(static_cast(_lsn_ids->at(i))); - } - - // wrong op only happens when partial-update, it will be fixed by delete bitmap when publish - const FieldType op_col_type = _tablet_schema->column(binlog_cids[1]).type(); - IColumn* op_col_ptr = binlog_prefix_columns[1].get(); - auto* op_nullable_column = typeid_cast(op_col_ptr); - IColumn* op_nested_column = - op_nullable_column != nullptr ? &op_nullable_column->get_nested_column() : op_col_ptr; - - CHECK(op_types.size() >= num_rows) << op_types.size() << " vs " << num_rows; - CHECK(op_col_type == FieldType::OLAP_FIELD_TYPE_BIGINT) - << "row binlog op column type must be BIGINT, actual=" << static_cast(op_col_type); - auto* op_int64_column = assert_cast(op_nested_column); - for (int i = 0; i < num_rows; i++) { - op_int64_column->insert_value(op_types[i]); - } - - // we can't get correct timestamp when commit - IColumn* ts_col_ptr = binlog_prefix_columns[2].get(); - auto timestamp = UnixMillis(); - auto* ts_nullable_column = typeid_cast(ts_col_ptr); - if (ts_nullable_column != nullptr) { - assert_cast(&ts_nullable_column->get_nested_column()) - ->insert_many_vals(timestamp, num_rows); - } else { - assert_cast(ts_col_ptr)->insert_many_vals(timestamp, num_rows); - } + { + auto binlog_prefix_columns_guard = binlog_prefix_block.mutate_columns_scoped(); + auto& binlog_prefix_columns = binlog_prefix_columns_guard.mutable_columns(); + // we can't get correct lsn number before commit, because we can't get the version before commit, + // but we can fill auto-inc lsn to ensure the order first, then fill version when read single rowset. + IColumn* lsn_col_ptr = binlog_prefix_columns[0].get(); + CHECK(_lsn_ids->size() >= num_rows) << _lsn_ids->size() << " vs " << num_rows; + for (int i = 0; i < num_rows; i++) { + assert_cast(lsn_col_ptr) + ->insert_value(static_cast(_lsn_ids->at(i))); + } - // finally update null map - for (int i = 0; i < num_rows; i++) { - //lsn_column->get_null_map_data().emplace_back(0); - if (op_nullable_column != nullptr) { - op_nullable_column->get_null_map_data().emplace_back(0); + // wrong op only happens when partial-update, it will be fixed by delete bitmap when publish + const FieldType op_col_type = _tablet_schema->column(binlog_cids[1]).type(); + IColumn* op_col_ptr = binlog_prefix_columns[1].get(); + auto* op_nullable_column = typeid_cast(op_col_ptr); + IColumn* op_nested_column = op_nullable_column != nullptr + ? &op_nullable_column->get_nested_column() + : op_col_ptr; + + CHECK(op_types.size() >= num_rows) << op_types.size() << " vs " << num_rows; + CHECK(op_col_type == FieldType::OLAP_FIELD_TYPE_BIGINT) + << "row binlog op column type must be BIGINT, actual=" + << static_cast(op_col_type); + auto* op_int64_column = assert_cast(op_nested_column); + for (int i = 0; i < num_rows; i++) { + op_int64_column->insert_value(op_types[i]); } + + // we can't get correct timestamp when commit + IColumn* ts_col_ptr = binlog_prefix_columns[2].get(); + auto timestamp = UnixMillis(); + auto* ts_nullable_column = typeid_cast(ts_col_ptr); if (ts_nullable_column != nullptr) { - ts_nullable_column->get_null_map_data().emplace_back(0); + assert_cast(&ts_nullable_column->get_nested_column()) + ->insert_many_vals(timestamp, num_rows); + } else { + assert_cast(ts_col_ptr)->insert_many_vals(timestamp, num_rows); + } + + // finally update null map + for (int i = 0; i < num_rows; i++) { + //lsn_column->get_null_map_data().emplace_back(0); + if (op_nullable_column != nullptr) { + op_nullable_column->get_null_map_data().emplace_back(0); + } + if (ts_nullable_column != nullptr) { + ts_nullable_column->get_null_map_data().emplace_back(0); + } } } @@ -389,13 +394,12 @@ Status RowBinlogSegmentWriter::_fill_before_columns(size_t num_rows) { // Compatibility path: only fill empty BEFORE values. if (_fill_empty_before_value) { - MutableColumns before_mutable_columns = before_block.mutate_columns(); - for (auto& before_mutable_column : before_mutable_columns) { + auto before_mutable_columns_guard = before_block.mutate_columns_scoped(); + for (auto& before_mutable_column : before_mutable_columns_guard.mutable_columns()) { auto* before_nullable_column = reinterpret_cast(before_mutable_column.get()); before_nullable_column->insert_many_defaults(num_rows); } - before_block.set_columns(std::move(before_mutable_columns)); } else { DCHECK(_historical_data_writer != nullptr); diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index edf3ebc81dd0b7..917777f24ebfbd 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -389,7 +389,7 @@ void SegmentWriter::_maybe_invalid_row_cache(const std::string& key) { } } -void SegmentWriter::_serialize_block_to_row_column(const Block& block) { +void SegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -398,14 +398,14 @@ void SegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, {_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()}); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -719,7 +719,7 @@ Status SegmentWriter::append_block(const Block* block, size_t row_pos, size_t nu // or it's schema change write(since column data type maybe changed, so we should reubild) if (_opts.write_type == DataWriteType::TYPE_DIRECT || _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { - _serialize_block_to_row_column(*block); + _serialize_block_to_row_column(*const_cast(block)); } if (_opts.rowset_ctx->write_type != DataWriteType::TYPE_COMPACTION && diff --git a/be/src/storage/segment/segment_writer.h b/be/src/storage/segment/segment_writer.h index 9b6b8b55c3aea1..5623a06a70cf48 100644 --- a/be/src/storage/segment/segment_writer.h +++ b/be/src/storage/segment/segment_writer.h @@ -182,7 +182,7 @@ class SegmentWriter { void set_min_max_key(const Slice& key); void set_min_key(const Slice& key); void set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _generate_primary_key_index( const std::vector& primary_key_coders, const std::vector& primary_key_columns, diff --git a/be/src/storage/segment/variant/binary_column_extract_iterator.h b/be/src/storage/segment/variant/binary_column_extract_iterator.h index a58cf10a6b0e5a..a9aef9ca5aa1af 100644 --- a/be/src/storage/segment/variant/binary_column_extract_iterator.h +++ b/be/src/storage/segment/variant/binary_column_extract_iterator.h @@ -153,8 +153,8 @@ class BinaryColumnExtractIterator : public BaseBinaryColumnProcessor { _sparse_column_cache->binary_column->get_ptr(), 0, _sparse_column_cache->binary_column->size()); var.incr_num_rows(_sparse_column_cache->binary_column->size()); - var.get_sparse_column()->assume_mutable()->resize(var.rows()); - var.get_doc_value_column()->assume_mutable()->resize(var.rows()); + var.get_sparse_column_mutable().resize(var.rows()); + var.get_doc_value_column_mutable().resize(var.rows()); ENABLE_CHECK_CONSISTENCY(&var); } diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp index cd8cd77696771f..878ca16019b0f4 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.cpp +++ b/be/src/storage/segment/variant/hierarchical_data_iterator.cpp @@ -203,7 +203,7 @@ Status HierarchicalDataIterator::_process_sub_columns( ColumnVariant& container_variant, const PathsWithColumnAndType& non_nested_subcolumns) { for (const auto& entry : non_nested_subcolumns) { DCHECK(!entry.path.has_nested_part()); - bool add = container_variant.add_sub_column(entry.path, entry.column->assume_mutable(), + bool add = container_variant.add_sub_column(entry.path, IColumn::mutate(entry.column), entry.type); if (!add) { return Status::InternalError("Duplicated {}, type {}", entry.path.get_path(), @@ -225,7 +225,7 @@ Status HierarchicalDataIterator::_process_nested_columns( assert_cast(remove_nullable(entry.second[0].column).get()); MutableColumnPtr nested_object = ColumnVariant::create(0, false, base_array->get_data().size()); - MutableColumnPtr offset = base_array->get_offsets_ptr()->assume_mutable(); + MutableColumnPtr offset = IColumn::mutate(base_array->get_offsets_ptr()); auto* nested_object_ptr = assert_cast(nested_object.get()); // flatten nested arrays for (const auto& subcolumn : entry.second) { @@ -246,7 +246,7 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.get_path(), subcolumn.type->get_name()); } #endif - MutableColumnPtr flattend_column = target_array->get_data_ptr()->assume_mutable(); + MutableColumnPtr flattend_column = IColumn::mutate(target_array->get_data_ptr()); DataTypePtr flattend_type = check_and_get_data_type(remove_nullable(type).get()) ->get_nested_type(); @@ -255,14 +255,18 @@ Status HierarchicalDataIterator::_process_nested_columns( subcolumn.path.copy_pop_nfront(entry.first.get_parts().size()), std::move(flattend_column), std::move(flattend_type)); } - nested_object = make_nullable(nested_object->get_ptr())->assume_mutable(); - auto array = - make_nullable(ColumnArray::create(std::move(nested_object), std::move(offset))); + const size_t nested_object_size = nested_object->size(); + nested_object = ColumnNullable::create(std::move(nested_object), + ColumnUInt8::create(nested_object_size, 0)); + auto array = ColumnArray::create(std::move(nested_object), std::move(offset)); + const size_t array_size = array->size(); + auto nullable_array = + ColumnNullable::create(std::move(array), ColumnUInt8::create(array_size, 0)); PathInDataBuilder builder; // add parent prefix builder.append(entry.first.get_parts(), false); PathInData parent_path = builder.build(); - container_variant.add_sub_column(parent_path, array->assume_mutable(), + container_variant.add_sub_column(parent_path, std::move(nullable_array), container_variant.NESTED_TYPE); } return Status::OK(); @@ -283,14 +287,17 @@ Status HierarchicalDataIterator::_init_container(MutableColumnPtr& container, si // auto column = root_var.get_root(); // auto type = root_var.get_root_type(); - MutableColumnPtr column = _root_reader->column->get_ptr(); + MutableColumnPtr column = IColumn::mutate(_root_reader->column->get_ptr()); // container_variant.add_sub_column({}, std::move(column), _root_reader->type); DCHECK(column->size() == nrows); - auto nullable_column = make_nullable(column->get_ptr()); + if (!column->is_nullable()) { + const size_t column_size = column->size(); + column = ColumnNullable::create(std::move(column), ColumnUInt8::create(column_size, 0)); + } auto type = make_nullable(_root_reader->type); // make sure the root type is nullable container = ColumnVariant::create(max_subcolumns_count, enable_doc_mode, type, - nullable_column->assume_mutable()); + std::move(column)); } else { DataTypePtr root_type = std::make_shared(); auto column = ColumnNothing::create(nrows); @@ -359,10 +366,10 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container if (_path.get_parts().empty()) { if (_read_type == ReadType::SUBCOLUMNS_AND_SPARSE) { container_variant.set_sparse_column(_binary_column_reader->column->get_ptr()); - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } else if (_read_type == ReadType::DOC_VALUE_COLUMN) { container_variant.set_doc_value_column(_binary_column_reader->column->get_ptr()); - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { return Status::InternalError("Invalid read type {}", _read_type); } @@ -378,7 +385,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container const auto& src_values = assert_cast(src_map.get_values()); // Clear pre-initialized doc_value offsets (created by ColumnVariant ctor with num_rows) - container_variant.get_doc_value_column()->assume_mutable()->clear(); + container_variant.get_doc_value_column_mutable().clear(); auto [dst_paths, dst_values] = container_variant.get_doc_value_data_paths_and_values(); auto& dst_offsets = container_variant.serialized_doc_value_column_offsets(); @@ -419,13 +426,13 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } dst_offsets.push_back(dst_paths->size()); } - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { const auto& offsets = assert_cast(*_binary_column_reader->column).get_offsets(); /// Check if there is no data in shared data in current range. if (offsets.back() == offsets[-1]) { - container_variant.get_sparse_column()->assume_mutable()->resize(nrows); + container_variant.get_sparse_column_mutable().resize(nrows); } else { // Read for variant sparse column // Example path: a.b @@ -444,8 +451,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container assert_cast(sparse_data_map.get_values()); auto& sparse_data_offsets = - assert_cast( - *container_variant.get_sparse_column()->assume_mutable()) + assert_cast(container_variant.get_sparse_column_mutable()) .get_offsets(); auto [sparse_data_paths, sparse_data_values] = container_variant.get_sparse_data_paths_and_values(); @@ -544,7 +550,7 @@ Status HierarchicalDataIterator::_process_binary_column(ColumnVariant& container } } } - container_variant.get_doc_value_column()->assume_mutable()->resize(nrows); + container_variant.get_doc_value_column_mutable().resize(nrows); } ENABLE_CHECK_CONSISTENCY(&container_variant); return Status::OK(); diff --git a/be/src/storage/segment/variant/hierarchical_data_iterator.h b/be/src/storage/segment/variant/hierarchical_data_iterator.h index ae7f96526a633f..fb8f5cad819bec 100644 --- a/be/src/storage/segment/variant/hierarchical_data_iterator.h +++ b/be/src/storage/segment/variant/hierarchical_data_iterator.h @@ -137,6 +137,7 @@ class HierarchicalDataIterator : public ColumnIterator { // process read template Status process_read(ReadFunction&& read_func, MutableColumnPtr& dst, size_t nrows) { + dst = IColumn::mutate(std::move(dst)); // // Read all sub columns, and merge with root column ColumnNullable* nullable_column = nullptr; if (dst->is_nullable()) { diff --git a/be/src/storage/segment/variant/variant_column_reader.cpp b/be/src/storage/segment/variant/variant_column_reader.cpp index 0008cbca94a6c3..e7913785274dc1 100644 --- a/be/src/storage/segment/variant/variant_column_reader.cpp +++ b/be/src/storage/segment/variant/variant_column_reader.cpp @@ -1587,8 +1587,9 @@ static void fill_nested_with_defaults(MutableColumnPtr& dst, MutableColumnPtr& s } auto new_nested = dst_array->get_data_ptr()->clone_resized(sibling_array->get_data_ptr()->size()); - auto new_array = make_nullable(ColumnArray::create( - new_nested->assume_mutable(), sibling_array->get_offsets_ptr()->assume_mutable())); + ColumnPtr nested_column = std::move(new_nested); + auto new_array = + make_nullable(ColumnArray::create(nested_column, sibling_array->get_offsets_ptr())); dst->insert_range_from(*new_array, 0, new_array->size()); #ifndef NDEBUG if (!dst_array->has_equal_offsets(*sibling_array)) { diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp b/be/src/storage/segment/variant/variant_column_writer_impl.cpp index 95f266e15c44cb..01d01f8c51f203 100644 --- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp +++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp @@ -1221,8 +1221,14 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, DCHECK_EQ(ptr->get_root()->get_ptr()->size(), num_rows); converter->add_column_data_convertor(*_tablet_column); const uint8_t* nullmap = nullptr; - auto& nullable_column = assert_cast(*ptr->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + // get_root() already returns a MutableColumnPtr; store it to avoid dangling ref and + // to avoid calling assume_mutable() again (which would see use_count>1 and throw). + auto root_mut = ptr->get_root(); + auto& nullable_column = assert_cast(*root_mut); + // Use const access to get the nested column ptr without bumping use_count in the + // non-const chameleon_ptr path, then mutate() to get exclusive ownership. + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const bool has_root_ng = std::ranges::any_of(_nested_group_routing_plan.ng_only_prefixes, @@ -1234,13 +1240,15 @@ Status VariantColumnWriterImpl::_process_root_column(ColumnVariant* ptr, // If the root variant is nullable, then update the root column null column with the outer null column. if (_tablet_column->is_nullable()) { // use outer null column as final null column + // Move root_column (exclusive) directly into create() to avoid sharing ownership. root_column = - ColumnNullable::create(root_column->get_ptr(), ColumnUInt8::create(*_null_column)); + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(*_null_column)); nullmap = _null_column->get_data().data(); } else { // Otherwise setting to all not null. - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + size_t col_size = root_column->size(); + root_column = + ColumnNullable::create(std::move(root_column), ColumnUInt8::create(col_size, 0)); } // make sure the root_column is nullable RETURN_IF_ERROR(converter->set_source_content_with_specifid_column( diff --git a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp index 48ea040c7bafe8..3dbe3026a5a634 100644 --- a/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp +++ b/be/src/storage/segment/variant/variant_streaming_compaction_writer.cpp @@ -140,8 +140,10 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant auto expected_root_type = make_nullable(std::make_shared()); variant->ensure_root_node_type(expected_root_type); - auto& nullable_column = assert_cast(*variant->get_root()->assume_mutable()); - auto root_column = nullable_column.get_nested_column_ptr(); + auto root_mut = variant->get_root(); + auto& nullable_column = assert_cast(*root_mut); + auto root_column = IColumn::mutate( + static_cast(nullable_column).get_nested_column_ptr()); const size_t num_rows = chunk_variant.rows(); variant_writer_helpers::maybe_remove_root_jsonb_with_empty_defaults( &root_column, num_rows, _streaming_plan.can_remove_root_jsonb()); @@ -156,10 +158,11 @@ Status VariantStreamingCompactionWriter::_append_root_column(const ColumnVariant } else { null_column->insert_many_defaults(num_rows); } - root_column = ColumnNullable::create(root_column->get_ptr(), std::move(null_column)); + root_column = ColumnNullable::create(std::move(root_column), std::move(null_column)); } else { - root_column = ColumnNullable::create(root_column->get_ptr(), - ColumnUInt8::create(root_column->size(), 0)); + const size_t root_column_size = root_column->size(); + root_column = ColumnNullable::create(std::move(root_column), + ColumnUInt8::create(root_column_size, 0)); } auto converter = std::make_unique(); diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 56d99d7249efa2..fdd84bef48c56b 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -92,6 +92,14 @@ inline std::string vertical_segment_writer_mem_tracker_name(uint32_t segment_id) return "VerticalSegmentWriter:Segment-" + std::to_string(segment_id); } +static ColumnBitmap* get_mutable_skip_bitmap_column(Block* block, size_t skip_bitmap_col_idx) { + auto skip_bitmap_column = + IColumn::mutate(std::move(block->get_by_position(skip_bitmap_col_idx).column)); + auto* skip_bitmap_column_ptr = assert_cast(skip_bitmap_column.get()); + block->replace_by_position(skip_bitmap_col_idx, std::move(skip_bitmap_column)); + return skip_bitmap_column_ptr; +} + VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, TabletSchemaSPtr tablet_schema, BaseTabletSPtr tablet, DataDir* data_dir, @@ -362,7 +370,7 @@ void VerticalSegmentWriter::_maybe_invalid_row_cache(const std::string& key) con } } -void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { +void VerticalSegmentWriter::_serialize_block_to_row_column(Block& block) { if (block.rows() == 0) { return; } @@ -371,15 +379,15 @@ void VerticalSegmentWriter::_serialize_block_to_row_column(const Block& block) { int row_column_id = 0; for (int i = 0; i < _tablet_schema->num_columns(); ++i) { if (_tablet_schema->column(i).is_row_store_column()) { - auto* row_store_column = static_cast( - block.get_by_position(i).column->assume_mutable_ref().assume_mutable().get()); - row_store_column->clear(); + auto row_store_column_ptr = block.get_by_position(i).column->clone_empty(); + auto* row_store_column = static_cast(row_store_column_ptr.get()); DataTypeSerDeSPtrs serdes = create_data_type_serdes(block.get_data_types()); std::unordered_set row_store_cids_set(_tablet_schema->row_columns_uids().begin(), _tablet_schema->row_columns_uids().end()); JsonbSerializeUtil::block_to_jsonb(*_tablet_schema, block, *row_store_column, cast_set(_tablet_schema->num_columns()), serdes, row_store_cids_set); + block.replace_by_position(i, std::move(row_store_column_ptr)); break; } } @@ -766,10 +774,9 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn RETURN_IF_ERROR(_block_aggregator.convert_seq_column(const_cast(data.block), data.row_pos, data.num_rows, seq_column)); - std::vector* skip_bitmaps = &( - assert_cast( - data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) - ->get_data()); + auto* mutable_block = const_cast(data.block); + std::vector* skip_bitmaps = + &get_mutable_skip_bitmap_column(mutable_block, skip_bitmap_col_idx)->get_data(); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*data.block, data.row_pos + data.num_rows); DCHECK(delete_signs != nullptr); @@ -1010,7 +1017,7 @@ Status VerticalSegmentWriter::write_batch() { _opts.write_type == DataWriteType::TYPE_SCHEMA_CHANGE) { for (auto& data : _batched_blocks) { // TODO: maybe we should pass range to this method - _serialize_block_to_row_column(*data.block); + _serialize_block_to_row_column(*const_cast(data.block)); } } diff --git a/be/src/storage/segment/vertical_segment_writer.h b/be/src/storage/segment/vertical_segment_writer.h index 5c0ec0930e522d..39235811c07880 100644 --- a/be/src/storage/segment/vertical_segment_writer.h +++ b/be/src/storage/segment/vertical_segment_writer.h @@ -158,7 +158,7 @@ class VerticalSegmentWriter { void _set_min_max_key(const Slice& key); void _set_min_key(const Slice& key); void _set_max_key(const Slice& key); - void _serialize_block_to_row_column(const Block& block); + void _serialize_block_to_row_column(Block& block); Status _probe_key_for_mow(std::string key, std::size_t segment_pos, bool have_input_seq_column, bool have_delete_sign, const std::vector& specified_rowsets, diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp index 0bde14b5b31606..b9beda9c052a38 100644 --- a/be/src/storage/tablet/base_tablet.cpp +++ b/be/src/storage/tablet/base_tablet.cpp @@ -876,17 +876,20 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, } Status BaseTablet::sort_block(Block& in_block, Block& output_block) { - MutableBlock mutable_input_block = MutableBlock::build_mutable_block(&in_block); - MutableBlock mutable_output_block = MutableBlock::build_mutable_block(&output_block); + ScopedMutableBlock scoped_input_block(&in_block); + auto& mutable_input_block = scoped_input_block.mutable_block(); + ScopedMutableBlock scoped_output_block(&output_block); + auto& mutable_output_block = scoped_output_block.mutable_block(); std::shared_ptr vec_row_comparator = std::make_shared(_tablet_meta->tablet_schema()); vec_row_comparator->set_block(&mutable_input_block); std::vector> row_in_blocks; - DCHECK(in_block.rows() <= std::numeric_limits::max()); - row_in_blocks.reserve(in_block.rows()); - for (size_t i = 0; i < in_block.rows(); ++i) { + const auto input_rows = mutable_input_block.rows(); + DCHECK(input_rows <= std::numeric_limits::max()); + row_in_blocks.reserve(input_rows); + for (size_t i = 0; i < input_rows; ++i) { row_in_blocks.emplace_back(std::make_unique(i)); } std::sort(row_in_blocks.begin(), row_in_blocks.end(), @@ -898,12 +901,14 @@ Status BaseTablet::sort_block(Block& in_block, Block& output_block) { return value < 0; }); std::vector row_pos_vec; - row_pos_vec.reserve(in_block.rows()); + row_pos_vec.reserve(input_rows); for (auto& block : row_in_blocks) { row_pos_vec.emplace_back(block->_row_pos); } - return mutable_output_block.add_rows(&in_block, row_pos_vec.data(), - row_pos_vec.data() + in_block.rows()); + scoped_input_block.restore(); + RETURN_IF_ERROR(mutable_output_block.add_rows(&in_block, row_pos_vec.data(), + row_pos_vec.data() + input_rows)); + return Status::OK(); } // fetch value by row column @@ -988,7 +993,8 @@ Status BaseTablet::generate_default_value_block(const TabletSchema& schema, const std::vector& default_values, const Block& ref_block, Block& default_value_block) { - auto mutable_default_value_columns = default_value_block.mutate_columns(); + auto mutable_default_value_columns_guard = default_value_block.mutate_columns_scoped(); + auto& mutable_default_value_columns = mutable_default_value_columns_guard.mutable_columns(); for (auto i = 0; i < cids.size(); ++i) { const auto& column = schema.column(cids[i]); if (column.has_default_value()) { @@ -998,7 +1004,6 @@ Status BaseTablet::generate_default_value_block(const TabletSchema& schema, str, *mutable_default_value_columns[i])); } } - default_value_block.set_columns(std::move(mutable_default_value_columns)); return Status::OK(); } @@ -1012,7 +1017,8 @@ Status BaseTablet::generate_new_block_for_partial_update( // 3. write a new segment and modify rowset meta // 4. mark current keys deleted CHECK(output_block); - auto full_mutable_columns = output_block->mutate_columns(); + auto full_mutable_columns_guard = output_block->mutate_columns_scoped(); + auto& full_mutable_columns = full_mutable_columns_guard.mutable_columns(); const auto& missing_cids = partial_update_info->missing_cids; const auto& update_cids = partial_update_info->update_cids; auto old_block = rowset_schema->create_block_by_cids(missing_cids); @@ -1115,7 +1121,7 @@ Status BaseTablet::generate_new_block_for_partial_update( } } } - output_block->set_columns(std::move(full_mutable_columns)); + full_mutable_columns_guard.restore(); VLOG_DEBUG << "full block when publish: " << output_block->dump_data(); return Status::OK(); } @@ -1220,7 +1226,8 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update( old_block, default_value_block)); // 4. build the final block - auto full_mutable_columns = output_block->mutate_columns(); + auto full_mutable_columns_guard = output_block->mutate_columns_scoped(); + auto& full_mutable_columns = full_mutable_columns_guard.mutable_columns(); DCHECK(rowset_schema->has_skip_bitmap_col()); auto skip_bitmap_col_idx = rowset_schema->skip_bitmap_col_idx(); const std::vector* skip_bitmaps = @@ -1273,7 +1280,7 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update( DCHECK_EQ(full_mutable_columns[cid]->size(), update_rows); } - output_block->set_columns(std::move(full_mutable_columns)); + full_mutable_columns_guard.restore(); VLOG_DEBUG << "full block when publish: " << output_block->dump_data(); return Status::OK(); } diff --git a/be/src/util/jsonb/serialize.cpp b/be/src/util/jsonb/serialize.cpp index 0088c6249f0030..6ff4a076f89025 100644 --- a/be/src/util/jsonb/serialize.cpp +++ b/be/src/util/jsonb/serialize.cpp @@ -36,6 +36,7 @@ #include "core/value/jsonb_value.h" #include "runtime/descriptors.h" #include "storage/tablet/tablet_schema.h" +#include "util/defer_op.h" #include "util/jsonb_document.h" #include "util/jsonb_stream.h" #include "util/jsonb_writer.h" @@ -79,51 +80,55 @@ Status JsonbSerializeUtil::jsonb_to_block( const std::unordered_map& col_id_to_idx, Block& dst, const std::vector& default_values, const std::unordered_set& include_cids) { + auto dst_columns_guard = dst.mutate_columns_scoped(); + MutableColumns& dst_columns = dst_columns_guard.mutable_columns(); for (int i = 0; i < jsonb_column.size(); ++i) { StringRef jsonb_data = jsonb_column.get_data_at(i); - RETURN_IF_ERROR(jsonb_to_block(serdes, jsonb_data.data, jsonb_data.size, col_id_to_idx, dst, - default_values, include_cids)); + RETURN_IF_ERROR(jsonb_to_columns(serdes, jsonb_data.data, jsonb_data.size, col_id_to_idx, + dst_columns, default_values, include_cids)); } return Status::OK(); } -// single row -Status JsonbSerializeUtil::jsonb_to_block( +Status JsonbSerializeUtil::jsonb_to_columns( const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, - const std::unordered_map& col_id_to_idx, Block& dst, + const std::unordered_map& col_id_to_idx, MutableColumns& dst_columns, const std::vector& default_values, const std::unordered_set& include_cids) { const JsonbDocument* pdoc = nullptr; RETURN_IF_ERROR(JsonbDocument::checkAndCreateDocument(data, size, &pdoc)); const JsonbDocument& doc = *pdoc; - size_t num_rows = dst.rows(); + DCHECK(!dst_columns.empty()); + size_t num_rows = dst_columns[0]->size(); size_t filled_columns = 0; for (auto it = doc->begin(); it != doc->end(); ++it) { auto col_it = col_id_to_idx.find(it->getKeyId()); if (col_it != col_id_to_idx.end() && (include_cids.empty() || include_cids.contains(it->getKeyId()))) { - MutableColumnPtr dst_column = - dst.get_by_position(col_it->second).column->assume_mutable(); + auto& dst_column = dst_columns[col_it->second]; serdes[col_it->second]->read_one_cell_from_jsonb(*dst_column, it->value()); ++filled_columns; } } - if (filled_columns >= dst.columns()) { + if (filled_columns >= dst_columns.size()) { return Status::OK(); } - auto fill_column = [&](Block& dst, int pos, size_t old_num_rows) { - MutableColumnPtr dst_column = dst.get_by_position(pos).column->assume_mutable(); + auto fill_column = [&](size_t pos, size_t old_num_rows) { + auto& dst_column = dst_columns[pos]; if (dst_column->size() < old_num_rows + 1) { DCHECK(dst_column->size() == old_num_rows); + Status st = Status::OK(); if (default_values[pos].empty()) { dst_column->insert_default(); } else { Slice value(default_values[pos].data(), default_values[pos].size()); DataTypeSerDe::FormatOptions opt; opt.converted_from_string = true; - RETURN_IF_ERROR( - serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt)); + st = serdes[pos]->deserialize_one_cell_from_json(*dst_column, value, opt); } + RETURN_IF_ERROR(st); + DCHECK(dst_column->size() == num_rows + 1); + return Status::OK(); } DCHECK(dst_column->size() == num_rows + 1); return Status::OK(); @@ -135,14 +140,26 @@ Status JsonbSerializeUtil::jsonb_to_block( if (col_it == col_id_to_idx.end()) { continue; } - RETURN_IF_ERROR(fill_column(dst, col_it->second, num_rows)); + RETURN_IF_ERROR(fill_column(static_cast(col_it->second), num_rows)); } } else { - for (int i = 0; i < dst.columns(); ++i) { - RETURN_IF_ERROR(fill_column(dst, i, num_rows)); + for (size_t i = 0; i < dst_columns.size(); ++i) { + RETURN_IF_ERROR(fill_column(i, num_rows)); } } return Status::OK(); } -} // namespace doris \ No newline at end of file +// single row +Status JsonbSerializeUtil::jsonb_to_block( + const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, + const std::unordered_map& col_id_to_idx, Block& dst, + const std::vector& default_values, + const std::unordered_set& include_cids) { + auto dst_columns_guard = dst.mutate_columns_scoped(); + MutableColumns& dst_columns = dst_columns_guard.mutable_columns(); + return jsonb_to_columns(serdes, data, size, col_id_to_idx, dst_columns, default_values, + include_cids); +} + +} // namespace doris diff --git a/be/src/util/jsonb/serialize.h b/be/src/util/jsonb/serialize.h index f19474abe939fc..e25ecc00af4e10 100644 --- a/be/src/util/jsonb/serialize.h +++ b/be/src/util/jsonb/serialize.h @@ -46,10 +46,16 @@ class JsonbSerializeUtil { const std::unordered_map& col_id_to_idx, Block& dst, const std::vector& default_values, const std::unordered_set& include_cids); + // append single row into mutable columns + static Status jsonb_to_columns(const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, + const std::unordered_map& col_id_to_idx, + MutableColumns& dst_columns, + const std::vector& default_values, + const std::unordered_set& include_cids); // single row static Status jsonb_to_block(const DataTypeSerDeSPtrs& serdes, const char* data, size_t size, const std::unordered_map& col_id_to_idx, Block& dst, const std::vector& default_values, const std::unordered_set& include_cids); }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/ai/ai_function_test.cpp b/be/test/ai/ai_function_test.cpp index 7a1ecdc0c9a7fe..23855611861733 100644 --- a/be/test/ai/ai_function_test.cpp +++ b/be/test/ai/ai_function_test.cpp @@ -27,6 +27,7 @@ #include "core/block/block.h" #include "core/column/column_array.h" +#include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" #include "core/data_type/data_type_array.h" @@ -181,6 +182,28 @@ class OneShotHttpServer { std::thread _thread; }; +namespace { +MutableColumnPtr create_string_array_column(const std::vector>& rows) { + auto nested_column = ColumnString::create(); + auto null_map_column = ColumnUInt8::create(); + auto offsets_column = ColumnOffset64::create(); + + IColumn::Offset offset = 0; + for (const auto& row : rows) { + for (const auto& value : row) { + nested_column->insert_data(value.data(), value.size()); + null_map_column->insert_value(0); + } + offset += row.size(); + offsets_column->insert_value(offset); + } + + return ColumnArray::create( + ColumnNullable::create(std::move(nested_column), std::move(null_map_column)), + std::move(offsets_column)); +} +} // namespace + TEST(AIFunctionTest, AISummarizeTest) { FunctionAISummarize function; @@ -233,20 +256,7 @@ TEST(AIFunctionTest, AIMaskTest) { auto col_resource = ColumnHelper::create_column(resources); auto col_text = ColumnHelper::create_column(texts); - - auto nested_column = ColumnString::create(); - auto offsets_column = ColumnOffset64::create(); - - IColumn::Offset offset = 0; - for (const auto& row : labels) { - for (const auto& value : row) { - nested_column->insert_data(value.data(), value.size()); - } - offset += row.size(); - offsets_column->insert_value(offset); - } - - auto array_column = ColumnArray::create(std::move(nested_column), std::move(offsets_column)); + auto array_column = create_string_array_column(labels); Block block; block.insert({std::move(col_resource), std::make_shared(), "resource"}); @@ -315,20 +325,7 @@ TEST(AIFunctionTest, AIExtractTest) { auto col_resource = ColumnHelper::create_column(resources); auto col_text = ColumnHelper::create_column(texts); - - auto nested_column = ColumnString::create(); - auto offsets_column = ColumnOffset64::create(); - - IColumn::Offset offset = 0; - for (const auto& row : labels) { - for (const auto& value : row) { - nested_column->insert_data(value.data(), value.size()); - } - offset += row.size(); - offsets_column->insert_value(offset); - } - - auto array_column = ColumnArray::create(std::move(nested_column), std::move(offsets_column)); + auto array_column = create_string_array_column(labels); Block block; block.insert({std::move(col_resource), std::make_shared(), "resource"}); @@ -355,20 +352,7 @@ TEST(AIFunctionTest, AIClassifyTest) { auto col_resource = ColumnHelper::create_column(resources); auto col_text = ColumnHelper::create_column(texts); - - auto nested_column = ColumnString::create(); - auto offsets_column = ColumnOffset64::create(); - - IColumn::Offset offset = 0; - for (const auto& row : labels) { - for (const auto& value : row) { - nested_column->insert_data(value.data(), value.size()); - } - offset += row.size(); - offsets_column->insert_value(offset); - } - - auto array_column = ColumnArray::create(std::move(nested_column), std::move(offsets_column)); + auto array_column = create_string_array_column(labels); Block block; block.insert({std::move(col_resource), std::make_shared(), "resource"}); diff --git a/be/test/core/block/block_test.cpp b/be/test/core/block/block_test.cpp index ff80cc4c425de9..bb60f2fabf7136 100644 --- a/be/test/core/block/block_test.cpp +++ b/be/test/core/block/block_test.cpp @@ -35,12 +35,14 @@ #include "agent/be_exec_version_manager.h" #include "common/config.h" +#include "common/exception.h" #include "common/object_pool.h" #include "core/column/column.h" #include "core/column/column_array.h" #include "core/column/column_complex.h" #include "core/column/column_const.h" #include "core/column/column_decimal.h" +#include "core/column/column_dummy.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" @@ -64,9 +66,55 @@ #include "runtime/descriptor_helper.h" #include "runtime/descriptors.h" #include "testutil/column_helper.h" +#include "util/debug_points.h" +#include "util/defer_op.h" namespace doris { +namespace { + +static constexpr auto CONVERT_COLUMN_IF_OVERFLOW_DEBUG_POINT = + "ColumnStr.convert_column_if_overflow.max_string_size"; + +class ThrowOnCloneColumn final : public COWHelper { +private: + friend class COWHelper; + + ThrowOnCloneColumn(size_t size, bool throw_on_clone, bool throw_on_clone_empty) + : _throw_on_clone(throw_on_clone), _throw_on_clone_empty(throw_on_clone_empty) { + s = size; + } + + ThrowOnCloneColumn(const ThrowOnCloneColumn&) = default; + + MutableColumnPtr clone() const override { + if (_throw_on_clone) { + throw Exception(ErrorCode::INTERNAL_ERROR, "injected clone failure"); + } + return MutableColumnPtr(new ThrowOnCloneColumn(*this)); + } + +public: + std::string get_name() const override { return "ThrowOnClone"; } + + MutableColumnPtr clone_dummy(size_t size) const override { + if (_throw_on_clone_empty) { + throw Exception(ErrorCode::INTERNAL_ERROR, "injected clone_empty failure"); + } + return ThrowOnCloneColumn::create(size, _throw_on_clone, _throw_on_clone_empty); + } + + bool structure_equals(const IColumn& rhs) const override { + return typeid(rhs) == typeid(ThrowOnCloneColumn); + } + +private: + bool _throw_on_clone = false; + bool _throw_on_clone_empty = false; +}; + +} // namespace + void block_to_pb( const Block& block, PBlock* pblock, segment_v2::CompressionTypePB compression_type = segment_v2::CompressionTypePB::SNAPPY) { @@ -895,11 +943,11 @@ TEST(BlockTest, merge_with_shared_columns) { Block temp_block({test_k1_temp, test_v1_temp, test_v2_temp}); - MutableBlock mutable_block(&src_block); + ScopedMutableBlock scoped_mutable_block(&src_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); auto status = mutable_block.merge(temp_block); ASSERT_TRUE(status.ok()); - - src_block.set_columns(std::move(mutable_block.mutable_columns())); + scoped_mutable_block.restore(); for (auto& column : src_block.get_columns()) { EXPECT_EQ(1034, column->size()); @@ -955,6 +1003,52 @@ TEST(BlockTest, clear_blocks) { } } +TEST(BlockTest, merge_returns_error_and_restores_output_block) { + auto input_block = + ColumnHelper::create_block(std::vector {"abcde", "fghij"}); + input_block.insert(ColumnHelper::create_column_with_name({1, 2})); + auto output_block = ColumnHelper::create_block(std::vector {}); + + auto status = [&]() { + ScopedMutableBlock scoped_mutable_block(&output_block); + return scoped_mutable_block.mutable_block().merge(input_block); + }(); + ASSERT_FALSE(status.ok()); + EXPECT_NE(status.to_string().find("Merge block not match"), std::string::npos) + << status.to_string(); + + ASSERT_EQ(output_block.rows(), 0); + ASSERT_FALSE(output_block.get_by_position(0).column->is_column_string64()); +} + +TEST(BlockTest, merge_ignore_overflow_keeps_owned_accumulation_convertible) { + auto input_block = + ColumnHelper::create_block(std::vector {"abcde", "fghij"}); + auto output_block = ColumnHelper::create_block(std::vector {}); + + const auto origin_enable_debug_points = config::enable_debug_points; + config::enable_debug_points = true; + DebugPoints::instance()->add_with_params(CONVERT_COLUMN_IF_OVERFLOW_DEBUG_POINT, + {{"max_string_size", "9"}}); + Defer defer([origin_enable_debug_points]() { + DebugPoints::instance()->remove(CONVERT_COLUMN_IF_OVERFLOW_DEBUG_POINT); + config::enable_debug_points = origin_enable_debug_points; + }); + + ColumnPtr converted_column; + { + ScopedMutableBlock scoped_mutable_block(&output_block); + auto& mutable_block = scoped_mutable_block.mutable_block(); + auto status = mutable_block.merge_ignore_overflow(input_block); + ASSERT_TRUE(status.ok()) << status.to_string(); + converted_column = mutable_block.get_column_by_position(0)->convert_column_if_overflow(); + } + ASSERT_TRUE(converted_column->is_column_string64()); + ASSERT_EQ(converted_column->size(), 2); + EXPECT_EQ(converted_column->get_data_at(0).to_string(), "abcde"); + EXPECT_EQ(converted_column->get_data_at(1).to_string(), "fghij"); +} + TEST(BlockTest, replace_by_position) { auto block = ColumnHelper::create_block({1, 2, 3}); block.insert(0, ColumnHelper::create_column_with_name({"a", "b", "c"})); @@ -1023,7 +1117,7 @@ TEST(BlockTest, merge_impl_ignore_overflow) { block.insert(ColumnHelper::create_column_with_name({})); auto block2 = ColumnHelper::create_block({}); - auto mutable_block = MutableBlock::build_mutable_block(&block); + auto mutable_block = MutableBlock::build_mutable_block(std::move(block)); auto st = mutable_block.merge_ignore_overflow(std::move(block2)); ASSERT_FALSE(st.ok()); @@ -1274,7 +1368,8 @@ TEST(BlockTest, others) { ASSERT_EQ(block.get_by_position(0).type->get_primitive_type(), TYPE_INT); ASSERT_EQ(block.columns(), 1); - MutableBlock mutable_block(&block); + ScopedMutableBlock scoped_mutable_block(&block); + auto& mutable_block = scoped_mutable_block.mutable_block(); auto dumped = mutable_block.dump_data(); ASSERT_GT(dumped.size(), 0) << "Dumped data size: " << dumped.size(); auto dumped_json = mutable_block.dump_data_json(); @@ -1291,4 +1386,230 @@ TEST(BlockTest, others) { ASSERT_TRUE(dumped_names.empty()) << "Dumped names: " << dumped_names; } +TEST(BlockTest, ClearSelectedColumnDataClonesSharedColumn) { + auto type = std::make_shared(); + auto mutable_col0 = ColumnInt32::create(); + mutable_col0->insert_value(1); + mutable_col0->insert_value(2); + ColumnPtr old_col0 = mutable_col0->get_ptr(); + + auto mutable_col1 = ColumnInt32::create(); + mutable_col1->insert_value(10); + mutable_col1->insert_value(20); + ColumnPtr old_col1 = mutable_col1->get_ptr(); + + Block block; + block.insert({std::move(mutable_col0), type, "c0"}); + block.insert({std::move(mutable_col1), type, "c1"}); + + block.clear_column_data(std::vector {0}); + + EXPECT_EQ(block.get_by_position(0).column->size(), 0); + EXPECT_EQ(old_col0->size(), 2); + EXPECT_NE(block.get_by_position(0).column.get(), old_col0.get()); + EXPECT_EQ(block.get_by_position(1).column->size(), 2); + EXPECT_EQ(block.get_by_position(1).column.get(), old_col1.get()); +} + +TEST(BlockTest, ClearColumnDataPropagatesSharedCloneEmptyFailure) { + auto type = std::make_shared(); + auto mutable_col = ThrowOnCloneColumn::create(2, false, true); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + + EXPECT_THROW(block.clear_column_data(), Exception); + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column.get(), old_col.get()); + EXPECT_EQ(block.get_by_position(0).column->size(), 2); + EXPECT_EQ(old_col->size(), 2); +} + +TEST(BlockTest, ClearSelectedColumnDataPropagatesSharedCloneEmptyFailure) { + auto type = std::make_shared(); + auto mutable_col = ThrowOnCloneColumn::create(2, false, true); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + + EXPECT_THROW(block.clear_column_data(std::vector {0}), Exception); + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column.get(), old_col.get()); + EXPECT_EQ(block.get_by_position(0).column->size(), 2); + EXPECT_EQ(old_col->size(), 2); +} + +TEST(BlockTest, ScopedMutableColumnsRestoreOnErrorAndDetachSharedColumn) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + mutable_col->insert_value(2); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + + auto status = [&]() -> Status { + auto columns_guard = block.mutate_columns_scoped(); + columns_guard.mutable_columns()[0]->insert(Field::create_field(3)); + return Status::InternalError("force early return"); + }(); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(block.rows(), 3); + EXPECT_EQ(old_col->size(), 2); + EXPECT_NE(block.get_by_position(0).column.get(), old_col.get()); +} + +TEST(BlockTest, ScopedMutableColumnsReadSchemaFromLiveBlock) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + + auto columns_guard = block.mutate_columns_scoped(); + EXPECT_EQ(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(&columns_guard.get_datatype_by_position(0), &block.get_by_position(0).type); + EXPECT_EQ(&columns_guard.get_name_by_position(0), &block.get_by_position(0).name); + EXPECT_EQ(columns_guard.get_datatype_by_position(0).get(), type.get()); + EXPECT_EQ(columns_guard.get_name_by_position(0), "c0"); +} + +TEST(BlockTest, ScopedMutableColumnsConstructorFailureRestoresAcquiredColumns) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + mutable_col->insert_value(2); + const IColumn* old_col = mutable_col.get(); + + auto throwing_col = ThrowOnCloneColumn::create(2, true, false); + ColumnPtr old_throwing_col = throwing_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + block.insert({std::move(throwing_col), type, "throwing"}); + + EXPECT_THROW( + [&]() { + auto columns_guard = block.mutate_columns_scoped(); + static_cast(columns_guard); + }(), + Exception); + + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + ASSERT_NE(block.get_by_position(1).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column.get(), old_col); + EXPECT_EQ(block.get_by_position(0).column->size(), 2); + EXPECT_EQ(block.get_by_position(1).column.get(), old_throwing_col.get()); + EXPECT_EQ(block.get_by_position(1).column->size(), 2); +} + +TEST(BlockTest, ScopedMutableColumnRestoreOnErrorDetachSharedAndCreateMissingColumn) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + mutable_col->insert_value(2); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + block.insert({nullptr, type, "empty"}); + + auto status = [&]() -> Status { + auto column_guard = block.mutate_column_scoped(0); + EXPECT_EQ(block.get_by_position(0).column.get(), nullptr); + column_guard.mutable_column()->insert(Field::create_field(3)); + return Status::InternalError("force early return"); + }(); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(block.get_by_position(0).column->size(), 3); + EXPECT_EQ(old_col->size(), 2); + EXPECT_NE(block.get_by_position(0).column.get(), old_col.get()); + + { + auto column_guard = block.mutate_column_scoped(1); + EXPECT_EQ(block.get_by_position(1).column.get(), nullptr); + column_guard.mutable_column()->insert(Field::create_field(10)); + } + + ASSERT_NE(block.get_by_position(1).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(1).column->size(), 1); +} + +TEST(BlockTest, ScopedMutableColumnConstructorFailureKeepsOriginalColumn) { + auto type = std::make_shared(); + auto throwing_col = ThrowOnCloneColumn::create(2, true, false); + ColumnPtr old_throwing_col = throwing_col->get_ptr(); + + Block block; + block.insert({std::move(throwing_col), type, "throwing"}); + + EXPECT_THROW( + [&]() { + auto column_guard = block.mutate_column_scoped(0); + static_cast(column_guard); + }(), + Exception); + + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column.get(), old_throwing_col.get()); + EXPECT_EQ(block.get_by_position(0).column->size(), 2); +} + +TEST(BlockTest, ScopedMutableBlockRestoreOnErrorAndDetachSharedColumn) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + mutable_col->insert_value(2); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + + auto status = [&]() -> Status { + ScopedMutableBlock scoped_block(&block); + scoped_block.mutable_columns()[0]->insert(Field::create_field(3)); + return Status::InternalError("force early return"); + }(); + + EXPECT_FALSE(status.ok()); + EXPECT_EQ(block.rows(), 3); + EXPECT_EQ(old_col->size(), 2); + EXPECT_NE(block.get_by_position(0).column.get(), old_col.get()); +} + +TEST(BlockTest, ScopedMutableBlockConstructorFailureRestoresBlockColumns) { + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + mutable_col->insert_value(1); + mutable_col->insert_value(2); + const IColumn* old_col = mutable_col.get(); + + auto throwing_col = ThrowOnCloneColumn::create(2, true, false); + ColumnPtr old_throwing_col = throwing_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "c0"}); + block.insert({std::move(throwing_col), type, "throwing"}); + + EXPECT_THROW( + [&]() { + ScopedMutableBlock scoped_block(&block); + static_cast(scoped_block); + }(), + Exception); + + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + ASSERT_NE(block.get_by_position(1).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column.get(), old_col); + EXPECT_EQ(block.get_by_position(0).column->size(), 2); + EXPECT_EQ(block.get_by_position(1).column.get(), old_throwing_col.get()); + EXPECT_EQ(block.get_by_position(1).column->size(), 2); +} + } // namespace doris diff --git a/be/test/core/block/column_map_test.cpp b/be/test/core/block/column_map_test.cpp index 0be1bf8fc8c60d..c0b8930695ddd4 100644 --- a/be/test/core/block/column_map_test.cpp +++ b/be/test/core/block/column_map_test.cpp @@ -347,6 +347,120 @@ TEST(ColumnMapTest2, StringKeyTestDuplicatedKeysNestedMap) { ASSERT_EQ(v2_values[1].get(), 333); }; +TEST(ColumnMapTest2, SharedCreatePreservesImmutableSubcolumns) { + auto keys_mut = ColumnString::create(); + keys_mut->insert_data("k", 1); + ColumnPtr keys = std::move(keys_mut); + ColumnPtr keys_alias = keys; + + auto values_mut = ColumnInt32::create(); + values_mut->insert_value(1); + ColumnPtr values = std::move(values_mut); + ColumnPtr values_alias = values; + + auto offsets_mut = ColumnArray::ColumnOffsets::create(); + offsets_mut->get_data().push_back(1); + ColumnPtr offsets = std::move(offsets_mut); + ColumnPtr offsets_alias = offsets; + + auto map_column = ColumnMap::create(keys, values, offsets); + EXPECT_EQ(map_column->get_keys_ptr().get(), keys_alias.get()); + EXPECT_EQ(map_column->get_values_ptr().get(), values_alias.get()); + EXPECT_EQ(map_column->get_offsets_ptr().get(), offsets_alias.get()); +} + +TEST(ColumnMapTest2, ConstFilterAndPermuteKeepInputAliasesUntouched) { + auto keys_mut = ColumnString::create(); + keys_mut->insert_data("a", 1); + keys_mut->insert_data("b", 1); + keys_mut->insert_data("c", 1); + ColumnPtr keys = std::move(keys_mut); + ColumnPtr keys_alias = keys; + + auto values_mut = ColumnInt32::create(); + values_mut->insert_value(1); + values_mut->insert_value(2); + values_mut->insert_value(3); + ColumnPtr values = std::move(values_mut); + ColumnPtr values_alias = values; + + auto offsets_mut = ColumnArray::ColumnOffsets::create(); + offsets_mut->get_data().push_back(2); + offsets_mut->get_data().push_back(3); + ColumnPtr offsets = std::move(offsets_mut); + ColumnPtr offsets_alias = offsets; + + auto map_column = ColumnMap::create(keys, values, offsets); + + IColumn::Filter filter; + filter.push_back(0); + filter.push_back(1); + auto filtered = map_column->filter(filter, 1); + const auto& filtered_map = assert_cast(*filtered); + EXPECT_EQ(filtered_map.size(), 1); + EXPECT_EQ(filtered_map.get_keys().size(), 1); + EXPECT_EQ(assert_cast(filtered_map.get_values()).get_element(0), 3); + + IColumn::Permutation perm; + perm.push_back(1); + perm.push_back(0); + auto permuted = map_column->permute(perm, 0); + const auto& permuted_map = assert_cast(*permuted); + EXPECT_EQ(permuted_map.size(), 2); + EXPECT_EQ(permuted_map.get_offsets()[0], 1); + EXPECT_EQ(permuted_map.get_offsets()[1], 3); + + EXPECT_EQ(keys_alias->size(), 3); + EXPECT_EQ(values_alias->size(), 3); + EXPECT_EQ(offsets_alias->size(), 2); +} + +TEST(ColumnMapTest2, DeduplicateNestedNullableMapValuesDetachesSharedValueColumn) { + auto inner_values = ColumnMap::create(ColumnString::create(), ColumnInt32::create(), + ColumnArray::ColumnOffsets::create()); + Map inner_map; + inner_map.push_back(Field::create_field( + Array {Field::create_field("a"), Field::create_field("a")})); + inner_map.push_back(Field::create_field( + Array {Field::create_field(1), Field::create_field(2)})); + inner_values->insert(Field::create_field(inner_map)); + + ColumnPtr shared_inner_values = std::move(inner_values); + ColumnPtr inner_values_alias = shared_inner_values; + + auto null_map_mut = ColumnUInt8::create(); + null_map_mut->insert_value(0); + ColumnPtr null_map = std::move(null_map_mut); + ColumnPtr nullable_values = ColumnNullable::create(shared_inner_values, null_map); + + auto outer_keys_mut = ColumnString::create(); + outer_keys_mut->insert_data("outer", 5); + ColumnPtr outer_keys = std::move(outer_keys_mut); + + auto outer_offsets_mut = ColumnArray::ColumnOffsets::create(); + outer_offsets_mut->get_data().push_back(1); + ColumnPtr outer_offsets = std::move(outer_offsets_mut); + + auto outer_map = ColumnMap::create(outer_keys, nullable_values, outer_offsets); + auto st = outer_map->deduplicate_keys(true); + ASSERT_TRUE(st.ok()) << st.to_string(); + + const auto& alias_inner_map = assert_cast(*inner_values_alias); + EXPECT_EQ(alias_inner_map.get_keys().size(), 2); + EXPECT_EQ(alias_inner_map.get_values().size(), 2); + + const auto& outer_map_ref = *outer_map; + const auto& outer_values_nullable = + assert_cast(outer_map_ref.get_values()); + const auto& deduplicated_inner_map = + assert_cast(outer_values_nullable.get_nested_column()); + EXPECT_EQ(deduplicated_inner_map.get_keys().size(), 1); + EXPECT_EQ(deduplicated_inner_map.get_values().size(), 1); + EXPECT_EQ(deduplicated_inner_map.get_keys().get_data_at(0).to_string(), "a"); + EXPECT_EQ(assert_cast(deduplicated_inner_map.get_values()).get_element(0), + 2); +} + TEST(ColumnMapTest2, StringValueTest) { auto col_map_str64 = ColumnMap(ColumnInt64::create(), ColumnString64::create(), ColumnArray::ColumnOffsets::create()); @@ -415,4 +529,4 @@ TEST(ColumnMapTest2, StringValueTest) { EXPECT_EQ(v[i], v3[i]); } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/core/block/column_nullable_test.cpp b/be/test/core/block/column_nullable_test.cpp index dc837c335b13d1..0b92d1813fb8fb 100644 --- a/be/test/core/block/column_nullable_test.cpp +++ b/be/test/core/block/column_nullable_test.cpp @@ -44,7 +44,7 @@ TEST(ColumnNullableTest, HashTest) { nullable_column->update_hash_with_value(0, hashes[1]); EXPECT_EQ(hashes[0].get64(), hashes[1].get64()); - auto& null_map = ((ColumnNullable)(*nullable_column)).get_null_map_data(); + auto& null_map = nullable_column->get_null_map_data(); null_map[1] = true; column->update_hash_with_value(1, hashes[0]); nullable_column->update_hash_with_value(1, hashes[1]); diff --git a/be/test/core/block/column_test.cpp b/be/test/core/block/column_test.cpp index d25160d6fafa2f..69af3ca5bc8563 100644 --- a/be/test/core/block/column_test.cpp +++ b/be/test/core/block/column_test.cpp @@ -105,6 +105,25 @@ TEST_F(ColumnTest, CutColumnDecimal64) { EXPECT_THROW({ col_dcm->cut(0, 10); }, doris::Exception); } +TEST_F(ColumnTest, AssumeMutableRequiresExclusiveOwnership) { + ColumnPtr column = ColumnInt64::create(); + { + auto mutable_column = column->assume_mutable(); + assert_cast(mutable_column.get())->insert_value(1); + } + + ColumnPtr alias = column; + EXPECT_THROW({ (void)column->assume_mutable(); }, doris::Exception); + EXPECT_THROW({ (void)column->assume_mutable_ref(); }, doris::Exception); + + auto cloned = IColumn::mutate(std::move(column)); + auto* cloned_int = assert_cast(cloned.get()); + cloned_int->insert_value(2); + + EXPECT_EQ(alias->size(), 1); + EXPECT_EQ(cloned_int->size(), 2); +} + TEST_F(ColumnTest, ShrinkColumnString) { auto shrunk_col = col_str->shrink(2); EXPECT_EQ(shrunk_col->size(), 2); diff --git a/be/test/core/column/column_array_test.cpp b/be/test/core/column/column_array_test.cpp index 7b2ec0a2544ff2..68a175d3c853aa 100644 --- a/be/test/core/column/column_array_test.cpp +++ b/be/test/core/column/column_array_test.cpp @@ -611,6 +611,30 @@ TEST_F(ColumnArrayTest, ShrinkPaddingCharsTest) { } //////////////////////// special function from column_array.h //////////////////////// +TEST_F(ColumnArrayTest, SharedCreateValidatesOffsetsAndDataSize) { + auto data_mut = ColumnInt32::create(); + data_mut->insert_value(1); + data_mut->insert_value(2); + ColumnPtr data = std::move(data_mut); + + EXPECT_ANY_THROW({ auto array_column = ColumnArray::create(data); }); + + auto bad_offsets_mut = ColumnArray::ColumnOffsets::create(); + bad_offsets_mut->get_data().push_back(1); + ColumnPtr bad_offsets = std::move(bad_offsets_mut); + EXPECT_ANY_THROW({ auto array_column = ColumnArray::create(data, bad_offsets); }); + + ColumnPtr wrong_offsets = ColumnUInt8::create(); + EXPECT_ANY_THROW({ auto array_column = ColumnArray::create(data, wrong_offsets); }); + + auto good_offsets_mut = ColumnArray::ColumnOffsets::create(); + good_offsets_mut->get_data().push_back(2); + ColumnPtr good_offsets = std::move(good_offsets_mut); + auto array_column = ColumnArray::create(data, good_offsets); + EXPECT_EQ(array_column->get_data_ptr().get(), data.get()); + EXPECT_EQ(array_column->get_offsets_ptr().get(), good_offsets.get()); +} + TEST_F(ColumnArrayTest, CreateArrayTest) { // Test ColumnArray constructor constraints: nested_column and offsets_column must not be ColumnConst. // The constructor enforces this via check_const_only_in_top_level(), preventing COW-related issues: @@ -688,8 +712,7 @@ TEST_F(ColumnArrayTest, ConvertIfOverflowAndInsertTest) { // check ptr is itself auto ptr = column->convert_column_if_overflow(); EXPECT_EQ(ptr.get(), column.get()); - auto arr_col = - check_and_get_column(remove_nullable(column->assume_mutable()).get()); + auto arr_col = check_and_get_column(remove_nullable(column->get_ptr()).get()); auto nested_col = arr_col->get_data_ptr(); auto array_col1 = check_and_get_column(remove_nullable(ptr).get()); auto nested_col1 = array_col1->get_data_ptr(); diff --git a/be/test/core/column/column_ip_test.cpp b/be/test/core/column/column_ip_test.cpp index fc03446e45503b..05cf6034ed37e5 100644 --- a/be/test/core/column/column_ip_test.cpp +++ b/be/test/core/column/column_ip_test.cpp @@ -77,32 +77,32 @@ class ColumnIPTest : public CommonColumnTest { TEST_F(ColumnIPTest, InsertRangeFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_range_from_callback); } TEST_F(ColumnIPTest, InsertManyFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_from_callback); } TEST_F(ColumnIPTest, InsertIndicesFromTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_indices_from_callback); } TEST_F(ColumnIPTest, InsertDefaultTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); // ipv4 default value is '0.0.0.0' and ipv6 default value is '::' check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_default_callback); } @@ -110,31 +110,31 @@ TEST_F(ColumnIPTest, InsertDefaultTest) { TEST_F(ColumnIPTest, InsertManyDefaultsTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_insert_many_defaults_callback); } TEST_F(ColumnIPTest, GetDataAtTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_get_data_at_callback); } TEST_F(ColumnIPTest, FieldTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_field_callback); } TEST_F(ColumnIPTest, GetRawDataTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, {serde[1]}, ';', {2}, data_files[0], assert_get_raw_data_callback); } @@ -142,8 +142,8 @@ TEST_F(ColumnIPTest, GetRawDataTest) { TEST_F(ColumnIPTest, SerDeVecTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deser_vec(ip_cols, {dt_ipv4, dt_ipv6}); } @@ -151,8 +151,8 @@ TEST_F(ColumnIPTest, SerDeVecTest) { TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); ser_deserialize_with_arena_impl(ip_cols, {dt_ipv4, dt_ipv6}); @@ -161,16 +161,16 @@ TEST_F(ColumnIPTest, serDeserializeWithArenaImpl) { TEST_F(ColumnIPTest, SizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_size_callback); } TEST_F(ColumnIPTest, ByteSizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_byte_size_callback); } @@ -178,8 +178,8 @@ TEST_F(ColumnIPTest, ByteSizeTest) { TEST_F(ColumnIPTest, AllocateBytesTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_allocated_bytes_callback); } @@ -187,8 +187,8 @@ TEST_F(ColumnIPTest, AllocateBytesTest) { TEST_F(ColumnIPTest, PopbackTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_pop_back_callback); } @@ -197,18 +197,18 @@ TEST_F(ColumnIPTest, CloneTest) { // we test the column with clone_resize, clone_empty for assert size and ptr // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); - assert_clone_empty(column_ipv4->assume_mutable_ref()); - assert_clone_empty(column_ipv6->assume_mutable_ref()); + assert_clone_empty(ip_cols[0]->assume_mutable_ref()); + assert_clone_empty(ip_cols[1]->assume_mutable_ref()); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_clone_resized_callback); } TEST_F(ColumnIPTest, CutTest) { MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_cut_callback); } @@ -216,24 +216,24 @@ TEST_F(ColumnIPTest, CutTest) { TEST_F(ColumnIPTest, ResizeTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_resize_callback); } TEST_F(ColumnIPTest, ReserveTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_reserve_callback); } TEST_F(ColumnIPTest, ReplaceColumnTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // replace_column_data @@ -246,26 +246,26 @@ TEST_F(ColumnIPTest, ReplaceColumnTest) { TEST_F(ColumnIPTest, AppendDataBySelectorTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_append_data_by_selector_callback); } TEST_F(ColumnIPTest, PermutationAndSortTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[1], ';', {1, 2}); - assert_column_permutations(column_ipv4->assume_mutable_ref(), dt_ipv4); - assert_column_permutations(column_ipv6->assume_mutable_ref(), dt_ipv6); + assert_column_permutations(ip_cols[0]->assume_mutable_ref(), dt_ipv4); + assert_column_permutations(ip_cols[1]->assume_mutable_ref(), dt_ipv6); } TEST_F(ColumnIPTest, FilterTest) { // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); check_data(ip_cols, serde, ';', {1, 2}, data_files[0], assert_filter_callback); } @@ -274,8 +274,8 @@ TEST_F(ColumnIPTest, HashTest) { // XXHash // insert from data csv and assert insert result MutableColumns ip_cols; - ip_cols.push_back(column_ipv4->get_ptr()); - ip_cols.push_back(column_ipv6->get_ptr()); + ip_cols.push_back(std::move(column_ipv4)); + ip_cols.push_back(std::move(column_ipv6)); load_data_from_csv(serde, ip_cols, data_files[0], ';', {1, 2}); // update_hashes_with_value diff --git a/be/test/core/column/column_nullable_test.cpp b/be/test/core/column/column_nullable_test.cpp index 3d3b45e218bdef..799cc6a9826059 100644 --- a/be/test/core/column/column_nullable_test.cpp +++ b/be/test/core/column/column_nullable_test.cpp @@ -49,7 +49,7 @@ TEST(ColumnNullableTest, NullTest) { dst_col->clear(); EXPECT_FALSE(dst_col->has_null()); dst_col->insert_range_from( - *ColumnNullable::create(std::move(source_col), ColumnUInt8::create(10, 0)), 5, 5); + *ColumnNullable::create(std::move(source_col), ColumnUInt8::create(100, 0)), 5, 5); EXPECT_FALSE(dst_col->has_null()); dst_col->clear(); @@ -81,6 +81,15 @@ TEST(ColumnNullableTest, NullTest) { EXPECT_TRUE(dst_col->has_null()); } +TEST(ColumnNullableTest, CreateRejectsMismatchedNestedAndNullMapSizes) { + EXPECT_THROW( + { + auto nullable = ColumnNullable::create(create_nested_column(100), + ColumnUInt8::create(10, 0)); + }, + doris::Exception); +} + TEST(ColumnNullableTest, PredicateTest) { auto nullable_pred = ColumnNullable::create(PredicateColumnType::create(), ColumnUInt8::create()); @@ -103,6 +112,25 @@ TEST(ColumnNullableTest, PredicateTest) { EXPECT_TRUE(null_dst->has_null()); } +TEST(ColumnNullableTest, SharedCreatePreservesImmutableSubcolumns) { + auto nested_mut = ColumnInt64::create(); + nested_mut->insert_value(10); + ColumnPtr nested = std::move(nested_mut); + ColumnPtr nested_alias = nested; + + auto null_map_mut = ColumnUInt8::create(); + null_map_mut->insert_value(0); + ColumnPtr null_map = std::move(null_map_mut); + ColumnPtr null_map_alias = null_map; + + auto nullable = ColumnNullable::create(nested, null_map); + const auto& nullable_ref = *nullable; + EXPECT_EQ(nullable_ref.get_nested_column_ptr().get(), nested_alias.get()); + EXPECT_EQ(nullable_ref.get_null_map_column_ptr().get(), null_map_alias.get()); + EXPECT_EQ(nested_alias->size(), 1); + EXPECT_EQ(null_map_alias->size(), 1); +} + TEST(ColumnNullableTest, append_data_by_selector) { auto srt_column = ColumnHelper::create_nullable_column( {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, @@ -206,4 +234,4 @@ TEST(ColumnNullableTest, ScalaTypeNullStringTest2erase) { } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/core/column/column_variant_test.cpp b/be/test/core/column/column_variant_test.cpp index ebf59b77345d14..71f007a73c1615 100644 --- a/be/test/core/column/column_variant_test.cpp +++ b/be/test/core/column/column_variant_test.cpp @@ -1178,9 +1178,11 @@ TEST_F(ColumnVariantTest, field_test) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); test_func(obj); } @@ -2122,13 +2124,16 @@ TEST_F(ColumnVariantTest, fill_path_column_from_sparse_data) { ColumnVariant::MutablePtr obj; obj = ColumnVariant::create(1, false); MutableColumns cols; - cols.push_back(obj->get_ptr()); + cols.push_back(std::move(obj)); const auto& json_file_obj = test_data_dir_json + "json_variant/object_boundary.jsonl"; load_columns_data_from_file(cols, serde, '\n', {0}, json_file_obj); + obj = ColumnVariant::cast_to_column_mutptr(assert_cast(cols[0].get())); + cols.clear(); EXPECT_TRUE(!obj->empty()); auto sparse_col = obj->get_sparse_column(); auto cloned_sparse = sparse_col->clone_empty(); - auto& offsets = obj->serialized_sparse_column_offsets(); + const auto& offsets = + static_cast(*obj).serialized_sparse_column_offsets(); for (size_t i = 0; i != offsets.size(); ++i) { auto start = offsets[i - 1]; auto end = offsets[i]; diff --git a/be/test/core/column/common_column_test.h b/be/test/core/column/common_column_test.h index ac4ed5eff76582..4a283670daf029 100644 --- a/be/test/core/column/common_column_test.h +++ b/be/test/core/column/common_column_test.h @@ -634,11 +634,15 @@ class CommonColumnTest : public ::testing::Test { Block block; for (size_t i = 0; i < load_cols.size(); ++i) { ColumnWithTypeAndName columnTypeAndName; - columnTypeAndName.column = load_cols[i]->assume_mutable(); + columnTypeAndName.column = load_cols[i]->get_ptr(); columnTypeAndName.type = types[i]; block.insert(columnTypeAndName); } - MutableBlock mb = MutableBlock::build_mutable_block(&block); + MutableBlock mb = MutableBlock::build_mutable_block(std::move(block)); + // Rebuild block from load_cols after build_mutable_block stole the column pointers + for (size_t i = 0; i < load_cols.size(); ++i) { + block.get_by_position(i).column = load_cols[i]->get_ptr(); + } // step2. to construct a block for assert_cols Block assert_block; Block empty_block; @@ -649,7 +653,7 @@ class CommonColumnTest : public ::testing::Test { assert_block.insert(columnTypeAndName); empty_block.insert(columnTypeAndName); } - MutableBlock assert_mb = MutableBlock::build_mutable_block(&empty_block); + MutableBlock assert_mb = MutableBlock::build_mutable_block(std::move(empty_block)); // step3. to insert data from load_cols to assert_cols Status st = mb.merge_impl_ignore_overflow(assert_block); EXPECT_TRUE(st.ok()) << "Failed to merge block: " << st.to_string(); @@ -691,7 +695,9 @@ class CommonColumnTest : public ::testing::Test { continue; } else if (*pos + *cl > source_column->size()) { if (is_column( - remove_nullable(source_column->assume_mutable()).get())) { + remove_nullable(static_cast(source_column.get()) + ->get_ptr()) + .get())) { // insert_range_from in array has DCHECK_LG continue; } @@ -3544,13 +3550,13 @@ auto assert_column_vector_serialize_vec_callback = [](auto x, if (test_null_map) { cloned_target_column->serialize(input_keys.data(), rows); deser_column_wrapper = cloned_target_column->clone_empty(); - deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { target_column->serialize(input_keys.data(), rows); deser_column = source_column->clone_empty(); } if (test_null_map) { deser_column_wrapper->deserialize(input_keys.data(), rows); + deser_column = ((ColumnNullable*)deser_column_wrapper.get())->get_nested_column_ptr(); } else { deser_column->deserialize(input_keys.data(), rows); } @@ -3944,4 +3950,4 @@ auto assert_byte_size_with_file_callback = [](const MutableColumns& load_cols, test_func(false); }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/core/data_type/common_data_type_serder_test.h b/be/test/core/data_type/common_data_type_serder_test.h index d968cc1213e92d..a7393b9d8eee0c 100644 --- a/be/test/core/data_type/common_data_type_serder_test.h +++ b/be/test/core/data_type/common_data_type_serder_test.h @@ -277,7 +277,7 @@ class CommonDataTypeSerdeTest : public ::testing::Test { jsonb_column->reserve(load_cols[0]->size()); MutableColumns assert_cols; for (size_t i = 0; i < load_cols.size(); ++i) { - assert_cols.push_back(load_cols[i]->assume_mutable()); + assert_cols.push_back(load_cols[i]->clone_empty()); } DataTypeSerDe::FormatOptions options; auto tz = cctz::utc_time_zone(); diff --git a/be/test/core/data_type/complex_type_test.cpp b/be/test/core/data_type/complex_type_test.cpp index 54dc360e2a8fa1..f4f9654a15a05b 100644 --- a/be/test/core/data_type/complex_type_test.cpp +++ b/be/test/core/data_type/complex_type_test.cpp @@ -20,8 +20,17 @@ #include #include +#include +#include "agent/be_exec_version_manager.h" +#include "core/assert_cast.h" #include "core/column/column.h" +#include "core/column/column_array.h" +#include "core/column/column_map.h" +#include "core/column/column_nullable.h" +#include "core/column/column_string.h" +#include "core/column/column_struct.h" +#include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_map.h" @@ -34,6 +43,25 @@ namespace doris { +namespace { + +std::vector serialize_column(const DataTypePtr& type, const ColumnPtr& column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + std::vector buf(type->get_uncompressed_serialized_bytes(*column, be_exec_version)); + char* end = type->serialize(*column, buf.data(), be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); + return buf; +} + +void deserialize_column(const DataTypePtr& type, const std::vector& buf, + MutableColumnPtr* column) { + const int be_exec_version = BeExecVersionManager::get_newest_version(); + const char* end = type->deserialize(buf.data(), column, be_exec_version); + EXPECT_EQ(buf.data() + buf.size(), end); +} + +} // namespace + TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { DataTypePtr n1 = std::make_shared(std::make_shared()); DataTypePtr n2 = std::make_shared(std::make_shared()); @@ -70,4 +98,102 @@ TEST(ComplexTypeTest, CreateColumnConstWithDefaultValue) { col_a->get(0, af); EXPECT_EQ(PrimitiveType::TYPE_ARRAY, af.get_type()); } + +TEST(ComplexTypeTest, DeserializeArrayWritesBackSharedNestedColumn) { + DataTypePtr nested_type = std::make_shared(); + DataTypePtr array_type = std::make_shared(nested_type); + + auto src_column = array_type->create_column(); + src_column->insert(Field::create_field( + Array {Field::create_field(1), Field::create_field(2)})); + src_column->insert(Field::create_field(Array {Field::create_field(3)})); + auto buf = serialize_column(array_type, src_column->get_ptr()); + + ColumnPtr shared_nested_data_column = ColumnInt32::create(); + ColumnPtr shared_nested_null_map_column = ColumnUInt8::create(); + ColumnPtr shared_nested_column = + ColumnNullable::create(shared_nested_data_column, shared_nested_null_map_column); + ColumnPtr shared_offsets_column = ColumnArray::ColumnOffsets::create(); + MutableColumnPtr dst_column = ColumnArray::create(shared_nested_column, shared_offsets_column); + deserialize_column(array_type, buf, &dst_column); + + const auto& array_column = assert_cast(*dst_column); + EXPECT_EQ(2, array_column.size()); + EXPECT_EQ(0, shared_nested_data_column->size()); + EXPECT_EQ(0, shared_nested_null_map_column->size()); + EXPECT_EQ(0, shared_offsets_column->size()); + EXPECT_EQ(3, array_column.get_data().size()); + EXPECT_EQ(2, array_column.get_offsets()[0]); + EXPECT_EQ(3, array_column.get_offsets()[1]); + + const auto& nullable_data = assert_cast(array_column.get_data()); + const auto& data = + assert_cast(nullable_data.get_nested_column()).get_data(); + EXPECT_EQ(1, data[0]); + EXPECT_EQ(2, data[1]); + EXPECT_EQ(3, data[2]); + EXPECT_FALSE(nullable_data.has_null()); +} + +TEST(ComplexTypeTest, DeserializeMapWritesBackSharedKeyAndValueColumns) { + DataTypePtr key_type = std::make_shared(); + DataTypePtr value_type = std::make_shared(); + DataTypePtr map_type = std::make_shared(key_type, value_type); + + auto src_column = map_type->create_column(); + Map map; + map.push_back(Field::create_field( + Array {Field::create_field(10), Field::create_field(20)})); + map.push_back(Field::create_field( + Array {Field::create_field("a"), Field::create_field("b")})); + src_column->insert(Field::create_field(map)); + auto buf = serialize_column(map_type, src_column->get_ptr()); + + ColumnPtr shared_keys_column = ColumnInt32::create(); + ColumnPtr shared_values_column = ColumnString::create(); + ColumnPtr offsets_column = ColumnArray::ColumnOffsets::create(); + MutableColumnPtr dst_column = + ColumnMap::create(shared_keys_column, shared_values_column, offsets_column); + deserialize_column(map_type, buf, &dst_column); + + const auto& map_column = assert_cast(*dst_column); + EXPECT_EQ(1, map_column.size()); + EXPECT_EQ(0, shared_keys_column->size()); + EXPECT_EQ(0, shared_values_column->size()); + EXPECT_EQ(0, offsets_column->size()); + EXPECT_EQ(2, map_column.get_keys().size()); + EXPECT_EQ(2, map_column.get_values().size()); + + const auto& keys = assert_cast(map_column.get_keys()).get_data(); + EXPECT_EQ(10, keys[0]); + EXPECT_EQ(20, keys[1]); + EXPECT_EQ("a", map_column.get_values().get_data_at(0).to_string()); + EXPECT_EQ("b", map_column.get_values().get_data_at(1).to_string()); +} + +TEST(ComplexTypeTest, DeserializeStructWritesBackSharedChildren) { + DataTypes children_types {std::make_shared(), + std::make_shared()}; + DataTypePtr struct_type = std::make_shared(children_types); + + auto src_column = struct_type->create_column(); + src_column->insert(Field::create_field( + Struct {Field::create_field(7), Field::create_field("seven")})); + auto buf = serialize_column(struct_type, src_column->get_ptr()); + + ColumnPtr shared_int_column = ColumnInt32::create(); + ColumnPtr shared_string_column = ColumnString::create(); + Columns shared_columns {shared_int_column, shared_string_column}; + MutableColumnPtr dst_column = ColumnStruct::create(shared_columns); + deserialize_column(struct_type, buf, &dst_column); + + const auto& struct_column = assert_cast(*dst_column); + EXPECT_EQ(1, struct_column.size()); + EXPECT_EQ(0, shared_int_column->size()); + EXPECT_EQ(0, shared_string_column->size()); + + const auto& ints = assert_cast(struct_column.get_column(0)).get_data(); + EXPECT_EQ(7, ints[0]); + EXPECT_EQ("seven", struct_column.get_column(1).get_data_at(0).to_string()); +} } // namespace doris diff --git a/be/test/core/data_type/data_type_array_test.cpp b/be/test/core/data_type/data_type_array_test.cpp index ebc6f3eedb8d42..819bf33f227fa2 100644 --- a/be/test/core/data_type/data_type_array_test.cpp +++ b/be/test/core/data_type/data_type_array_test.cpp @@ -27,6 +27,9 @@ #include #include "core/column/column.h" +#include "core/column/column_array.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" #include "core/data_type/common_data_type_serder_test.h" #include "core/data_type/common_data_type_test.h" #include "core/data_type/data_type.h" @@ -420,6 +423,27 @@ TEST_F(DataTypeArrayTest, CreateColumnTest) { } } +TEST_F(DataTypeArrayTest, CreateColumnUsesNullableNestedColumn) { + auto nested_type = std::make_shared(); + auto array_type = std::make_shared(nested_type); + EXPECT_TRUE(array_type->get_nested_type()->is_nullable()); + + auto column = array_type->create_column(); + auto& array_column = assert_cast(*column); + auto& nested_column = assert_cast(array_column.get_data()); + array_column.insert(Field::create_field( + Array {Field::create_field(1), Field::create_field(2)})); + + EXPECT_EQ(1, array_column.size()); + EXPECT_EQ(2, nested_column.size()); + EXPECT_FALSE(nested_column.has_null()); + EXPECT_TRUE(array_type->check_column(*column).ok()); + + auto old_shape_column = + ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()); + EXPECT_FALSE(array_type->check_column(*old_shape_column).ok()); +} + TEST_F(DataTypeArrayTest, GetFieldTest) { TExprNode node; node.node_type = TExprNodeType::ARRAY_LITERAL; diff --git a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp index 0478507cab0844..84bce05751a061 100644 --- a/be/test/core/data_type_serde/data_type_serde_csv_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_csv_test.cpp @@ -512,8 +512,9 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto struct_col = static_cast( - static_cast(*col.get()).get_nested_column()); + // Use const access for read-only assertions: avoids assume_mutable_ref() on sub-columns. + const auto& struct_col = static_cast( + static_cast(*col.get()).get_nested_column()); EXPECT_EQ(struct_col.get_column(0).get_data_at(0).to_string(), "false"); EXPECT_EQ(struct_col.get_column(1).get_data_at(0).to_string(), "example"); @@ -537,11 +538,11 @@ TEST(CsvSerde, ComplexTypeSerdeSchemaChangedCsvTest) { DataTypeSerDeSPtr serde = data_type_ptr->get_serde(); Status st = serde->deserialize_one_cell_from_hive_text(*col, slice, formatOptions); EXPECT_EQ(st, Status::OK()); - auto array_col = static_cast( - static_cast(*col.get()).get_nested_column()); + const auto& array_col = static_cast( + static_cast(*col.get()).get_nested_column()); - auto string_col = static_cast( - static_cast(array_col.get_data()).get_nested_column()); + const auto& string_col = static_cast( + static_cast(array_col.get_data()).get_nested_column()); EXPECT_EQ(string_col.get_data_at(0).to_string(), "1\003example"); EXPECT_EQ(string_col.get_data_at(1).to_string(), "2\003test"); } diff --git a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp index e583b50e4302f2..5158ab01c75f12 100644 --- a/be/test/core/data_type_serde/data_type_serde_struct_test.cpp +++ b/be/test/core/data_type_serde/data_type_serde_struct_test.cpp @@ -144,10 +144,9 @@ TEST_F(DataTypeStructSerDeTest, ArrowMemNotAligned) { EXPECT_EQ(string_values_address % 4, 1); // 5.Test read_column_from_arrow - std::vector vector_columns; - vector_columns.emplace_back(ColumnInt32::create()); - vector_columns.emplace_back(ColumnString::create()); - auto ser_col = ColumnStruct::create(vector_columns); + // Create sub-columns exclusively (no extra refs) so that ColumnStruct::get_column() + // non-const path does not find use_count > 1. + auto ser_col = ColumnStruct::create(Columns {ColumnInt32::create(), ColumnString::create()}); cctz::time_zone tz; DataTypeSerDeSPtrs elem_serdes = {serde_int32, serde_str}; Strings field_names = {"int_field", "string_field"}; diff --git a/be/test/core/jsonb/serialize_test.cpp b/be/test/core/jsonb/serialize_test.cpp index 2419383b0eddb3..cc721cc618d044 100644 --- a/be/test/core/jsonb/serialize_test.cpp +++ b/be/test/core/jsonb/serialize_test.cpp @@ -35,6 +35,7 @@ #include "agent/be_exec_version_manager.h" #include "common/exception.h" +#include "core/assert_cast.h" #include "core/block/block.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" @@ -621,6 +622,45 @@ static void fill_block_with_array_string(Block& block) { block.insert(test_array_string); } +TEST(BlockSerializeCowTest, JsonbToBlockMutatesDestinationOwnerColumn) { + TabletSchema schema; + TabletColumn c1; + c1.set_name("k1"); + c1.set_unique_id(1); + c1.set_type(FieldType::OLAP_FIELD_TYPE_INT); + schema.append_column(c1); + + auto src_column = ColumnInt32::create(); + src_column->insert_value(10); + src_column->insert_value(20); + auto int_type = std::make_shared(); + Block src_block; + src_block.insert({std::move(src_column), int_type, "k1"}); + + auto jsonb_column = ColumnString::create(); + auto serdes = create_data_type_serdes(src_block.get_data_types()); + JsonbSerializeUtil::block_to_jsonb(schema, src_block, *jsonb_column, src_block.columns(), + serdes, {}); + + ColumnPtr shared_column = ColumnInt32::create(); + const auto* original_column = shared_column.get(); + Block dst_block; + dst_block.insert({shared_column, int_type, "k1"}); + + std::unordered_map col_uid_to_idx {{1, 0}}; + std::vector default_values(1); + THROW_IF_ERROR(JsonbSerializeUtil::jsonb_to_block(serdes, *jsonb_column, col_uid_to_idx, + dst_block, default_values, {})); + + EXPECT_NE(dst_block.get_by_position(0).column.get(), original_column); + EXPECT_EQ(shared_column->size(), 0); + EXPECT_EQ(dst_block.rows(), 2); + EXPECT_EQ(assert_cast(*dst_block.get_by_position(0).column).get_data()[0], + 10); + EXPECT_EQ(assert_cast(*dst_block.get_by_position(0).column).get_data()[1], + 20); +} + TEST(BlockSerializeTest, Array) { TabletSchema schema; TabletColumn c1; diff --git a/be/test/exec/column_type_convert_test.cpp b/be/test/exec/column_type_convert_test.cpp index 5178cddbd59d2f..f336a245568cbd 100644 --- a/be/test/exec/column_type_convert_test.cpp +++ b/be/test/exec/column_type_convert_test.cpp @@ -63,8 +63,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -96,8 +95,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerWideningConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -130,8 +128,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max()); src_data.push_back(std::numeric_limits::min()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -160,8 +157,7 @@ TEST_F(ColumnTypeConverterTest, TestIntegerNarrowingConversions) { src_data.push_back(std::numeric_limits::max() + 1); src_data.push_back(std::numeric_limits::min() - 1); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(!st.ok()); @@ -189,8 +185,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back((1L << 23) - 1); src_data.push_back(1L << 23); src_data.push_back((1L << 23) + 1); - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -232,8 +227,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_col->insert_data("invalid", 7); // Invalid string src_col->insert_data("", 0); // Empty string - auto dst_nullable_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_nullable_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -289,8 +283,7 @@ TEST_F(ColumnTypeConverterTest, TestFloatingPointConversions) { src_data.push_back(-std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -325,8 +318,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-12345)); // -123.45 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -354,8 +346,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(12345)); // 123.45 src_data.push_back(Decimal32(-67890)); // -678.90 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -385,8 +376,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(12345678901234)); // Normal value: 1234567890.1234 src_data.push_back(Decimal64(-98765432109876)); // Negative value: -9876543210.9876 - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); // Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); @@ -419,8 +409,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal32(-12345)); // -123.45 src_data.push_back(Decimal32(23345)); // Too large 233.45 - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -458,8 +447,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal128V3(-102345)); src_data.push_back(Decimal128V3(203345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -499,8 +487,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal256(655363345)); src_data.push_back(Decimal256(3333333333332345)); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -539,8 +526,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -569,8 +555,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-67890); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -598,8 +583,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -628,8 +612,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(-123); // -678.90 after scaling src_data.push_back(0); // Zero check - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); @@ -667,8 +650,7 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 src_data.push_back(Decimal64(-1000000000)); // Out of range (underflow) - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); @@ -698,9 +680,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(Decimal64(999999999)); // Edge case: max for Decimal32 src_data.push_back(Decimal64(-999999999)); // Edge case: negative max for Decimal32 ASSERT_EQ(3, src_data.size()); - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -743,9 +724,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_data.push_back(std::numeric_limits::infinity()); // Infinity src_data.push_back(std::numeric_limits::quiet_NaN()); // NaN - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -791,9 +771,8 @@ TEST_F(ColumnTypeConverterTest, TestDecimalConversions) { src_col->insert_data("0.0", 3); // Zero value src_col->insert_data("9999999999.99", 13); // Edge case: max valid value within precision - auto dst_col = nullable_dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); + mutable_dst->resize(0); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -837,8 +816,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::min()); src_data.push_back(0); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -869,8 +847,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(std::numeric_limits::infinity()); src_data.push_back(std::numeric_limits::quiet_NaN()); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -901,8 +878,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("not a number", 11); src_col->insert_data("2147483648", 10); // Greater than INT32_MAX - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); auto& null_map = nullable_col.get_null_map_data(); @@ -935,9 +911,8 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(Decimal32(-67890)); // -678.90 src_data.push_back(Decimal32(0)); // Zero - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -972,9 +947,8 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { value.unchecked_set_time(2070, 1, 1, 0, 0, 0); src_data.push_back(*reinterpret_cast(&value)); // "2070-01-01" in days format - auto dst_col = dst_type->create_column(); - dst_col->resize(0); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); + mutable_dst->resize(0); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1005,8 +979,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_data.push_back(1); // true src_data.push_back(0); // false - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1041,8 +1014,7 @@ TEST_F(ColumnTypeConverterTest, TestStringConversions) { src_col->insert_data("1.5", 3); // Hive: null (not an integer) src_col->insert_data("", 0); // Hive: null - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast( @@ -1090,8 +1062,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("abc", 3); // Invalid - should be NULL src_col->insert_data("", 0); // Empty - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1137,8 +1108,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("-32769", 6); // Underflow - should be NULL src_col->insert_data("123.45", 6); // Decimal - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1182,8 +1152,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("1000000", 7); // Million src_col->insert_data("2147483648", 10); // Overflow - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1227,8 +1196,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("9223372036854775808", 19); // Overflow - should be NULL src_col->insert_data("123abc", 6); // Invalid - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1270,8 +1238,7 @@ TEST_F(ColumnTypeConverterTest, TestStringToIntegerTypes) { src_col->insert_data("0", 1); // Zero src_col->insert_data("123e45", 6); // Scientific notation - should be NULL - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& nested_col = static_cast(nullable_col.get_nested_column()); @@ -1460,8 +1427,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 2024-01-01 00:00:00.123456 auto src_col = make_datetimev2_col({{2024, 1, 1, 0, 0, 0, 123456}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); @@ -1484,8 +1450,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 1970-01-01 00:00:00.000000 // 3000-01-01 00:00:00.000000 auto src_col = make_datetimev2_col({{1970, 1, 1, 0, 0, 0, 0}, {3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1512,8 +1477,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { // 3000-01-01 00:00:00.000000(会溢出int32) auto src_col = make_datetimev2_col({{3000, 1, 1, 0, 0, 0, 0}}); - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_FALSE(st.ok()); @@ -1545,8 +1509,7 @@ TEST_F(ColumnTypeConverterTest, TestDateTimeV2ToNumericConversions) { src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 13:00:00")); src_col->get_data().push_back(parse_datetimev2_str("2022-05-01 14:00:00")); - auto dst_col = nullable_dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = nullable_dst_type->create_column(); auto& nullable_col = static_cast(*mutable_dst); auto& null_map = nullable_col.get_null_map_data(); null_map.resize_fill(src_col->size(), 0); @@ -1715,10 +1678,9 @@ TEST_F(ColumnTypeConverterTest, TestEmptyColumnConversions) { ASSERT_FALSE(converter->is_consistent()); auto src_col = ColumnInt32::create(); // Empty column (no data) - auto dst_col = dst_type->create_column(); - auto mutable_dst = dst_col->assume_mutable(); + auto mutable_dst = dst_type->create_column(); src_col->resize(0); - dst_col->resize(0); + mutable_dst->resize(0); // Perform conversion Status st = converter->convert(reinterpret_cast(src_col), mutable_dst); ASSERT_TRUE(st.ok()); diff --git a/be/test/exec/common/schema_util_rowset_test.cpp b/be/test/exec/common/schema_util_rowset_test.cpp index cf99c9824956c5..cdfb84431d395a 100644 --- a/be/test/exec/common/schema_util_rowset_test.cpp +++ b/be/test/exec/common/schema_util_rowset_test.cpp @@ -148,7 +148,7 @@ static void fill_varaint_column(auto& variant_column, int size, int uid) { } static void fill_block_with_test_data(Block* block, int size) { - auto columns = block->mutate_columns(); + auto columns = std::move(*block).mutate_columns(); // insert key for (int i = 0; i < size; i++) { auto field = Field::create_field(i); @@ -172,6 +172,7 @@ static void fill_block_with_test_data(Block* block, int size) { auto v4 = Field::create_field(i); columns[4]->insert(v4); } + block->set_columns(std::move(columns)); } static int64_t inc_id = 1000; static RowsetWriterContext rowset_writer_context(const std::unique_ptr& data_dir, diff --git a/be/test/exec/common/schema_util_test.cpp b/be/test/exec/common/schema_util_test.cpp index 273a7238fe8177..3599036fe2f76e 100644 --- a/be/test/exec/common/schema_util_test.cpp +++ b/be/test/exec/common/schema_util_test.cpp @@ -791,9 +791,7 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) { auto variant_type = std::make_shared(10, false); auto nullable_array_type = make_nullable(std::make_shared(std::make_shared())); - auto array_column = - ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()); - auto nullable_array_column = make_nullable(array_column->get_ptr()); + ColumnPtr nullable_array_column = nullable_array_type->create_column()->get_ptr(); ColumnWithTypeAndName array_col; array_col.type = nullable_array_type; @@ -817,7 +815,8 @@ TEST_F(SchemaUtilTest, TestCastColumnEdgeCases) { // Test casting from variant to variant auto variant_column = ColumnVariant::create(10, false); - variant_column->create_root(nullable_array_type, nullable_array_column->assume_mutable()); + // nullable_array_column is also stored in array_col.column (use_count=2), so mutate() clones it. + variant_column->create_root(nullable_array_type, IColumn::mutate(nullable_array_column)); ColumnWithTypeAndName variant_col; variant_col.type = variant_type; @@ -1947,14 +1946,14 @@ TEST_F(SchemaUtilTest, parse_and_materialize_variant_columns_ambiguous_paths) { // Prepare the variant column with the string column as root ColumnVariant::Subcolumns dynamic_subcolumns; dynamic_subcolumns.create_root( - ColumnVariant::Subcolumn(string_col->assume_mutable(), string_type, true)); + ColumnVariant::Subcolumn(std::move(string_col), string_type, true)); auto variant_col = ColumnVariant::create(0, false, std::move(dynamic_subcolumns)); auto variant_type = std::make_shared(); // Construct the block Block block; - block.insert(ColumnWithTypeAndName(variant_col->assume_mutable(), variant_type, "v")); + block.insert(ColumnWithTypeAndName(std::move(variant_col), variant_type, "v")); // The variant column is at index 0 std::vector variant_pos = {0}; diff --git a/be/test/exec/connector/vjdbc_connector_test.cpp b/be/test/exec/connector/vjdbc_connector_test.cpp index 16ff8689aafaf2..5ec3fb7046a5a9 100644 --- a/be/test/exec/connector/vjdbc_connector_test.cpp +++ b/be/test/exec/connector/vjdbc_connector_test.cpp @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include @@ -33,26 +34,55 @@ class JdbcUtilsTest : public ::testing::Test { void SetUp() override { // Save original config and environment original_jdbc_drivers_dir_ = config::jdbc_drivers_dir; - original_doris_home_ = getenv("DORIS_HOME"); + const char* original_doris_home = getenv("DORIS_HOME"); + if (original_doris_home != nullptr) { + original_doris_home_ = original_doris_home; + has_original_doris_home_ = true; + } // Set DORIS_HOME for testing - setenv("DORIS_HOME", "/tmp/test_doris", 1); + temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_" + std::to_string(::getpid())); + second_temp_home_ = std::filesystem::temp_directory_path() / + ("doris_jdbc_utils_test_second_" + std::to_string(::getpid())); + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + std::filesystem::create_directories(temp_home_); + setenv("DORIS_HOME", temp_home_.c_str(), 1); } void TearDown() override { // Restore original config and environment config::jdbc_drivers_dir = original_jdbc_drivers_dir_; - if (original_doris_home_) { - setenv("DORIS_HOME", original_doris_home_, 1); + if (has_original_doris_home_) { + setenv("DORIS_HOME", original_doris_home_.c_str(), 1); } else { unsetenv("DORIS_HOME"); } + std::filesystem::remove_all(temp_home_); + std::filesystem::remove_all(second_temp_home_); + } + + std::string default_driver_dir() const { + return (temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string old_driver_dir() const { return (temp_home_ / "jdbc_drivers").string(); } + + std::string second_default_driver_dir() const { + return (second_temp_home_ / "plugins" / "jdbc_drivers").string(); + } + + std::string second_old_driver_dir() const { + return (second_temp_home_ / "jdbc_drivers").string(); } -private: std::string original_jdbc_drivers_dir_; - const char* original_doris_home_ = nullptr; + std::string original_doris_home_; + bool has_original_doris_home_ = false; + std::filesystem::path temp_home_; + std::filesystem::path second_temp_home_; }; // Test resolve_driver_url with absolute URLs @@ -79,10 +109,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { std::string result_url; // Set config to default value to trigger the default directory logic - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; // Create directory and file @@ -104,10 +134,10 @@ TEST_F(JdbcUtilsTest, TestResolveDriverUrlWithRelativeUrl) { // Test resolve_driver_url with default directory TEST_F(JdbcUtilsTest, TestResolveWithDefaultConfig) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create the target directory and file for testing - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/mysql-connector.jar"; std::filesystem::create_directories(dir); @@ -138,9 +168,9 @@ TEST_F(JdbcUtilsTest, TestResolveWithCustomConfig) { } TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/existing-driver.jar"; std::filesystem::create_directories(dir); @@ -160,10 +190,10 @@ TEST_F(JdbcUtilsTest, TestDefaultDirectoryFileExistsPath) { } TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); // Create only the old directory and file (not the new one) - std::string old_dir = "/tmp/test_doris/jdbc_drivers"; + std::string old_dir = old_driver_dir(); std::string file_path = old_dir + "/fallback-driver.jar"; std::filesystem::create_directories(old_dir); @@ -183,10 +213,11 @@ TEST_F(JdbcUtilsTest, TestFallbackToOldDirectory) { } TEST_F(JdbcUtilsTest, TestPathConstruction) { - setenv("DORIS_HOME", "/tmp/test_doris2", 1); - config::jdbc_drivers_dir = "/tmp/test_doris2/plugins/jdbc_drivers"; + std::filesystem::create_directories(second_temp_home_); + setenv("DORIS_HOME", second_temp_home_.c_str(), 1); + config::jdbc_drivers_dir = second_default_driver_dir(); - std::string old_dir = "/tmp/test_doris2/jdbc_drivers"; + std::string old_dir = second_old_driver_dir(); std::string file_path = old_dir + "/test.jar"; std::filesystem::create_directories(old_dir); @@ -223,9 +254,9 @@ TEST_F(JdbcUtilsTest, TestEdgeCases) { } TEST_F(JdbcUtilsTest, TestMultipleCallsConsistency) { - config::jdbc_drivers_dir = "/tmp/test_doris/plugins/jdbc_drivers"; + config::jdbc_drivers_dir = default_driver_dir(); - std::string dir = "/tmp/test_doris/plugins/jdbc_drivers"; + std::string dir = default_driver_dir(); std::string file_path = dir + "/same-driver.jar"; std::filesystem::create_directories(dir); diff --git a/be/test/exec/exchange/exchange_writer_test.cpp b/be/test/exec/exchange/exchange_writer_test.cpp index 28481d08eb3e73..bbc81a623777ca 100644 --- a/be/test/exec/exchange/exchange_writer_test.cpp +++ b/be/test/exec/exchange/exchange_writer_test.cpp @@ -82,7 +82,7 @@ class RowExpandingPartitioner final : public PartitionerBase { _channel_ids.assign(block->rows(), 0); - auto mutable_columns = block->mutate_columns(); + auto mutable_columns = std::move(*block).mutate_columns(); for (size_t col_idx = 0; col_idx < mutable_columns.size(); ++col_idx) { mutable_columns[col_idx]->insert_from(*mutable_columns[col_idx], 0); } diff --git a/be/test/exec/operator/agg_operator_test.cpp b/be/test/exec/operator/agg_operator_test.cpp index 945fd0f9f1fc81..ae750013c84423 100644 --- a/be/test/exec/operator/agg_operator_test.cpp +++ b/be/test/exec/operator/agg_operator_test.cpp @@ -379,6 +379,96 @@ TEST_F(AggOperatorTestWithGroupBy, test_need_finalize) { } } +TEST_F(AggOperatorTestWithGroupBy, test_need_finalize_mem_reuse_with_shared_output_columns) { + OperatorContext ctx; + auto sink_op = std::make_shared(); + sink_op->_aggregate_evaluators.push_back(create_mock_agg_fn_evaluator( + ctx.pool, MockSlotRef::create_mock_contexts(1, std::make_shared()), + false, false)); + sink_op->_pool = &ctx.pool; + EXPECT_TRUE(sink_op->prepare(&ctx.state).ok()); + sink_op->_probe_expr_ctxs = + MockSlotRef::create_mock_contexts(0, std::make_shared()); + + auto source_op = std::make_shared(); + source_op->mock_row_descriptor.reset(new MockRowDescriptor { + {std::make_shared(), std::make_shared()}, &ctx.pool}); + source_op->_without_key = false; + source_op->_needs_finalize = true; + EXPECT_TRUE(source_op->prepare(&ctx.state).ok()); + + auto shared_state = init_sink_and_source(sink_op, source_op, ctx); + + { + Block block { + ColumnHelper::create_column_with_name({1, 1, 2, 2, 2, 3}), + ColumnHelper::create_column_with_name({1, 1, 100, 100, 100, 1000})}; + auto st = sink_op->sink(&ctx.state, &block, true); + EXPECT_TRUE(st.ok()) << st.msg(); + } + + Block block {ColumnHelper::create_column_with_name({}), + ColumnHelper::create_column_with_name({})}; + auto old_key_column = block.get_by_position(0).column; + auto old_value_column = block.get_by_position(1).column; + bool eos = false; + auto st = source_op->get_block(&ctx.state, &block, &eos); + ASSERT_TRUE(st.ok()) << st.to_string(); + + EXPECT_TRUE(eos); + EXPECT_EQ(old_key_column->size(), 0); + EXPECT_EQ(old_value_column->size(), 0); + EXPECT_TRUE(ColumnHelper::block_equal( + block, Block {ColumnHelper::create_column_with_name({1, 2, 3}), + ColumnHelper::create_column_with_name({2, 300, 1000})})); +} + +TEST_F(AggOperatorTestWithGroupBy, test_no_need_finalize_mem_reuse_with_shared_output_columns) { + OperatorContext ctx; + auto sink_op = std::make_shared(); + sink_op->_aggregate_evaluators.push_back(create_mock_agg_fn_evaluator( + ctx.pool, MockSlotRef::create_mock_contexts(1, std::make_shared()), + false, false)); + sink_op->_pool = &ctx.pool; + EXPECT_TRUE(sink_op->prepare(&ctx.state).ok()); + sink_op->_probe_expr_ctxs = + MockSlotRef::create_mock_contexts(0, std::make_shared()); + + auto source_op = std::make_shared(); + source_op->mock_row_descriptor.reset(new MockRowDescriptor { + {std::make_shared(), std::make_shared()}, &ctx.pool}); + source_op->_without_key = false; + source_op->_needs_finalize = false; + EXPECT_TRUE(source_op->prepare(&ctx.state).ok()); + + auto shared_state = init_sink_and_source(sink_op, source_op, ctx); + + { + Block block { + ColumnHelper::create_column_with_name({1, 1, 2, 2, 2, 3}), + ColumnHelper::create_column_with_name({1, 1, 100, 100, 100, 1000})}; + auto st = sink_op->sink(&ctx.state, &block, true); + EXPECT_TRUE(st.ok()) << st.msg(); + } + + const auto& aggregate_function = sink_op->_aggregate_evaluators[0]->function(); + auto serialized_type = aggregate_function->get_serialized_type(); + Block block {ColumnHelper::create_column_with_name({}), + ColumnWithTypeAndName(aggregate_function->create_serialize_column(), + serialized_type, "")}; + auto old_key_column = block.get_by_position(0).column; + auto old_value_column = block.get_by_position(1).column; + bool eos = false; + auto st = source_op->get_block(&ctx.state, &block, &eos); + ASSERT_TRUE(st.ok()) << st.to_string(); + + EXPECT_TRUE(eos); + EXPECT_EQ(block.rows(), 3); + EXPECT_EQ(old_key_column->size(), 0); + EXPECT_EQ(old_value_column->size(), 0); + EXPECT_TRUE(check_and_get_column(*block.get_by_position(1).column)); +} + TEST_F(AggOperatorTestWithGroupBy, test_2_phase) { /* group by key | sum(value) diff --git a/be/test/exec/operator/datagen_operator_test.cpp b/be/test/exec/operator/datagen_operator_test.cpp index 2130fe7b4358f0..84b59466f355b8 100644 --- a/be/test/exec/operator/datagen_operator_test.cpp +++ b/be/test/exec/operator/datagen_operator_test.cpp @@ -121,4 +121,37 @@ TEST(DataGenSourceOperatorTest, testConst) { ColumnHelper::create_column({5, 5, 5, 5, 5, 5, 5, 5, 5, 5}))); } +TEST(DataGenSourceOperatorTest, testMemReuseWithSharedOutputColumn) { + OperatorContext ctx; + + DataGenSourceOperatorX op; + + std::vector data_types {std::make_shared()}; + auto row_desc = std::make_unique(data_types, &ctx.pool); + op._tuple_id = 0; + op._tuple_desc = row_desc->tuple_desc_map[0]; + + TDataGenScanRange data_gen_scan_range; + data_gen_scan_range.numbers_params.useConst = false; + data_gen_scan_range.numbers_params.constValue = 0; + data_gen_scan_range.numbers_params.totalNumbers = 10; + + TScanRangeParams scan_range_param; + scan_range_param.scan_range.data_gen_scan_range = data_gen_scan_range; + + OperatorHelper::init_local_state(ctx, op, {scan_range_param}); + + Block block {ColumnHelper::create_column_with_name({})}; + auto old_output_column = block.get_by_position(0).column; + bool eos = false; + auto st = op.get_block(&ctx.state, &block, &eos); + ASSERT_TRUE(st.ok()) << st.to_string(); + + EXPECT_TRUE(eos); + EXPECT_EQ(old_output_column->size(), 0); + EXPECT_TRUE(ColumnHelper::column_equal( + block.get_by_position(0).column, + ColumnHelper::create_column({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}))); +} + } // namespace doris diff --git a/be/test/exec/operator/set_operator_test.cpp b/be/test/exec/operator/set_operator_test.cpp index 7e3bcfef30d3be..80f40d66e49a75 100644 --- a/be/test/exec/operator/set_operator_test.cpp +++ b/be/test/exec/operator/set_operator_test.cpp @@ -17,6 +17,7 @@ #include +#include #include #include @@ -384,6 +385,53 @@ TEST_F(ExceptOperatorTest, test_output_null_batsh_size) { } } +TEST_F(ExceptOperatorTest, test_mem_reuse_with_shared_output_column) { + state->_batch_size = 2; + init_op(2, {std::make_shared()}); + + sink_op->_child_exprs = + MockSlotRef::create_mock_contexts(DataTypes {std::make_shared()}); + probe_sink_ops[0]->_child_exprs = + MockSlotRef::create_mock_contexts(DataTypes {std::make_shared()}); + + init_local_state(); + + { + Block block = ColumnHelper::create_block({1, 2, 3}); + auto st = sink_op->sink(state.get(), &block, true); + EXPECT_TRUE(st.ok()) << st.to_string(); + } + + { + Block block = ColumnHelper::create_block({}); + auto st = probe_sink_ops[0]->sink(states[0].get(), &block, true); + EXPECT_TRUE(st.ok()) << st.to_string(); + } + + Block output {ColumnHelper::create_column_with_name({})}; + auto old_output_column = output.get_by_position(0).column; + + bool eos = false; + std::vector values; + while (!eos) { + auto st = source_op->get_block(state.get(), &output, &eos); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_GT(output.rows(), 0); + + const auto& column = output.get_by_position(0).column; + for (size_t i = 0; i < column->size(); ++i) { + values.push_back(column->get_int(i)); + } + if (!eos) { + output.clear_column_data(); + } + } + + EXPECT_EQ(old_output_column->size(), 0); + std::sort(values.begin(), values.end()); + EXPECT_EQ(values, std::vector({1, 2, 3})); +} + TEST_F(IntersectOperatorTest, test_sink_large_string_data_over_4g) { // Test that SetSinkOperatorX can handle string data exceeding 4GB total size. // This exercises the convert_column_if_overflow path in _process_build_block. @@ -688,4 +736,4 @@ TEST_F(ExceptOperatorTest, test_refresh_hash_table) { EXPECT_TRUE(block.empty()); } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exec/operator/table_function_operator_test.cpp b/be/test/exec/operator/table_function_operator_test.cpp index 93bef8246ae95d..b6057ede0f5e9b 100644 --- a/be/test/exec/operator/table_function_operator_test.cpp +++ b/be/test/exec/operator/table_function_operator_test.cpp @@ -1478,7 +1478,7 @@ TEST_F(UnnestTest, inner) { unnested_tag_column->insert_data((const char*)(ids.data()), 0); expected_output_block.insert(ColumnWithTypeAndName( make_nullable(std::move(unnested_tag_column)), data_type_int_nullable, "tag")); - auto mutable_columns = expected_output_block.mutate_columns(); + auto mutable_columns = std::move(expected_output_block).mutate_columns(); mutable_columns[0]->insert_from( *table_func_local_state->_child_block->get_by_position(0).column, 0); mutable_columns[0]->insert_from( @@ -1587,7 +1587,7 @@ TEST_F(UnnestTest, outer) { unnested_tag_column->insert_data((const char*)(ids.data()), 0); expected_output_block.insert(ColumnWithTypeAndName( make_nullable(std::move(unnested_tag_column)), data_type_int_nullable, "tag")); - auto mutable_columns = expected_output_block.mutate_columns(); + auto mutable_columns = std::move(expected_output_block).mutate_columns(); mutable_columns[0]->insert_from( *table_func_local_state->_child_block->get_by_position(0).column, 0); mutable_columns[0]->insert_from( @@ -1613,7 +1613,7 @@ TEST_F(UnnestTest, outer) { output_block.clear(); expected_output_block.clear_column_data(); - mutable_columns = expected_output_block.mutate_columns(); + mutable_columns = std::move(expected_output_block).mutate_columns(); mutable_columns[0]->insert_from( *table_func_local_state->_child_block->get_by_position(0).column, 1); mutable_columns[1]->insert_default(); diff --git a/be/test/exec/pipeline/local_exchanger_test.cpp b/be/test/exec/pipeline/local_exchanger_test.cpp index 3051625a3ee530..af02a5802411fa 100644 --- a/be/test/exec/pipeline/local_exchanger_test.cpp +++ b/be/test/exec/pipeline/local_exchanger_test.cpp @@ -21,7 +21,9 @@ #include #include "common/status.h" +#include "core/assert_cast.h" #include "core/column/column.h" +#include "core/column/column_const.h" #include "core/column/column_vector.h" #include "core/data_type/data_type.h" #include "exec/exchange/local_exchange_sink_operator.h" @@ -1090,6 +1092,13 @@ TEST_F(LocalExchangerTest, AdaptivePassthroughExchanger) { _local_states[i].get()}), Status::OK()); EXPECT_EQ(block.rows(), j == 1 ? 0 : num_rows_per_block); + if (j == 0) { + const auto& data = + assert_cast(*block.get_by_position(0).column) + .get_data(); + EXPECT_EQ(data.front(), i); + EXPECT_EQ(data.back(), i); + } EXPECT_FALSE(eos); EXPECT_EQ(_local_states[i]->_dependency->ready(), j != 1); } @@ -1307,4 +1316,82 @@ TEST_F(LocalExchangerTest, TestShuffleExchangerWrongMap) { .is()); } } + +TEST_F(LocalExchangerTest, ShuffleExchangerRestoreOutputBlockOnAddRowsError) { + const int num_sink = 1; + const int num_sources = 1; + const int num_partitions = 1; + const int free_block_limit = 0; + std::map shuffle_idx_to_instance_idx {{0, 0}}; + + auto profile = std::make_shared(""); + auto shared_state = LocalExchangeSharedState::create_shared(num_partitions); + shared_state->exchanger = ShuffleExchanger::create_unique(num_sink, num_sources, num_partitions, + free_block_limit); + auto sink_dep = std::make_shared(0, 0, "LOCAL_EXCHANGE_SINK_DEPENDENCY", true); + sink_dep->set_shared_state(shared_state.get()); + shared_state->sink_deps.push_back(sink_dep); + shared_state->create_source_dependencies(num_sources, 0, 0, "TEST"); + + auto* exchanger = (ShuffleExchanger*)shared_state->exchanger.get(); + auto sink_local_state = std::make_unique(nullptr, nullptr); + sink_local_state->_exchanger = shared_state->exchanger.get(); + sink_local_state->_compute_hash_value_timer = ADD_TIMER(profile, "ComputeHashValueTime"); + sink_local_state->_distribute_timer = ADD_TIMER(profile, "DistributeTimer"); + sink_local_state->_partitioner = + std::make_unique>(num_partitions); + sink_local_state->_channel_id = 0; + sink_local_state->_shared_state = shared_state.get(); + sink_local_state->_dependency = sink_dep.get(); + sink_local_state->_memory_used_counter = + profile->AddHighWaterMarkCounter("SinkMemoryUsage", TUnit::BYTES, "", 1); + + auto source_local_state = + std::make_unique(_runtime_state.get(), nullptr); + source_local_state->_exchanger = shared_state->exchanger.get(); + source_local_state->_get_block_failed_counter = ADD_TIMER(profile, "GetBlockFailedCounter"); + source_local_state->_copy_data_timer = ADD_TIMER(profile, "CopyDataTimer"); + source_local_state->_channel_id = 0; + source_local_state->_shared_state = shared_state.get(); + source_local_state->_dependency = shared_state->get_dep_by_channel_id(0).front().get(); + source_local_state->_memory_used_counter = + profile->AddHighWaterMarkCounter("MemoryUsage", TUnit::BYTES, "", 1); + shared_state->mem_counters[0] = source_local_state->_memory_used_counter; + + DataTypePtr int_type = std::make_shared(); + Block in_block; + auto in_col = ColumnInt32::create(); + in_col->insert_many_vals(7, 2); + in_block.insert({std::move(in_col), int_type, "test_int_col0"}); + bool in_eos = false; + SinkInfo sink_info = {.channel_id = &sink_local_state->_channel_id, + .partitioner = sink_local_state->_partitioner.get(), + .local_state = sink_local_state.get(), + .shuffle_idx_to_instance_idx = &shuffle_idx_to_instance_idx, + .ins_idx = 0}; + EXPECT_EQ(exchanger->sink(_runtime_state.get(), &in_block, in_eos, + {sink_local_state->_compute_hash_value_timer, + sink_local_state->_distribute_timer, nullptr}, + sink_info), + Status::OK()); + + Block output_block; + auto const_value = ColumnInt32::create(); + const_value->insert_many_vals(42, 1); + output_block.insert( + {ColumnConst::create(const_value->get_ptr(), 1), int_type, "test_int_col0"}); + + bool eos = false; + const auto status = + exchanger->get_block(_runtime_state.get(), &output_block, &eos, + {nullptr, nullptr, source_local_state->_copy_data_timer}, + {source_local_state->_channel_id, source_local_state.get()}); + EXPECT_FALSE(status.ok()); + ASSERT_EQ(output_block.columns(), 1); + const auto& restored_column = output_block.get_by_position(0).column; + ASSERT_NE(restored_column.get(), nullptr); + EXPECT_TRUE(is_column(*restored_column)); + EXPECT_EQ(output_block.rows(), 1); + EXPECT_NO_THROW(output_block.check_number_of_rows()); +} } // namespace doris diff --git a/be/test/exprs/aggregate/agg_array_agg_test.cpp b/be/test/exprs/aggregate/agg_array_agg_test.cpp index 97b6d99456b02a..d65565a4c99b18 100644 --- a/be/test/exprs/aggregate/agg_array_agg_test.cpp +++ b/be/test/exprs/aggregate/agg_array_agg_test.cpp @@ -42,6 +42,7 @@ #include "exprs/aggregate/agg_function_test.h" #include "exprs/aggregate/aggregate_function.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" +#include "exprs/aggregate/aggregate_function_sort.h" #include "gtest/gtest_pred_impl.h" namespace doris { @@ -122,4 +123,110 @@ TEST_F(AggregateFunctionArrayAggTest, test_array_agg_astr_nullable) { ColumnWithTypeAndName(std::move(array_column), array_data_type, "column")); } +TEST_F(AggregateFunctionArrayAggTest, test_array_agg_astr_foreach) { + auto data_type = make_nullable(std::make_shared()); + auto array_data_type = std::make_shared(data_type); + create_agg("array_agg_foreach", false, {array_data_type}, array_data_type); + + auto off_column = ColumnOffset64::create(); + auto data_column = data_type->create_column(); + std::vector offs = {0, 4}; + std::vector vals = {1, 2, 3}; + for (size_t i = 1; i < offs.size(); ++i) { + off_column->insert_data((const char*)(&offs[i]), 0); + } + data_column->insert_default(); + for (auto& v : vals) { + data_column->insert_data((const char*)(&v), sizeof(v)); + } + auto array_column = ColumnArray::create(data_column->clone(), off_column->clone()); + + auto off_column2 = ColumnOffset64::create(); + std::vector offs2 = {0, 1, 2, 3, 4}; + for (size_t i = 1; i < offs2.size(); ++i) { + off_column2->insert_data((const char*)(&offs2[i]), 0); + } + + auto array_array_data_type = std::make_shared(array_data_type); + auto array_array_off_column = ColumnOffset64::create(); + array_array_off_column->insert_value(4); + auto nested_array_column = ColumnArray::create(data_column->clone(), off_column2->clone()); + auto nested_array_size = nested_array_column->size(); + auto array_array_column = + ColumnArray::create(ColumnNullable::create(std::move(nested_array_column), + ColumnUInt8::create(nested_array_size, 0)), + array_array_off_column->clone()); + ASSERT_TRUE(array_array_data_type->check_column(*array_array_column).ok()); + + execute(Block({ColumnWithTypeAndName(array_column->clone(), array_data_type, "")}), + ColumnWithTypeAndName(std::move(array_array_column), array_array_data_type, "column")); +} + +TEST_F(AggregateFunctionArrayAggTest, test_array_agg_aint64_foreach) { + auto data_type = make_nullable(std::make_shared()); + auto array_data_type = std::make_shared(data_type); + create_agg("array_agg_foreach", false, {array_data_type}, array_data_type); + + auto off_column = ColumnOffset64::create(); + auto data_column = data_type->create_column(); + std::vector offs = {0, 4}; + std::vector vals = {1, 2, 3}; + for (size_t i = 1; i < offs.size(); ++i) { + off_column->insert_data((const char*)(&offs[i]), 0); + } + data_column->insert_default(); + for (auto& v : vals) { + data_column->insert_data((const char*)(&v), sizeof(v)); + } + auto array_column = ColumnArray::create(data_column->clone(), off_column->clone()); + + auto off_column2 = ColumnOffset64::create(); + std::vector offs2 = {0, 1, 2, 3, 4}; + for (size_t i = 1; i < offs2.size(); ++i) { + off_column2->insert_data((const char*)(&offs2[i]), 0); + } + + auto array_array_data_type = std::make_shared(array_data_type); + auto array_array_off_column = ColumnOffset64::create(); + array_array_off_column->insert_value(4); + auto nested_array_column = ColumnArray::create(data_column->clone(), off_column2->clone()); + auto nested_array_size = nested_array_column->size(); + auto array_array_column = + ColumnArray::create(ColumnNullable::create(std::move(nested_array_column), + ColumnUInt8::create(nested_array_size, 0)), + array_array_off_column->clone()); + ASSERT_TRUE(array_array_data_type->check_column(*array_array_column).ok()); + + execute(Block({ColumnWithTypeAndName(array_column->clone(), array_data_type, "")}), + ColumnWithTypeAndName(std::move(array_array_column), array_array_data_type, "column")); +} + +TEST(AggregateFunctionSortDataTest, merge_does_not_share_rhs_block) { + auto data_type = std::make_shared(); + Block prototype({ColumnWithTypeAndName(data_type->create_column(), data_type, "value"), + ColumnWithTypeAndName(data_type->create_column(), data_type, "sort_key")}); + SortDescription sort_desc {SortColumnDescription(1, 1, 1)}; + + AggregateFunctionSortData lhs(sort_desc, prototype); + AggregateFunctionSortData rhs1(sort_desc, prototype); + AggregateFunctionSortData rhs2(sort_desc, prototype); + + auto values = ColumnInt64::create(); + values->insert_value(10); + values->insert_value(20); + auto sort_keys = ColumnInt64::create(); + sort_keys->insert_value(2); + sort_keys->insert_value(1); + const IColumn* row0[] = {values.get(), sort_keys.get()}; + const IColumn* row1[] = {values.get(), sort_keys.get()}; + + rhs1.add(row0, 2, 0); + rhs2.add(row1, 2, 1); + + lhs.merge(rhs1); + ASSERT_NO_THROW(lhs.merge(rhs2)); + ASSERT_EQ(lhs.block.rows(), 2); + ASSERT_EQ(rhs1.block.rows(), 1); +} + } // namespace doris diff --git a/be/test/exprs/aggregate/vec_count_by_enum_test.cpp b/be/test/exprs/aggregate/vec_count_by_enum_test.cpp index b60b7ab7045da6..cbb9a5888d6e3f 100644 --- a/be/test/exprs/aggregate/vec_count_by_enum_test.cpp +++ b/be/test/exprs/aggregate/vec_count_by_enum_test.cpp @@ -32,6 +32,22 @@ namespace doris { void register_aggregate_function_count_by_enum(AggregateFunctionSimpleFactory& factory); +static ColumnPtr create_nullable_gender_column() { + auto column_f1 = ColumnString::create(); + column_f1->insert(Field::create_field("F")); + column_f1->insert(Field::create_field("F")); + column_f1->insert(Field::create_field("M")); + column_f1->insert_default(); + column_f1->insert_default(); + + auto null_map = ColumnUInt8::create(); + std::vector offs = {0, 0, 0, 1, 1}; + for (int i = 0; i < offs.size(); ++i) { + null_map->insert(Field::create_field(offs[i])); + } + return ColumnNullable::create(std::move(column_f1), std::move(null_map)); +} + class VCountByEnumTest : public testing::Test { public: AggregateFunctionPtr agg_function; @@ -129,18 +145,7 @@ TEST_F(VCountByEnumTest, testNotNullableSample) { TEST_F(VCountByEnumTest, testNullableSample) { Arena arena; const int batch_size = 5; - auto column_f1 = ColumnString::create(); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("M")); - ColumnPtr column_f1_ptr = std::move(column_f1); - auto null_map = ColumnUInt8::create(); - std::vector offs = {0, 0, 0, 1, 1}; - for (int i = 0; i < offs.size(); ++i) { - null_map->insert(Field::create_field(offs[i])); - } - - auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr, std::move(null_map)); + auto nullable_column_f1 = create_nullable_gender_column(); std::unique_ptr memory(new char[agg_function->size_of_data()]); AggregateDataPtr place = memory.get(); @@ -176,18 +181,7 @@ TEST_F(VCountByEnumTest, testNullableSample) { TEST_F(VCountByEnumTest, testNoMerge) { Arena arena; const int batch_size = 5; - auto column_f1 = ColumnString::create(); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("M")); - ColumnPtr column_f1_ptr = std::move(column_f1); - auto null_map = ColumnUInt8::create(); - std::vector offs = {0, 0, 0, 1, 1}; - for (int i = 0; i < offs.size(); ++i) { - null_map->insert(Field::create_field(offs[i])); - } - - auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr, std::move(null_map)); + auto nullable_column_f1 = create_nullable_gender_column(); std::unique_ptr memory(new char[agg_function->size_of_data()]); AggregateDataPtr place = memory.get(); @@ -216,17 +210,7 @@ TEST_F(VCountByEnumTest, testNoMerge) { TEST_F(VCountByEnumTest, testSerialize) { Arena arena; const int batch_size = 5; - auto column_f1 = ColumnString::create(); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("F")); - column_f1->insert(Field::create_field("M")); - ColumnPtr column_f1_ptr = std::move(column_f1); - auto null_map = ColumnUInt8::create(); - std::vector offs = {0, 0, 0, 1, 1}; - for (int i = 0; i < offs.size(); ++i) { - null_map->insert(Field::create_field(offs[i])); - } - auto nullable_column_f1 = ColumnNullable::create(column_f1_ptr, std::move(null_map)); + auto nullable_column_f1 = create_nullable_gender_column(); std::unique_ptr memory(new char[agg_function->size_of_data()]); AggregateDataPtr place = memory.get(); @@ -262,17 +246,7 @@ TEST_F(VCountByEnumTest, testSerialize) { EXPECT_EQ(item0["null"].GetInt(), 2); EXPECT_EQ(item0["all"].GetInt(), 5); - auto column_f1_2 = ColumnString::create(); - column_f1_2->insert(Field::create_field("F")); - column_f1_2->insert(Field::create_field("F")); - column_f1_2->insert(Field::create_field("M")); - ColumnPtr column_f1_2_ptr = std::move(column_f1_2); - auto null_map_2 = ColumnUInt8::create(); - std::vector offs_2 = {0, 0, 0, 1, 1}; - for (int i = 0; i < offs.size(); ++i) { - null_map_2->insert(Field::create_field(offs_2[i])); - } - auto nullable_column_f1_2 = ColumnNullable::create(column_f1_2_ptr, std::move(null_map_2)); + auto nullable_column_f1_2 = create_nullable_gender_column(); std::unique_ptr memory3(new char[agg_function->size_of_data()]); AggregateDataPtr place3 = memory3.get(); diff --git a/be/test/exprs/aggregate/vec_retention_test.cpp b/be/test/exprs/aggregate/vec_retention_test.cpp index ea22645fb327a5..21966fb7f986f8 100644 --- a/be/test/exprs/aggregate/vec_retention_test.cpp +++ b/be/test/exprs/aggregate/vec_retention_test.cpp @@ -25,6 +25,7 @@ #include "common/logging.h" #include "core/assert_cast.h" #include "core/column/column_array.h" +#include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h" #include "core/data_type/data_type.h" @@ -43,6 +44,18 @@ namespace doris { void register_aggregate_function_retention(AggregateFunctionSimpleFactory& factory); +namespace { +ColumnArray& retention_result_array(IColumn& column) { + return assert_cast(column); +} + +ColumnUInt8::Container& retention_result_data(IColumn& column) { + auto& array = retention_result_array(column); + auto& nested = assert_cast(array.get_data()); + return assert_cast(nested.get_nested_column()).get_data(); +} +} // namespace + class VRetentionTest : public testing::Test { public: AggregateFunctionPtr agg_function; @@ -83,25 +96,23 @@ TEST_F(VRetentionTest, testEmpty) { agg_function->create(place2); agg_function->merge(place, place2, arena); - auto column_result = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place, *column_result); - auto& result = assert_cast(column_result->get_data()).get_data(); + auto& result = retention_result_data(*column_result); for (int i = 0; i < result.size(); i++) { EXPECT_EQ(result[i], 0); } - auto column_result2 = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result2 = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place2, *column_result2); - auto& result2 = assert_cast(column_result2->get_data()).get_data(); + auto& result2 = retention_result_data(*column_result2); for (int i = 0; i < result2.size(); i++) { EXPECT_EQ(result2[i], 0); } - EXPECT_EQ(column_result2->get_offsets()[-1], 0); - EXPECT_EQ(column_result2->get_offsets()[0], 3); - EXPECT_EQ(column_result2->get_offsets().size(), 1); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[-1], 0); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[0], 3); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets().size(), 1); agg_function->destroy(place); agg_function->destroy(place2); } @@ -141,17 +152,16 @@ TEST_F(VRetentionTest, testSample) { agg_function->merge(place2, place, arena); - auto column_result2 = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result2 = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place2, *column_result2); - auto& result2 = assert_cast(column_result2->get_data()).get_data(); + auto& result2 = retention_result_data(*column_result2); for (int i = 0; i < result2.size(); i++) { EXPECT_EQ(result2[i], 1); } - EXPECT_EQ(column_result2->get_offsets()[-1], 0); - EXPECT_EQ(column_result2->get_offsets()[0], 3); - EXPECT_EQ(column_result2->get_offsets().size(), 1); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[-1], 0); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[0], 3); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets().size(), 1); agg_function->destroy(place2); } @@ -184,16 +194,15 @@ TEST_F(VRetentionTest, testNoMerge) { agg_function->add(place, column, i, arena); } - auto column_result = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place, *column_result); - auto& result = assert_cast(column_result->get_data()).get_data(); + auto& result = retention_result_data(*column_result); for (int i = 0; i < result.size(); i++) { EXPECT_EQ(result[i], 1); } - EXPECT_EQ(column_result->get_offsets()[-1], 0); - EXPECT_EQ(column_result->get_offsets()[0], 3); - EXPECT_EQ(column_result->get_offsets().size(), 1); + EXPECT_EQ(retention_result_array(*column_result).get_offsets()[-1], 0); + EXPECT_EQ(retention_result_array(*column_result).get_offsets()[0], 3); + EXPECT_EQ(retention_result_array(*column_result).get_offsets().size(), 1); agg_function->destroy(place); } @@ -233,10 +242,9 @@ TEST_F(VRetentionTest, testSerialize) { VectorBufferReader buf_reader(buf.get_data_at(0)); agg_function->deserialize(place2, buf_reader, arena); - auto column_result = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place2, *column_result); - auto& result = assert_cast(column_result->get_data()).get_data(); + auto& result = retention_result_data(*column_result); for (int i = 0; i < result.size(); i++) { if (i == 0) { EXPECT_EQ(result[i], 1); @@ -267,10 +275,9 @@ TEST_F(VRetentionTest, testSerialize) { agg_function->merge(place2, place3, arena); - auto column_result2 = - ColumnArray::create(((DataTypePtr)std::make_shared())->create_column()); + auto column_result2 = agg_function->get_return_type()->create_column(); agg_function->insert_result_into(place2, *column_result2); - auto& result2 = assert_cast(column_result2->get_data()).get_data(); + auto& result2 = retention_result_data(*column_result2); for (int i = 0; i < result2.size(); i++) { if (i == result2.size() - 1) { EXPECT_EQ(result2[i], 0); @@ -279,9 +286,9 @@ TEST_F(VRetentionTest, testSerialize) { } } - EXPECT_EQ(column_result2->get_offsets()[-1], 0); - EXPECT_EQ(column_result2->get_offsets()[0], 3); - EXPECT_EQ(column_result2->get_offsets().size(), 1); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[-1], 0); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets()[0], 3); + EXPECT_EQ(retention_result_array(*column_result2).get_offsets().size(), 1); agg_function->destroy(place2); agg_function->destroy(place3); diff --git a/be/test/exprs/function/cast/function_variant_cast_test.cpp b/be/test/exprs/function/cast/function_variant_cast_test.cpp index 8f710188b40290..960637bf1507d0 100644 --- a/be/test/exprs/function/cast/function_variant_cast_test.cpp +++ b/be/test/exprs/function/cast/function_variant_cast_test.cpp @@ -20,6 +20,7 @@ #include "common/status.h" #include "core/column/column_array.h" #include "core/column/column_decimal.h" +#include "core/column/column_nullable.h" #include "core/column/column_variant.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_decimal.h" @@ -132,15 +133,10 @@ TEST(FunctionVariantCast, CastToVariant) { { auto array_type = std::make_shared(std::make_shared()); auto variant_type = std::make_shared(); - auto array_col = - ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create()); - auto& data = assert_cast(array_col->get_data()); - auto& offsets = array_col->get_offsets(); - - data.insert(Field::create_field(1)); - data.insert(Field::create_field(2)); - data.insert(Field::create_field(3)); - offsets.push_back(3); + auto array_col = array_type->create_column(); + array_col->insert(Field::create_field( + Array {Field::create_field(1), Field::create_field(2), + Field::create_field(3)})); ColumnsWithTypeAndName arguments {{array_col->get_ptr(), array_type, "array_col"}, {nullptr, variant_type, "variant_type"}}; @@ -248,9 +244,7 @@ TEST(FunctionVariantCast, CastFromVariant) { auto variant_col = ColumnVariant::create(0, false); // Create a variant column with array values - variant_col->create_root( - array_type, - ColumnArray::create(ColumnInt32::create(), ColumnArray::ColumnOffsets::create())); + variant_col->create_root(array_type, array_type->create_column()); MutableColumnPtr data = variant_col->get_root(); Field a = Field::create_field(Array {Field::create_field(1), @@ -279,11 +273,14 @@ TEST(FunctionVariantCast, CastFromVariant) { const auto* array_result = assert_cast(remove_nullable(result_col).get()); ASSERT_EQ(array_result->size(), 1); - const auto& result_data = assert_cast(array_result->get_data()); + const auto& result_nullable = assert_cast(array_result->get_data()); + const auto& result_data = + assert_cast(result_nullable.get_nested_column()); ASSERT_EQ(result_data.size(), 3); ASSERT_EQ(result_data.get_element(0), 1); ASSERT_EQ(result_data.get_element(1), 2); ASSERT_EQ(result_data.get_element(2), 3); + ASSERT_FALSE(result_nullable.has_null()); } } @@ -426,8 +423,7 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) { auto result_col = block.get_by_position(result_column).column; ASSERT_NE(result_col.get(), nullptr); const auto* string_result = assert_cast(result_col.get()); - // just call ConvertImplGenericToString which will insert all source column data to ColumnString - ASSERT_EQ(string_result->size(), variant_col->size()); + ASSERT_EQ(string_result->size(), 1); ASSERT_EQ(string_result->get_data_at(0).to_string(), "{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}"); } @@ -436,7 +432,49 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) { { auto variant_col = construct_basic_varint_column(); variant_col->finalize(); - auto nullable_variant_col = make_nullable(variant_col->get_ptr()); + const auto rows = variant_col->size(); + auto nullable_variant_col = + ColumnNullable::create(std::move(variant_col), ColumnUInt8::create(rows, 0)); + + auto nullable_string_type = make_nullable(std::make_shared()); + auto variant_type = std::make_shared(); + auto nullable_variant_type = make_nullable(variant_type); + + ColumnsWithTypeAndName arguments { + {nullable_variant_col->get_ptr(), nullable_variant_type, "variant_col"}, + {nullptr, nullable_string_type, "nullable_string_type"}}; + + auto function = SimpleFunctionFactory::instance().get_function("CAST", arguments, + nullable_string_type); + ASSERT_NE(function, nullptr); + + Block block {arguments}; + size_t result_column = block.columns(); + block.insert({nullptr, nullable_string_type, "result"}); + RuntimeState state; + auto ctx = FunctionContext::create_context(&state, {}, {}); + ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 1).ok()); + + auto result_col = block.get_by_position(result_column).column; + ASSERT_NE(result_col.get(), nullptr); + const auto* nullable_result = assert_cast(result_col.get()); + ASSERT_EQ(nullable_result->size(), 1); + ASSERT_FALSE(nullable_result->is_null_at(0)); + const auto* string_result = + assert_cast(&nullable_result->get_nested_column()); + ASSERT_EQ(string_result->get_data_at(0).to_string(), + "{\"v\":{\"a\":20,\"b\":\"20\",\"c\":20,\"e\":\"50\",\"f\":20}}"); + } + + // Test case 5: nullable source null-map is preserved after the nested string cast is limited + // to input_rows_count. + { + auto variant_col = construct_basic_varint_column(); + variant_col->finalize(); + auto null_map = ColumnUInt8::create(variant_col->size(), 0); + null_map->get_data()[0] = 1; + auto nullable_variant_col = + ColumnNullable::create(std::move(variant_col), std::move(null_map)); auto nullable_string_type = make_nullable(std::make_shared()); auto variant_type = std::make_shared(); @@ -461,7 +499,7 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) { ASSERT_NE(result_col.get(), nullptr); const auto* nullable_result = assert_cast(result_col.get()); ASSERT_EQ(nullable_result->size(), 1); - ASSERT_TRUE(nullable_result->is_null_at(1)); + ASSERT_TRUE(nullable_result->is_null_at(0)); } } @@ -611,4 +649,4 @@ TEST(FunctionVariantCast, CastFromVariantStrictModeRegression) { } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exprs/function/function_is_null_test.cpp b/be/test/exprs/function/function_is_null_test.cpp index d637175f05272a..e6420aedce9124 100644 --- a/be/test/exprs/function/function_is_null_test.cpp +++ b/be/test/exprs/function/function_is_null_test.cpp @@ -158,7 +158,7 @@ TEST_F(FunctionIsNullTest, gc_binlogs_test) { const auto& rowset_writer = res.value(); Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); Field key = Field::create_field(10); Field v1 = Field::create_field("v1"); @@ -323,7 +323,7 @@ TEST_F(FunctionIsNullTest, evaluate_inverted_index_corner_cases) { const auto& rowset_writer = res.value(); Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Create block with NO null values to test the scenario where // iterator might not have null bitmap or it's nullptr diff --git a/be/test/exprs/function/function_variant_element_test.cpp b/be/test/exprs/function/function_variant_element_test.cpp index c16e2844ad9f44..98265fda3fa76e 100644 --- a/be/test/exprs/function/function_variant_element_test.cpp +++ b/be/test/exprs/function/function_variant_element_test.cpp @@ -40,7 +40,7 @@ TEST(function_variant_element_test, extract_from_sparse_column) { sparse_column_offsets.push_back(sparse_column_keys->size()); variant_ptr->get_subcolumn({})->insert_default(); variant_ptr->set_num_rows(1); - variant_ptr->get_doc_value_column()->assume_mutable()->resize(1); + variant_ptr->get_doc_value_column_mutable().resize(1); ColumnPtr result; ColumnPtr index_column_ptr = ColumnString::create(); @@ -61,4 +61,4 @@ TEST(function_variant_element_test, extract_from_sparse_column) { EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}"); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/json/json_reader_test.cpp b/be/test/format/json/json_reader_test.cpp index 920d3ea0f9f041..c04785f6fcd2b7 100644 --- a/be/test/format/json/json_reader_test.cpp +++ b/be/test/format/json/json_reader_test.cpp @@ -19,6 +19,12 @@ #include +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" #include "format/json/new_json_reader.h" namespace doris { @@ -87,4 +93,79 @@ TEST(NewJsonReaderSetBatchSizeTest, SetBatchSizeViaGenericInterface) { EXPECT_EQ(base_reader->get_batch_size(), 4096U); } +TEST(NewJsonReaderCowTest, AppendNullForMalformedJsonMutatesOwnerColumn) { + auto nested_column = ColumnInt32::create(); + nested_column->insert_value(7); + auto null_map = ColumnUInt8::create(); + null_map->insert_value(0); + ColumnPtr shared_column = ColumnNullable::create(std::move(nested_column), std::move(null_map)); + const auto* original_column = shared_column.get(); + + Block block; + block.insert({shared_column, make_nullable(std::make_shared()), "c0"}); + + ASSERT_TRUE(json_reader_detail::append_null_for_malformed_json(block).ok()); + ASSERT_EQ(block.rows(), 2); + EXPECT_NE(block.get_by_position(0).column.get(), original_column); + + const auto& result_column = + assert_cast(*block.get_by_position(0).column); + EXPECT_FALSE(result_column.is_null_at(0)); + EXPECT_TRUE(result_column.is_null_at(1)); + + const auto& original_nullable = assert_cast(*shared_column); + EXPECT_EQ(original_nullable.size(), 1); + EXPECT_FALSE(original_nullable.is_null_at(0)); +} + +TEST(NewJsonReaderCowTest, TruncateBlockToRowsMutatesOwnerColumn) { + auto nested_column = ColumnInt32::create(); + nested_column->insert_value(7); + nested_column->insert_value(8); + auto null_map = ColumnUInt8::create(); + null_map->insert_value(0); + null_map->insert_value(0); + ColumnPtr shared_column = ColumnNullable::create(std::move(nested_column), std::move(null_map)); + const auto* original_column = shared_column.get(); + + Block block; + block.insert({shared_column, make_nullable(std::make_shared()), "c0"}); + + json_reader_detail::truncate_block_to_rows(block, 1); + ASSERT_EQ(block.rows(), 1); + EXPECT_NE(block.get_by_position(0).column.get(), original_column); + + const auto& result_column = + assert_cast(*block.get_by_position(0).column); + EXPECT_EQ(result_column.size(), 1); + EXPECT_FALSE(result_column.is_null_at(0)); + + const auto& original_nullable = assert_cast(*shared_column); + EXPECT_EQ(original_nullable.size(), 2); +} + +TEST(NewJsonReaderCowTest, PopBackLastInsertedValueMutatesOwnerColumn) { + auto column = ColumnInt32::create(); + column->insert_value(7); + column->insert_value(8); + ColumnPtr shared_column = std::move(column); + const auto* original_column = shared_column.get(); + + Block block; + block.insert({shared_column, std::make_shared(), "c0"}); + + json_reader_detail::pop_back_last_inserted_value(block, 0); + ASSERT_EQ(block.rows(), 1); + EXPECT_NE(block.get_by_position(0).column.get(), original_column); + + const auto& result_column = assert_cast(*block.get_by_position(0).column); + EXPECT_EQ(result_column.size(), 1); + EXPECT_EQ(result_column.get_data()[0], 7); + + const auto& original_int_column = assert_cast(*shared_column); + EXPECT_EQ(original_int_column.size(), 2); + EXPECT_EQ(original_int_column.get_data()[0], 7); + EXPECT_EQ(original_int_column.get_data()[1], 8); +} + } // namespace doris diff --git a/be/test/format/native/native_reader_writer_test.cpp b/be/test/format/native/native_reader_writer_test.cpp index 5d1d7dc207cef7..0f38721558217b 100644 --- a/be/test/format/native/native_reader_writer_test.cpp +++ b/be/test/format/native/native_reader_writer_test.cpp @@ -527,7 +527,8 @@ TEST_F(NativeReaderWriterTest, round_trip_native_file_large_rows) { total_read_rows = read_rows; first_block = false; } else { - MutableBlock merged_mutable(&merged_block); + ScopedMutableBlock scoped_merged_mutable(&merged_block); + auto& merged_mutable = scoped_merged_mutable.mutable_block(); Status add_st = merged_mutable.add_rows(&dst_block, 0, read_rows); ASSERT_TRUE(add_st.ok()) << add_st; total_read_rows += read_rows; diff --git a/be/test/format/orc/orc_reader_fill_data_test.cpp b/be/test/format/orc/orc_reader_fill_data_test.cpp index eab2b97e38a3bd..84ba8af9da3382 100644 --- a/be/test/format/orc/orc_reader_fill_data_test.cpp +++ b/be/test/format/orc/orc_reader_fill_data_test.cpp @@ -19,6 +19,7 @@ #include +#include "core/assert_cast.h" #include "core/column/column_array.h" #include "core/column/column_struct.h" #include "core/data_type/data_type_array.h" @@ -124,6 +125,43 @@ TEST_F(OrcReaderFillDataTest, TestFillLongColumnWithNull) { } } +TEST_F(OrcReaderFillDataTest, SchemaChangeNullableNullMapUsesAppendedSlice) { + std::vector values = {10, 20, 30}; + std::vector nulls = {true, false, true}; + auto batch = create_long_batch(values.size(), values, nulls); + auto orc_type_ptr = createPrimitiveType(orc::TypeKind::LONG); + + auto nested_column = ColumnFloat64::create(); + nested_column->insert_value(1); + nested_column->insert_value(2); + auto null_map_column = ColumnUInt8::create(); + null_map_column->insert_value(0); + null_map_column->insert_value(0); + ColumnPtr doris_column = + ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + auto data_type = make_nullable(std::make_shared()); + + TFileScanRangeParams params; + TFileRangeDesc range; + auto reader = OrcReader::create_unique(params, range, 4064, "", nullptr, nullptr, true); + + Status status = reader->_orc_column_to_doris_column( + "test_schema_change_nullable", doris_column, data_type, const_node, orc_type_ptr.get(), + batch.get(), values.size()); + + ASSERT_TRUE(status.ok()) << status.to_string(); + const auto* nullable_column = assert_cast(doris_column.get()); + ASSERT_EQ(nullable_column->size(), 5); + + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { // Array类型测试 { @@ -478,4 +516,4 @@ TEST_F(OrcReaderFillDataTest, ComplexTypeConversionTest) { "+-------------------+\n"); } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/parquet/byte_stream_split_decoder_test.cpp b/be/test/format/parquet/byte_stream_split_decoder_test.cpp index 379918d479b6bf..b332a13449a99d 100644 --- a/be/test/format/parquet/byte_stream_split_decoder_test.cpp +++ b/be/test/format/parquet/byte_stream_split_decoder_test.cpp @@ -19,7 +19,9 @@ #include +#include "core/column/column_fixed_length_object.h" #include "core/column/column_vector.h" +#include "core/data_type/data_type_fixed_length_object.h" #include "core/data_type/data_type_number.h" #include "util/slice.h" @@ -32,6 +34,24 @@ class ByteStreamSplitDecoderTest : public ::testing::Test { ByteStreamSplitDecoder _decoder; }; +static std::vector encode_byte_stream_split_fixed_length( + const std::vector& values, size_t type_length) { + std::vector encoded(values.size() * type_length); + for (size_t value_index = 0; value_index < values.size(); ++value_index) { + DCHECK_EQ(values[value_index].size(), type_length); + for (size_t byte_index = 0; byte_index < type_length; ++byte_index) { + encoded[byte_index * values.size() + value_index] = + static_cast(values[value_index][byte_index]); + } + } + return encoded; +} + +static std::string fixed_length_value(const ColumnFixedLengthObject& column, size_t row) { + const auto value = column.get_data_at(row); + return {value.data, value.size}; +} + //// Test basic decoding functionality for FLOAT type TEST_F(ByteStreamSplitDecoderTest, test_basic_decode_float) { // Prepare test data for FLOAT type @@ -118,6 +138,36 @@ TEST_F(ByteStreamSplitDecoderTest, test_basic_decode_double) { EXPECT_DOUBLE_EQ(result_column->get_data()[2], 3.0); } +TEST_F(ByteStreamSplitDecoderTest, test_basic_decode_fixed_length_object) { + const size_t type_length = 3; + const std::vector values = {"abc", "def", "ghi"}; + auto encoded = encode_byte_stream_split_fixed_length(values, type_length); + Slice data_slice(encoded.data(), encoded.size()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(type_length); + DataTypePtr data_type = std::make_shared(); + + ASSERT_TRUE(_decoder.set_data(&data_slice).ok()); + _decoder.set_type_length(type_length); + + const size_t num_values = values.size(); + std::vector run_length_null_map(1, num_values); + std::vector filter_data(num_values, 1); + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + ASSERT_TRUE(select_vector.init(run_length_null_map, num_values, nullptr, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder.decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), type_length); + ASSERT_EQ(result_column->size(), num_values); + EXPECT_EQ(fixed_length_value(*result_column, 0), "abc"); + EXPECT_EQ(fixed_length_value(*result_column, 1), "def"); + EXPECT_EQ(fixed_length_value(*result_column, 2), "ghi"); +} + // Test decoding with filter for FLOAT type TEST_F(ByteStreamSplitDecoderTest, test_decode_with_filter_float) { // Prepare test data for FLOAT type @@ -258,6 +308,38 @@ TEST_F(ByteStreamSplitDecoderTest, test_decode_with_filter_and_null_float) { } } +TEST_F(ByteStreamSplitDecoderTest, test_decode_fixed_length_object_with_filter_and_null) { + const size_t type_length = 3; + const std::vector values = {"abc", "ghi"}; + auto encoded = encode_byte_stream_split_fixed_length(values, type_length); + Slice data_slice(encoded.data(), encoded.size()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(type_length); + DataTypePtr data_type = std::make_shared(); + + ASSERT_TRUE(_decoder.set_data(&data_slice).ok()); + _decoder.set_type_length(type_length); + + const size_t num_values = 3; + std::vector run_length_null_map = {1, 1, 1}; // data: [abc, null, ghi] + std::vector filter_data = {0, 1, 1}; // output: [null, ghi] + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + NullMap null_map; + ASSERT_TRUE( + select_vector.init(run_length_null_map, num_values, &null_map, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder.decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), type_length); + ASSERT_EQ(result_column->size(), 2); + EXPECT_EQ(fixed_length_value(*result_column, 1), "ghi"); + EXPECT_TRUE(null_map[0]); + EXPECT_FALSE(null_map[1]); +} + // Test decoding with filter and null for DOUBLE type TEST_F(ByteStreamSplitDecoderTest, test_decode_with_filter_and_null_double) { // Prepare test data for DOUBLE type @@ -402,4 +484,4 @@ TEST_F(ByteStreamSplitDecoderTest, test_skip_value_double) { EXPECT_DOUBLE_EQ(result_column->get_data()[0], 3.0); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/format/parquet/delta_byte_array_decoder_test.cpp b/be/test/format/parquet/delta_byte_array_decoder_test.cpp index 1b039da3d2344d..a0b88d38e43098 100644 --- a/be/test/format/parquet/delta_byte_array_decoder_test.cpp +++ b/be/test/format/parquet/delta_byte_array_decoder_test.cpp @@ -20,7 +20,9 @@ #include #include "arrow/api.h" +#include "core/column/column_fixed_length_object.h" #include "core/column/column_vector.h" +#include "core/data_type/data_type_fixed_length_object.h" #include "core/data_type/data_type_number.h" #include "core/data_type/data_type_string.h" #include "format/parquet/delta_bit_pack_decoder.h" @@ -38,6 +40,16 @@ class DeltaByteArrayDecoderTest : public ::testing::Test { std::unique_ptr _decoder; }; +static void expect_fixed_length_value(const ColumnFixedLengthObject& column, size_t row, + const std::vector& expected) { + const auto value = column.get_data_at(row); + ASSERT_EQ(value.size, expected.size()); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQ(static_cast(value.data[i]), expected[i]) + << "Mismatch at row " << row << ", byte " << i; + } +} + // Test basic decoding byte array functionality TEST_F(DeltaByteArrayDecoderTest, test_basic_decode_byte_array) { // Create ColumnDescriptor @@ -340,6 +352,60 @@ TEST_F(DeltaByteArrayDecoderTest, test_basic_decode_fixed_len_byte_array) { } } +TEST_F(DeltaByteArrayDecoderTest, test_basic_decode_fixed_len_byte_array_object) { + const int32_t type_length = 16; + int precision = 10; + int scale = 2; + _decoder->set_type_length(type_length); + + auto node = parquet::schema::PrimitiveNode::Make( + "test_column", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::DECIMAL, type_length, precision, scale); + auto descr = std::make_shared(node, 0, 0); + + std::vector> test_fixed_len_buffers = { + {0x1a, 0x05, 0x06, 0x1b, 0x00, 0x00, 0x00, 0x13, 0x1c, 0x00, 0x00, 0x00, 0x00, 0xbc, + 0x61, 0x40}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00}, + {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF}}; + + std::vector byte_array_values; + for (const auto& buffer : test_fixed_len_buffers) { + byte_array_values.emplace_back( + parquet::ByteArray {static_cast(buffer.size()), buffer.data()}); + } + + auto encoder = MakeTypedEncoder(parquet::Encoding::DELTA_BYTE_ARRAY, + /*use_dictionary=*/false, descr.get()); + ASSERT_NO_THROW( + encoder->Put(byte_array_values.data(), static_cast(byte_array_values.size()))); + auto encoded_buffer = encoder->FlushValues(); + Slice data_slice(encoded_buffer->data(), encoded_buffer->size()); + ASSERT_TRUE(_decoder->set_data(&data_slice).ok()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(type_length); + DataTypePtr data_type = std::make_shared(); + + const size_t num_values = test_fixed_len_buffers.size(); + std::vector run_length_null_map(1, num_values); + std::vector filter_data(num_values, 1); + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + ASSERT_TRUE(select_vector.init(run_length_null_map, num_values, nullptr, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder->decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), type_length); + ASSERT_EQ(result_column->size(), num_values); + for (size_t i = 0; i < num_values; ++i) { + expect_fixed_length_value(*result_column, i, test_fixed_len_buffers[i]); + } +} + // Test decoding fixed-length byte array with filter TEST_F(DeltaByteArrayDecoderTest, test_decode_fixed_len_byte_array_with_filter) { // Configure DECIMAL type parameters @@ -418,6 +484,62 @@ TEST_F(DeltaByteArrayDecoderTest, test_decode_fixed_len_byte_array_with_filter) } } +TEST_F(DeltaByteArrayDecoderTest, test_decode_fixed_len_byte_array_object_with_filter_and_null) { + const int32_t type_length = 16; + int precision = 10; + int scale = 2; + _decoder->set_type_length(type_length); + + auto node = parquet::schema::PrimitiveNode::Make( + "test_column", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::DECIMAL, type_length, precision, scale); + auto descr = std::make_shared(node, 0, 0); + + std::vector> test_fixed_len_buffers = { + {0x1a, 0x05, 0x06, 0x1b, 0x00, 0x00, 0x00, 0x13, 0x1c, 0x00, 0x00, 0x00, 0x00, 0xbc, + 0x61, 0x40}, + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00}, + {0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0, 0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, + 0xDE, 0xF0}}; + + std::vector byte_array_values; + for (const auto& buffer : test_fixed_len_buffers) { + byte_array_values.emplace_back( + parquet::ByteArray {static_cast(buffer.size()), buffer.data()}); + } + + auto encoder = MakeTypedEncoder(parquet::Encoding::DELTA_BYTE_ARRAY, + /*use_dictionary=*/false, descr.get()); + ASSERT_NO_THROW( + encoder->Put(byte_array_values.data(), static_cast(byte_array_values.size()))); + auto encoded_buffer = encoder->FlushValues(); + Slice data_slice(encoded_buffer->data(), encoded_buffer->size()); + ASSERT_TRUE(_decoder->set_data(&data_slice).ok()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(type_length); + DataTypePtr data_type = std::make_shared(); + + const size_t num_values = 4; + std::vector run_length_null_map = {2, 1, 1}; // data: [Data 1, Data 2, null, Data 4] + std::vector filter_data = {1, 0, 1, 0}; // output: [Data 1, null] + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + NullMap null_map; + ASSERT_TRUE( + select_vector.init(run_length_null_map, num_values, &null_map, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder->decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), type_length); + ASSERT_EQ(result_column->size(), 2); + expect_fixed_length_value(*result_column, 0, test_fixed_len_buffers[0]); + EXPECT_FALSE(null_map[0]); + EXPECT_TRUE(null_map[1]); +} + // Test decoding fixed-length byte array with filter and null values TEST_F(DeltaByteArrayDecoderTest, test_decode_fixed_len_byte_array_with_filter_and_null) { // Configure DECIMAL type parameters diff --git a/be/test/format/parquet/fix_length_dict_decoder_test.cpp b/be/test/format/parquet/fix_length_dict_decoder_test.cpp index a8050663b43332..afd419c546954e 100644 --- a/be/test/format/parquet/fix_length_dict_decoder_test.cpp +++ b/be/test/format/parquet/fix_length_dict_decoder_test.cpp @@ -19,8 +19,10 @@ #include +#include "core/column/column_fixed_length_object.h" #include "core/column/column_vector.h" #include "core/custom_allocator.h" +#include "core/data_type/data_type_fixed_length_object.h" #include "core/data_type/data_type_number.h" #include "util/slice.h" @@ -48,6 +50,11 @@ class FixLengthDictDecoderTest : public ::testing::Test { size_t _type_length; }; +static std::string fixed_length_value(const ColumnFixedLengthObject& column, size_t row) { + const auto value = column.get_data_at(row); + return {value.data, value.size}; +} + // Test basic decoding functionality TEST_F(FixLengthDictDecoderTest, test_basic_decode) { MutableColumnPtr column = ColumnUInt8::create(); @@ -97,6 +104,39 @@ TEST_F(FixLengthDictDecoderTest, test_basic_decode) { EXPECT_EQ(decoded_strings[6], "banana"); } +TEST_F(FixLengthDictDecoderTest, test_decode_with_column_fixed_length_object) { + MutableColumnPtr column = ColumnFixedLengthObject::create(_type_length); + DataTypePtr data_type = std::make_shared(); + + // RLE encoded data: 4 zeros followed by 1, 2, 1, padded to 8 values, [0 0 0 0 1 2 1] + std::vector rle_data = {2, 8, 0, 3, 0b00011001, 0}; + + Slice data_slice(reinterpret_cast(rle_data.data()), rle_data.size()); + ASSERT_TRUE(_decoder.set_data(&data_slice).ok()); + + const size_t num_values = 7; + std::vector run_length_null_map(1, num_values); + std::vector filter_data(num_values, 1); + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + ASSERT_TRUE(select_vector.init(run_length_null_map, num_values, nullptr, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder.decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), _type_length); + ASSERT_EQ(result_column->size(), num_values); + + EXPECT_EQ(fixed_length_value(*result_column, 0), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 1), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 2), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 3), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 4), "banana"); + EXPECT_EQ(fixed_length_value(*result_column, 5), "cherry"); + EXPECT_EQ(fixed_length_value(*result_column, 6), "banana"); +} + // Test decoding with filter TEST_F(FixLengthDictDecoderTest, test_decode_with_filter) { MutableColumnPtr column = ColumnUInt8::create(); @@ -144,6 +184,43 @@ TEST_F(FixLengthDictDecoderTest, test_decode_with_filter) { EXPECT_EQ(decoded_strings[4], "banana"); } +TEST_F(FixLengthDictDecoderTest, test_decode_fixed_length_object_with_filter_and_null) { + MutableColumnPtr column = ColumnFixedLengthObject::create(_type_length); + DataTypePtr data_type = std::make_shared(); + + // RLE encoded data: 4 zeros followed by 2, padded to 8 values, [0 0 0 0 2] + std::vector rle_data = {2, 8, 0, 3, 0b00000010, 0}; + + Slice data_slice(reinterpret_cast(rle_data.data()), rle_data.size()); + ASSERT_TRUE(_decoder.set_data(&data_slice).ok()); + + const size_t num_values = 7; + std::vector run_length_null_map {4, 1, 1, 1}; // data: [0 0 0 0 null 2 null] + std::vector filter_data = {1, 0, 1, 0, 1, 1, 1}; // output: [0 0 null 2 null] + + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + NullMap null_map; + ASSERT_TRUE( + select_vector.init(run_length_null_map, num_values, &null_map, &filter_map, 0).ok()); + + ASSERT_TRUE(_decoder.decode_values(column, data_type, select_vector, false).ok()); + + const auto* result_column = assert_cast(column.get()); + ASSERT_EQ(result_column->item_size(), _type_length); + ASSERT_EQ(result_column->size(), 5); + + EXPECT_EQ(fixed_length_value(*result_column, 0), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 1), "apple "); + EXPECT_EQ(fixed_length_value(*result_column, 3), "cherry"); + EXPECT_FALSE(null_map[0]); + EXPECT_FALSE(null_map[1]); + EXPECT_TRUE(null_map[2]); + EXPECT_FALSE(null_map[3]); + EXPECT_TRUE(null_map[4]); +} + // Test decoding with filter and null TEST_F(FixLengthDictDecoderTest, test_decode_with_filter_and_null) { MutableColumnPtr column = ColumnUInt8::create(); diff --git a/be/test/format/parquet/fix_length_plain_decoder_test.cpp b/be/test/format/parquet/fix_length_plain_decoder_test.cpp index 5228074b1e382e..78b992c2a36416 100644 --- a/be/test/format/parquet/fix_length_plain_decoder_test.cpp +++ b/be/test/format/parquet/fix_length_plain_decoder_test.cpp @@ -19,7 +19,9 @@ #include +#include "core/column/column_fixed_length_object.h" #include "core/column/column_vector.h" +#include "core/data_type/data_type_fixed_length_object.h" #include "core/data_type/data_type_number.h" #include "util/slice.h" @@ -34,6 +36,11 @@ class FixLengthPlainDecoderTest : public ::testing::Test { size_t _type_length; }; +static std::string fixed_length_value(const ColumnFixedLengthObject& column, size_t row) { + const auto& value = column.get_data_at(row); + return std::string(value.data, value.size); +} + // Test basic decoding functionality TEST_F(FixLengthPlainDecoderTest, test_basic_decode) { // Prepare test data: create fixed-length integer values @@ -74,6 +81,39 @@ TEST_F(FixLengthPlainDecoderTest, test_basic_decode) { EXPECT_EQ(result_column->get_data()[2], 789); } +TEST_F(FixLengthPlainDecoderTest, test_decode_with_column_fixed_length_object) { + std::string values = "abcdefghijkl"; + _data = std::make_unique(values.size()); + memcpy(_data.get(), values.data(), values.size()); + + _data_slice = Slice(_data.get(), values.size()); + _type_length = 4; + + FixLengthPlainDecoder decoder; + decoder.set_type_length(_type_length); + ASSERT_TRUE(decoder.set_data(&_data_slice).ok()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(_type_length); + DataTypePtr data_type = std::make_shared(); + + size_t num_values = 3; + std::vector run_length_null_map(1, num_values); + std::vector filter_data(num_values, 1); + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + ASSERT_TRUE(select_vector.init(run_length_null_map, num_values, nullptr, &filter_map, 0).ok()); + + ASSERT_TRUE(decoder.decode_values(column, data_type, select_vector, false).ok()); + + ASSERT_EQ(column->size(), num_values); + const auto* result_column = assert_cast(column.get()); + EXPECT_EQ(result_column->item_size(), _type_length); + EXPECT_EQ(fixed_length_value(*result_column, 0), "abcd"); + EXPECT_EQ(fixed_length_value(*result_column, 1), "efgh"); + EXPECT_EQ(fixed_length_value(*result_column, 2), "ijkl"); +} + // Test decoding with filter TEST_F(FixLengthPlainDecoderTest, test_decode_with_filter) { // Prepare test data: create fixed-length integer values @@ -113,6 +153,42 @@ TEST_F(FixLengthPlainDecoderTest, test_decode_with_filter) { EXPECT_EQ(result_column->get_data()[1], 789); } +TEST_F(FixLengthPlainDecoderTest, test_decode_fixed_length_object_with_filter_and_null) { + std::string values = "abcdefgh"; + _data = std::make_unique(values.size()); + memcpy(_data.get(), values.data(), values.size()); + + _data_slice = Slice(_data.get(), values.size()); + _type_length = 4; + + FixLengthPlainDecoder decoder; + decoder.set_type_length(_type_length); + ASSERT_TRUE(decoder.set_data(&_data_slice).ok()); + + MutableColumnPtr column = ColumnFixedLengthObject::create(_type_length); + DataTypePtr data_type = std::make_shared(); + + size_t num_values = 3; + std::vector run_length_null_map = {1, 1, 1}; // data: [abcd, null, efgh] + std::vector filter_data = {0, 1, 1}; // output: [null, efgh] + FilterMap filter_map; + ASSERT_TRUE(filter_map.init(filter_data.data(), filter_data.size(), false).ok()); + ColumnSelectVector select_vector; + NullMap null_map; + ASSERT_TRUE( + select_vector.init(run_length_null_map, num_values, &null_map, &filter_map, 0).ok()); + + ASSERT_TRUE(decoder.decode_values(column, data_type, select_vector, false).ok()); + + ASSERT_EQ(column->size(), 2); + const auto* result_column = assert_cast(column.get()); + EXPECT_EQ(result_column->item_size(), _type_length); + EXPECT_EQ(fixed_length_value(*result_column, 1), "efgh"); + EXPECT_EQ(null_map.size(), 2); + EXPECT_TRUE(null_map[0]); + EXPECT_FALSE(null_map[1]); +} + // Test decoding with filter and null TEST_F(FixLengthPlainDecoderTest, test_decode_with_filter_and_null) { // Prepare test data: create fixed-length integer values diff --git a/be/test/format/parquet/parquet_column_convert_test.cpp b/be/test/format/parquet/parquet_column_convert_test.cpp index e50d28ef0e7930..112390442dc593 100644 --- a/be/test/format/parquet/parquet_column_convert_test.cpp +++ b/be/test/format/parquet/parquet_column_convert_test.cpp @@ -22,6 +22,10 @@ #include #include +#include "core/assert_cast.h" +#include "core/column/column_fixed_length_object.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" #include "util/timezone_utils.h" namespace doris::parquet { @@ -38,6 +42,8 @@ static FieldSchema make_timestamp_field_schema(bool is_adjusted_to_utc) { } TEST(ParquetColumnConvertTest, InitFixedOffsetDetection) { + TimezoneUtils::load_timezones_to_cache(); + cctz::time_zone utc_tz; cctz::time_zone plus8_tz; cctz::time_zone shanghai_tz; @@ -119,4 +125,178 @@ TEST(ParquetColumnConvertTest, LookupPathMatchesOriginal) { } } +TEST(ParquetColumnConvertTest, AlignNullMapUsesAppendedSourceSlice) { + auto dst_nested_column = ColumnFloat64::create(); + dst_nested_column->insert_value(1); + dst_nested_column->insert_value(2); + auto dst_null_map_column = ColumnUInt8::create(); + dst_null_map_column->insert_value(0); + dst_null_map_column->insert_value(0); + ColumnPtr dst_column = + ColumnNullable::create(std::move(dst_nested_column), std::move(dst_null_map_column)); + + auto src_nested_column = ColumnInt64::create(); + for (int i = 0; i < 5; ++i) { + src_nested_column->insert_value(i); + } + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + align_null_map(src_column, dst_column, 2, 3, 2); + + const auto* nullable_column = assert_cast(dst_column.get()); + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + +TEST(ParquetColumnConvertTest, AlignNullMapUsesNullablePrefixForCachedReadColumn) { + auto dst_nested_column = ColumnFloat64::create(); + dst_nested_column->insert_value(1); + dst_nested_column->insert_value(2); + auto dst_null_map_column = ColumnUInt8::create(); + dst_null_map_column->insert_value(0); + dst_null_map_column->insert_value(0); + ColumnPtr dst_column = + ColumnNullable::create(std::move(dst_nested_column), std::move(dst_null_map_column)); + + auto src_nested_column = ColumnInt64::create(); + src_nested_column->insert_value(8); + src_nested_column->insert_value(9); + src_nested_column->insert_value(10); + src_nested_column->insert_value(11); + src_nested_column->insert_value(12); + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + align_null_map(src_column, dst_column, get_null_map_size_or_inner_column_size(dst_column), 3, + get_appended_null_map_start(src_column, 3)); + + const auto* nullable_column = assert_cast(dst_column.get()); + const auto& null_map = nullable_column->get_null_map_data(); + ASSERT_EQ(null_map.size(), 5); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + EXPECT_EQ(null_map[4], 1); +} + +TEST(ParquetColumnConvertTest, ConvertNullableFloatToDoubleUsesCurrentSourceNullMapSlice) { + FieldSchema field_schema; + field_schema.name = "float_col"; + field_schema.parquet_schema.__set_name("float_col"); + field_schema.parquet_schema.__set_type(tparquet::Type::FLOAT); + field_schema.data_type = DataTypeFactory::instance().create_data_type(TYPE_FLOAT, true); + + const auto dst_type = DataTypeFactory::instance().create_data_type(TYPE_DOUBLE, true); + auto converter = PhysicalToLogicalConverter::get_converter( + &field_schema, field_schema.data_type, dst_type, nullptr); + ASSERT_TRUE(converter->support()) << converter->get_error_msg(); + + auto src_nested_column = ColumnFloat32::create(); + src_nested_column->insert_value(1.5F); + src_nested_column->insert_value(2.5F); + src_nested_column->insert_value(3.5F); + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + ColumnPtr dst_column = dst_type->create_column(); + ColumnPtr dst_alias = dst_column; + + ASSERT_TRUE(converter->convert(src_column, field_schema.data_type, dst_type, dst_column, false) + .ok()); + + const auto* nullable_column = assert_cast(dst_column.get()); + ASSERT_EQ(nullable_column->size(), 3); + const auto& null_map = nullable_column->get_null_map_data(); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 1); + EXPECT_EQ(null_map[2], 0); + + const auto& nested_column = + assert_cast(nullable_column->get_nested_column()); + EXPECT_DOUBLE_EQ(nested_column.get_data()[0], 1.5); + EXPECT_DOUBLE_EQ(nested_column.get_data()[2], 3.5); + + const auto* original_dst = assert_cast(dst_alias.get()); + EXPECT_EQ(original_dst->size(), 0); +} + +TEST(ParquetColumnConvertTest, + ConvertNullableFixedLengthStringToVarbinaryPreservesExistingDstPrefix) { + FieldSchema field_schema; + field_schema.name = "fixed_binary_col"; + field_schema.parquet_schema.__set_name("fixed_binary_col"); + field_schema.parquet_schema.__set_type(tparquet::Type::FIXED_LEN_BYTE_ARRAY); + field_schema.parquet_schema.__set_type_length(2); + field_schema.data_type = DataTypeFactory::instance().create_data_type(TYPE_STRING, true); + + const auto dst_type = DataTypeFactory::instance().create_data_type(TYPE_VARBINARY, true); + auto converter = PhysicalToLogicalConverter::get_converter( + &field_schema, field_schema.data_type, dst_type, nullptr); + ASSERT_TRUE(converter->support()) << converter->get_error_msg(); + + auto src_nested_column = ColumnFixedLengthObject::create(2); + src_nested_column->insert_data("aa", 2); + src_nested_column->insert_data("bb", 2); + src_nested_column->insert_data("cc", 2); + auto src_null_map_column = ColumnUInt8::create(); + src_null_map_column->insert_value(0); + src_null_map_column->insert_value(1); + src_null_map_column->insert_value(0); + ColumnPtr src_column = + ColumnNullable::create(std::move(src_nested_column), std::move(src_null_map_column)); + + auto dst_nested_column = ColumnVarbinary::create(); + dst_nested_column->insert_data("zz", 2); + auto dst_null_map_column = ColumnUInt8::create(); + dst_null_map_column->insert_value(0); + ColumnPtr dst_column = + ColumnNullable::create(std::move(dst_nested_column), std::move(dst_null_map_column)); + ColumnPtr dst_alias = dst_column; + + ASSERT_TRUE(converter->convert(src_column, field_schema.data_type, dst_type, dst_column, false) + .ok()); + + const auto* nullable_column = assert_cast(dst_column.get()); + ASSERT_EQ(nullable_column->size(), 4); + const auto& null_map = nullable_column->get_null_map_data(); + EXPECT_EQ(null_map[0], 0); + EXPECT_EQ(null_map[1], 0); + EXPECT_EQ(null_map[2], 1); + EXPECT_EQ(null_map[3], 0); + + const auto& nested_column = + assert_cast(nullable_column->get_nested_column()); + ASSERT_EQ(nested_column.size(), 4); + EXPECT_EQ(nested_column.get_data_at(0).to_string(), "zz"); + EXPECT_EQ(nested_column.get_data_at(1).to_string(), "aa"); + EXPECT_EQ(nested_column.get_data_at(3).to_string(), "cc"); + + const auto* original_dst = assert_cast(dst_alias.get()); + ASSERT_EQ(original_dst->size(), 1); + EXPECT_EQ(original_dst->get_data_at(0).to_string(), "zz"); +} + } // namespace doris::parquet diff --git a/be/test/format/parquet/parquet_thrift_test.cpp b/be/test/format/parquet/parquet_thrift_test.cpp index 7171fe3b63cd16..4bbe6dc09e41e9 100644 --- a/be/test/format/parquet/parquet_thrift_test.cpp +++ b/be/test/format/parquet/parquet_thrift_test.cpp @@ -161,8 +161,8 @@ TEST_F(ParquetThriftReaderTest, complex_nested_file) { static int fill_nullable_column(ColumnPtr& doris_column, level_t* definitions, size_t num_values) { CHECK(doris_column->is_nullable()); - auto* nullable_column = - const_cast(static_cast(doris_column.get())); + doris_column = IColumn::mutate(std::move(doris_column)); + auto* nullable_column = assert_cast(doris_column->assume_mutable().get()); NullMap& map_data = nullable_column->get_null_map_data(); int null_cnt = 0; for (int i = 0; i < num_values; ++i) { @@ -193,6 +193,9 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column ColumnPtr src_column = _converter->get_physical_column( field_schema->physical_type, field_schema->data_type, doris_column, data_type, false); + if (_converter->read_directly_into_dst_logical_column()) { + src_column = std::move(doris_column); + } DataTypePtr& resolved_type = _converter->get_physical_type(); io::BufferedFileStreamReader stream_reader(file_reader, start_offset, chunk_size, 1024); @@ -217,10 +220,10 @@ static Status get_column_values(io::FileReaderSPtr file_reader, tparquet::Column if (src_column->is_nullable()) { // fill nullable values fill_nullable_column(src_column, definitions, rows); - auto* nullable_column = - const_cast(static_cast(src_column.get())); + auto* nullable_column = assert_cast(src_column->assume_mutable().get()); data_column = nullable_column->get_nested_column_ptr(); } else { + src_column = IColumn::mutate(std::move(src_column)); data_column = src_column->assume_mutable(); } FilterMap filter_map; diff --git a/be/test/format/table/table_format_reader_test.cpp b/be/test/format/table/table_format_reader_test.cpp index 09ab7e10b2ec73..1a1b3176df6700 100644 --- a/be/test/format/table/table_format_reader_test.cpp +++ b/be/test/format/table/table_format_reader_test.cpp @@ -19,13 +19,58 @@ #include +#include "core/assert_cast.h" +#include "core/block/block.h" +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "runtime/descriptors.h" + namespace doris { class MockTableFormatReader : public TableFormatReader { public: Status _do_get_next_block(Block*, size_t*, bool*) override { return Status::OK(); } + + void set_fill_col_name_to_block_idx(std::unordered_map* index) { + _fill_col_name_to_block_idx = index; + } + + void set_partition_value(const std::string& col_name, const std::string& value, + const SlotDescriptor* slot_desc) { + _fill_partition_values[col_name] = {value, slot_desc}; + } }; +static TTypeDesc create_scalar_type(TPrimitiveType::type primitive_type) { + TTypeDesc type_desc; + TTypeNode type_node; + TScalarType scalar_type; + scalar_type.__set_type(primitive_type); + type_node.__set_type(TTypeNodeType::SCALAR); + type_node.__set_scalar_type(scalar_type); + type_desc.types.push_back(type_node); + return type_desc; +} + +static TSlotDescriptor create_slot_descriptor(int slot_id, const std::string& col_name, + TPrimitiveType::type primitive_type, + bool nullable = false) { + TSlotDescriptor slot_desc; + slot_desc.__set_id(slot_id); + slot_desc.__set_parent(1); + slot_desc.__set_slotType(create_scalar_type(primitive_type)); + slot_desc.__set_columnPos(slot_id); + slot_desc.__set_colName(col_name); + slot_desc.__set_col_unique_id(slot_id); + slot_desc.__set_slotIdx(slot_id); + slot_desc.__set_isMaterialized(true); + slot_desc.__set_is_key(false); + slot_desc.__set_nullIndicatorBit(nullable ? 0 : -1); + return slot_desc; +} + TEST(TableFormatReaderTest, FillSynthesizedColumnsInvokesRegisteredHandlers) { MockTableFormatReader reader; size_t handled_rows = 0; @@ -48,4 +93,60 @@ TEST(TableFormatReaderTest, FillSynthesizedColumnsInvokesRegisteredHandlers) { EXPECT_EQ(handled_rows, 128u); } +TEST(TableFormatReaderTest, FillPartitionColumnRestoresSharedColumnOnDeserializeError) { + MockTableFormatReader reader; + std::unordered_map block_index {{"part_col", 0}}; + reader.set_fill_col_name_to_block_idx(&block_index); + + auto slot_desc = create_slot_descriptor(0, "part_col", TPrimitiveType::INT); + SlotDescriptor slot(slot_desc); + reader.set_partition_value("part_col", "not_an_int", &slot); + + auto type = std::make_shared(); + auto mutable_col = ColumnInt32::create(); + ColumnPtr old_col = mutable_col->get_ptr(); + + Block block; + block.insert({std::move(mutable_col), type, "part_col"}); + + auto status = reader.on_fill_partition_columns(&block, 2, {"part_col"}); + + EXPECT_FALSE(status.ok()); + ASSERT_EQ(block.columns(), 1); + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(block.get_by_position(0).column->size(), 0); + EXPECT_EQ(old_col->size(), 0); + EXPECT_NE(block.get_by_position(0).column.get(), old_col.get()); + EXPECT_NO_THROW(block.check_number_of_rows()); +} + +TEST(TableFormatReaderTest, FillMissingNullableColumnDetachesSharedBlockSlot) { + MockTableFormatReader reader; + std::unordered_map block_index {{"missing_col", 0}}; + reader.set_fill_col_name_to_block_idx(&block_index); + + auto nullable_type = make_nullable(std::make_shared()); + auto nullable_col = ColumnNullable::create(ColumnInt32::create(), ColumnUInt8::create()); + ColumnPtr old_col = nullable_col->get_ptr(); + + Block block; + block.insert({std::move(nullable_col), nullable_type, "missing_col"}); + + auto status = reader.on_fill_missing_columns(&block, 3, {"missing_col"}); + + EXPECT_TRUE(status.ok()) << status.to_string(); + ASSERT_EQ(block.columns(), 1); + ASSERT_NE(block.get_by_position(0).column.get(), nullptr); + EXPECT_EQ(block.rows(), 3); + EXPECT_EQ(old_col->size(), 0); + EXPECT_NE(block.get_by_position(0).column.get(), old_col.get()); + + const auto& nullable = assert_cast(*block.get_by_position(0).column); + const auto& null_map = nullable.get_null_map_data(); + ASSERT_EQ(null_map.size(), 3); + EXPECT_EQ(null_map[0], 1); + EXPECT_EQ(null_map[1], 1); + EXPECT_EQ(null_map[2], 1); +} + } // namespace doris diff --git a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp index bbfe9e18a35830..5590ecaa93ada1 100644 --- a/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp +++ b/be/test/load/delta_writer/delta_writer_cluster_key_test.cpp @@ -191,7 +191,7 @@ static TDescriptorTable create_descriptor_tablet_with_sequence_col() { } static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { - auto columns = block->mutate_columns(); + auto columns = std::move(*block).mutate_columns(); int8_t c1 = k1; columns[0]->insert_data((const char*)&c1, sizeof(c1)); @@ -214,7 +214,8 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriterClusterKey : public ::testing::Test { diff --git a/be/test/load/delta_writer/delta_writer_test.cpp b/be/test/load/delta_writer/delta_writer_test.cpp index 08cd0f7c7e579a..5d3aebc4e2325f 100644 --- a/be/test/load/delta_writer/delta_writer_test.cpp +++ b/be/test/load/delta_writer/delta_writer_test.cpp @@ -446,7 +446,7 @@ static TDescriptorTable create_descriptor_tablet_with_sequence_col() { } static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { - auto columns = block->mutate_columns(); + auto columns = std::move(*block).mutate_columns(); int8_t c1 = k1; columns[0]->insert_data((const char*)&c1, sizeof(c1)); @@ -461,15 +461,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class TestDeltaWriter : public ::testing::Test { @@ -568,7 +569,7 @@ TEST_F(TestDeltaWriter, vec_write) { slot_desc->col_name())); } - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); { int8_t k1 = -127; columns[0]->insert_data((const char*)&k1, sizeof(k1)); @@ -670,6 +671,7 @@ TEST_F(TestDeltaWriter, vec_write) { date_v2_int = date_v2.to_date_int_val(); columns[21]->insert_data((const char*)&date_v2_int, sizeof(date_v2_int)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/load/memtable/memtable_flush_executor_test.cpp b/be/test/load/memtable/memtable_flush_executor_test.cpp index 7916d94db409c1..375d0daab40ec0 100644 --- a/be/test/load/memtable/memtable_flush_executor_test.cpp +++ b/be/test/load/memtable/memtable_flush_executor_test.cpp @@ -253,13 +253,16 @@ class MemTableFlushExecutorGroupFlushTest : public testing::Test { block.insert(ColumnWithTypeAndName(slot->get_empty_mutable_column(), slot->type(), slot->col_name())); } - auto cols = block.mutate_columns(); - int8_t k1 = -127; - int16_t k2 = -32767; - int32_t k3 = -2147483647; - cols[0]->insert_data((const char*)&k1, sizeof(k1)); - cols[1]->insert_data((const char*)&k2, sizeof(k2)); - cols[2]->insert_data((const char*)&k3, sizeof(k3)); + { + auto cols_guard = block.mutate_columns_scoped(); + auto& cols = cols_guard.mutable_columns(); + int8_t k1 = -127; + int16_t k2 = -32767; + int32_t k3 = -2147483647; + cols[0]->insert_data((const char*)&k1, sizeof(k1)); + cols[1]->insert_data((const char*)&k2, sizeof(k2)); + cols[2]->insert_data((const char*)&k3, sizeof(k3)); + } ASSERT_TRUE(ctx->memtable->insert(&block, {0}).ok()); } diff --git a/be/test/load/memtable/memtable_memory_limiter_test.cpp b/be/test/load/memtable/memtable_memory_limiter_test.cpp index 1d5c1238335346..551010e0709cb1 100644 --- a/be/test/load/memtable/memtable_memory_limiter_test.cpp +++ b/be/test/load/memtable/memtable_memory_limiter_test.cpp @@ -154,7 +154,7 @@ TEST_F(MemTableMemoryLimiterTest, handle_memtable_flush_test) { block.insert(ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(), slot_desc->type(), slot_desc->col_name())); } - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); { int8_t k1 = -127; columns[0]->insert_data((const char*)&k1, sizeof(k1)); @@ -165,6 +165,7 @@ TEST_F(MemTableMemoryLimiterTest, handle_memtable_flush_test) { int32_t k3 = -2147483647; columns[2]->insert_data((const char*)&k3, sizeof(k3)); + block.set_columns(std::move(columns)); res = delta_writer->write(&block, {0}); ASSERT_TRUE(res.ok()); } diff --git a/be/test/olap/rowset/group_rowset_writer_test.cpp b/be/test/olap/rowset/group_rowset_writer_test.cpp index c1aa21c6b4547d..46e4a74f8ad047 100644 --- a/be/test/olap/rowset/group_rowset_writer_test.cpp +++ b/be/test/olap/rowset/group_rowset_writer_test.cpp @@ -96,12 +96,15 @@ class GroupRowsetWriterTest : public testing::Test { Block create_block(int start_key, int num_rows) const { Block block = _tablet->tablet_schema()->create_block(); - auto columns = block.mutate_columns(); - for (int i = 0; i < num_rows; ++i) { - columns[0]->insert(Field::create_field(start_key + i)); - columns[1]->insert(Field::create_field((start_key + i) * 10)); + { + auto columns_guard = block.mutate_columns_scoped(); + auto& columns = columns_guard.mutable_columns(); + for (int i = 0; i < num_rows; ++i) { + columns[0]->insert(Field::create_field(start_key + i)); + columns[1]->insert( + Field::create_field((start_key + i) * 10)); + } } - block.set_columns(std::move(columns)); return block; } diff --git a/be/test/runtime/snapshot_loader_test.cpp b/be/test/runtime/snapshot_loader_test.cpp index 6c320d225f5e44..efae696a62c492 100644 --- a/be/test/runtime/snapshot_loader_test.cpp +++ b/be/test/runtime/snapshot_loader_test.cpp @@ -210,10 +210,11 @@ static void add_rowset(int64_t tablet_id, int32_t schema_hash, int64_t partition slot_desc->col_name())); } - std::cout << "total column " << block.mutate_columns().size() << std::endl; - auto columns = block.mutate_columns(); + std::cout << "total column " << block.columns() << std::endl; + auto columns = std::move(block).mutate_columns(); int16_t c1 = value; columns[0]->insert_data((const char*)&c1, sizeof(c1)); + block.set_columns(std::move(columns)); Status res = delta_writer->write(&block, {0}); EXPECT_TRUE(res.ok()); diff --git a/be/test/runtime/stream_load_parquet_test.cpp b/be/test/runtime/stream_load_parquet_test.cpp index bf9a35c2a64111..62e280f1e80a64 100644 --- a/be/test/runtime/stream_load_parquet_test.cpp +++ b/be/test/runtime/stream_load_parquet_test.cpp @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +#include + +#include + #include "gtest/gtest.h" #include "load/load_path_mgr.h" #include "runtime/exec_env.h" @@ -27,18 +31,19 @@ class LoadPathMgrTest : public testing::Test { _exec_env = ExecEnv::GetInstance(); _load_path_mgr = std::make_unique(_exec_env); - // create tmp file - _test_dir = "/tmp/test_clean_file"; - _test_dir1 = "/tmp/test_clean_file/mini_download"; - _test_dir2 = "/tmp/test_clean_file1/mini_download/test.parquet"; - - auto result = io::global_local_filesystem()->delete_directory_or_file(_test_dir1); - result = io::global_local_filesystem()->create_directory(_test_dir1); - EXPECT_TRUE(result.ok()); + auto test_root = std::filesystem::temp_directory_path() / + ("doris_load_path_mgr_test_" + std::to_string(::getpid())); + _test_dir = test_root.string(); + _test_dir1 = _test_dir + "/mini_download"; + _test_dir2 = _test_dir1 + "/test.parquet"; - result = io::global_local_filesystem()->delete_directory_or_file(_test_dir2); - result = io::global_local_filesystem()->create_directory(_test_dir2); - EXPECT_TRUE(result.ok()); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir1, ec); + ASSERT_FALSE(ec) << ec.message(); + std::filesystem::create_directories(_test_dir2, ec); + ASSERT_FALSE(ec) << ec.message(); const_cast&>(_exec_env->store_paths()).emplace_back(_test_dir, 1024); } @@ -46,6 +51,9 @@ class LoadPathMgrTest : public testing::Test { void TearDown() override { const_cast&>(_exec_env->store_paths()).clear(); _load_path_mgr->stop(); + std::error_code ec; + std::filesystem::remove_all(_test_dir, ec); + EXPECT_FALSE(ec) << ec.message(); _exec_env->destroy(); } @@ -96,4 +104,4 @@ TEST_F(LoadPathMgrTest, CheckDiskSpaceTest) { EXPECT_FALSE(exists); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/storage/adaptive_thread_pool_controller_test.cpp b/be/test/storage/adaptive_thread_pool_controller_test.cpp index 06d79629330e14..6c4e42fea05897 100644 --- a/be/test/storage/adaptive_thread_pool_controller_test.cpp +++ b/be/test/storage/adaptive_thread_pool_controller_test.cpp @@ -19,6 +19,7 @@ #include +#include #include #include "common/config.h" @@ -44,15 +45,19 @@ class AdaptiveThreadPoolControllerTest : public testing::Test { void SetUp() override { _original_enable_adaptive = config::enable_adaptive_flush_threads; + int num_cpus = std::thread::hardware_concurrency(); + if (num_cpus <= 0) num_cpus = 1; + int max_threads = std::max(64, num_cpus * 4); + ASSERT_TRUE(ThreadPoolBuilder("TestPool") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool) .ok()); ASSERT_TRUE(ThreadPoolBuilder("TestPool2") .set_min_threads(2) - .set_max_threads(64) + .set_max_threads(max_threads) .build(&_pool2) .ok()); } diff --git a/be/test/storage/compaction/ordered_data_compaction_test.cpp b/be/test/storage/compaction/ordered_data_compaction_test.cpp index fa050f6a68b40e..333cad0cdfb308 100644 --- a/be/test/storage/compaction/ordered_data_compaction_test.cpp +++ b/be/test/storage/compaction/ordered_data_compaction_test.cpp @@ -304,7 +304,7 @@ class OrderedDataCompactionTest : public ::testing::Test { uint32_t num_rows = 0; for (int i = 0; i < rowset_data.size(); ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rowset_data[i].size(); ++rid) { int32_t c1 = std::get<0>(rowset_data[i][rid]); int32_t c2 = std::get<1>(rowset_data[i][rid]); @@ -317,6 +317,7 @@ class OrderedDataCompactionTest : public ::testing::Test { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); @@ -573,7 +574,7 @@ TEST_F(OrderedDataCompactionTest, test_index_disk_size) { uint32_t num_rows = 0; for (int j = 0; j < input_data[i].size(); ++j) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < input_data[i][j].size(); ++rid) { int32_t c1 = std::get<0>(input_data[i][j][rid]); int32_t c2 = std::get<1>(input_data[i][j][rid]); @@ -586,6 +587,7 @@ TEST_F(OrderedDataCompactionTest, test_index_disk_size) { } num_rows++; } + block.set_columns(std::move(columns)); auto s = rowset_writer->add_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/compaction/segcompaction_mow_test.cpp b/be/test/storage/compaction/segcompaction_mow_test.cpp index 760a5d953aa693..28a9fed8021eb2 100644 --- a/be/test/storage/compaction/segcompaction_mow_test.cpp +++ b/be/test/storage/compaction/segcompaction_mow_test.cpp @@ -103,6 +103,14 @@ class SegCompactionMoWTest : public ::testing::TestWithParam { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = std::move(*block).mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -335,7 +343,7 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) { // k3 := rid for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -358,7 +366,7 @@ TEST_P(SegCompactionMoWTest, SegCompactionThenRead) { } } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -441,7 +449,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { int segid = 0; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -458,7 +466,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -469,7 +477,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -486,7 +494,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -497,7 +505,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -514,7 +522,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -525,7 +533,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -542,7 +550,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -554,7 +562,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { std::map unique_keys; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { // generate some duplicate rows, segment compaction will merge them int rand_i = rand() % (num_segments - 3); @@ -573,7 +581,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { } unique_keys.emplace(k1, rid); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -593,7 +601,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -610,7 +618,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -671,7 +679,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { int segid = 0; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -688,7 +696,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -699,7 +707,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -716,7 +724,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -727,7 +735,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -744,7 +752,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -755,7 +763,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -772,7 +780,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -783,7 +791,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + segid; uint32_t k2 = segid; @@ -800,7 +808,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionInterleaveWithBig_OoOoO) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -856,7 +864,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) { // k3 := rid for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -873,7 +881,7 @@ TEST_F(SegCompactionMoWTest, SegCompactionNotTrigger) { rows_mark_deleted++; } } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/segcompaction_test.cpp b/be/test/storage/compaction/segcompaction_test.cpp index 6c43fea684cb43..d3b843c050da2e 100644 --- a/be/test/storage/compaction/segcompaction_test.cpp +++ b/be/test/storage/compaction/segcompaction_test.cpp @@ -124,6 +124,14 @@ class SegCompactionTest : public testing::Test { protected: OlapReaderStatistics _stats; + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = std::move(*block).mutate_columns(); + return st; + } + bool check_dir(std::vector& vec) { std::vector result; for (const auto& entry : std::filesystem::directory_iterator(lTestDir)) { @@ -307,7 +315,7 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) { // k3 := rid for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -316,7 +324,7 @@ TEST_F(SegCompactionTest, SegCompactionThenRead) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -428,7 +436,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { uint32_t rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -437,7 +445,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -446,7 +454,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -455,7 +463,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -464,7 +472,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -473,7 +481,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -482,7 +490,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -491,7 +499,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -500,7 +508,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -509,7 +517,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -519,7 +527,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -528,7 +536,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_ooooOOoOooooooooO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -582,7 +590,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { uint32_t rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -591,7 +599,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -600,7 +608,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -609,7 +617,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -618,7 +626,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -627,7 +635,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -636,7 +644,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 4096; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -645,7 +653,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -654,7 +662,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { rows_per_segment = 6400; for (int i = 0; i < num_segments; ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 100 + i; uint32_t k2 = i; @@ -663,7 +671,7 @@ TEST_F(SegCompactionTest, SegCompactionInterleaveWithBig_OoOoO) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); } - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -710,7 +718,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { uint32_t k3 = 0; Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // segment#0 k1 = k2 = 1; k3 = 1; @@ -730,7 +738,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -754,7 +762,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -779,7 +787,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -804,7 +812,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -817,7 +825,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -830,7 +838,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadUniqueTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -976,7 +984,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { uint32_t k3 = 0; Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // segment#0 k1 = k2 = 1; @@ -997,7 +1005,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1021,7 +1029,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1046,7 +1054,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1071,7 +1079,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1084,7 +1092,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); @@ -1097,7 +1105,7 @@ TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); columns[2]->insert_data((const char*)&k3, sizeof(k3)); - s = rowset_writer->add_block(&block); + s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_EQ(Status::OK(), s); diff --git a/be/test/storage/compaction/variant_doc_mode_compaction_test.cpp b/be/test/storage/compaction/variant_doc_mode_compaction_test.cpp index 58ce82f38cbc4c..f234cf8e34cc89 100644 --- a/be/test/storage/compaction/variant_doc_mode_compaction_test.cpp +++ b/be/test/storage/compaction/variant_doc_mode_compaction_test.cpp @@ -251,7 +251,7 @@ class VariantDocModeCompactionTest : public ::testing::Test { auto rowset_writer = std::move(res).value(); Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); auto* variant_col = assert_cast(columns[1].get()); auto raw_json_column = ColumnString::create(); raw_json_column->reserve(kRowsPerSegment); diff --git a/be/test/storage/compaction/vertical_compaction_test.cpp b/be/test/storage/compaction/vertical_compaction_test.cpp index 58e0cec0762753..542c12fd91945a 100644 --- a/be/test/storage/compaction/vertical_compaction_test.cpp +++ b/be/test/storage/compaction/vertical_compaction_test.cpp @@ -106,6 +106,14 @@ class VerticalCompactionTest : public ::testing::Test { ExecEnv::GetInstance()->set_storage_engine(nullptr); } + Status add_block_with_columns(RowsetWriter* rowset_writer, Block* block, + MutableColumns* columns) { + block->set_columns(std::move(*columns)); + auto st = rowset_writer->add_block(block); + *columns = std::move(*block).mutate_columns(); + return st; + } + TabletSchemaSPtr create_schema(KeysType keys_type = DUP_KEYS, bool without_key = false) { TabletSchemaSPtr tablet_schema = std::make_shared(); TabletSchemaPB tablet_schema_pb; @@ -241,7 +249,7 @@ class VerticalCompactionTest : public ::testing::Test { uint32_t num_rows = 0; for (int i = 0; i < rowset_data.size(); ++i) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rowset_data[i].size(); ++rid) { int32_t c1 = std::get<0>(rowset_data[i][rid]); int32_t c2 = std::get<1>(rowset_data[i][rid]); @@ -254,7 +262,7 @@ class VerticalCompactionTest : public ::testing::Test { } num_rows++; } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); EXPECT_TRUE(s.ok()); @@ -1202,7 +1210,7 @@ TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMergeWithNullableSparseColum // Create block with nullable c2 column Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rows_per_segment; ++rid) { int32_t c1 = i * rows_per_segment + rid; @@ -1222,7 +1230,7 @@ TEST_F(VerticalCompactionTest, TestUniqueKeyVerticalMergeWithNullableSparseColum columns[2]->insert_data((const char*)&delete_sign, sizeof(delete_sign)); } - auto s = rowset_writer->add_block(&block); + auto s = add_block_with_columns(rowset_writer.get(), &block, &columns); ASSERT_TRUE(s.ok()) << s; s = rowset_writer->flush(); ASSERT_TRUE(s.ok()) << s; @@ -1381,13 +1389,13 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesAccuracy) { auto rowset_writer = std::move(res).value(); Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int i = 0; i < kNumRows; i++) { int32_t int_val = i; columns[0]->insert_data(reinterpret_cast(&int_val), sizeof(int_val)); columns[1]->insert_data(fixed_string.data(), fixed_string.size()); } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; @@ -1477,7 +1485,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesNullableSparse) { auto rowset_writer = std::move(res).value(); Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int i = 0; i < kNumRows; i++) { int32_t key_val = i; columns[0]->insert_data(reinterpret_cast(&key_val), sizeof(key_val)); @@ -1488,7 +1496,7 @@ TEST_F(VerticalCompactionTest, TestFooterRawDataBytesNullableSparse) { columns[1]->insert_default(); // ColumnNullable default is null } } - ASSERT_TRUE(rowset_writer->add_block(&block).ok()); + ASSERT_TRUE(add_block_with_columns(rowset_writer.get(), &block, &columns).ok()); ASSERT_TRUE(rowset_writer->flush().ok()); RowsetSharedPtr rowset; diff --git a/be/test/storage/index/date_bloom_filter_test.cpp b/be/test/storage/index/date_bloom_filter_test.cpp index 636e7a6848d39b..263f2a44f0177d 100644 --- a/be/test/storage/index/date_bloom_filter_test.cpp +++ b/be/test/storage/index/date_bloom_filter_test.cpp @@ -131,7 +131,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { const auto& rowset_writer = res.value(); Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); auto date = timestamp_from_date("2024-11-08"); auto datetime = timestamp_from_datetime("2024-11-08 09:00:00"); @@ -146,6 +146,7 @@ TEST_F(DateBloomFilterTest, query_index_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); Status st; st = rowset_writer->add_block(&block); @@ -224,7 +225,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { const auto& rowset_writer = res.value(); Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Insert test data auto date = timestamp_from_date("2024-11-08"); @@ -240,6 +241,7 @@ TEST_F(DateBloomFilterTest, in_list_predicate_test) { olap_datetime_value = datetime.to_olap_datetime(); columns[0]->insert_many_fix_len_data(reinterpret_cast(&olap_date_value), 1); columns[1]->insert_many_fix_len_data(reinterpret_cast(&olap_datetime_value), 1); + block.set_columns(std::move(columns)); EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/index_builder_test.cpp b/be/test/storage/index/index_builder_test.cpp index c281fd511477fb..dd36ba3ab33159 100644 --- a/be/test/storage/index/index_builder_test.cpp +++ b/be/test/storage/index/index_builder_test.cpp @@ -244,7 +244,7 @@ TEST_F(IndexBuilderTest, DropInvertedIndexTest) { // 5. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -257,6 +257,8 @@ TEST_F(IndexBuilderTest, DropInvertedIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -532,7 +534,7 @@ TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) { // 4. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns according to the schema for (int i = 0; i < num_rows; ++i) { @@ -545,6 +547,8 @@ TEST_F(IndexBuilderTest, BuildInvertedIndexAfterWritingDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -861,7 +865,7 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { // 5. Write data to rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -874,6 +878,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1029,7 +1035,7 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { // 8. Write data to rowset { Block block = v1_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -1042,6 +1048,8 @@ TEST_F(IndexBuilderTest, AddIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1178,7 +1186,7 @@ TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { // 4. Write data to the rowset in multiple batches to ensure we get multiple segments for (int segment = 0; segment < num_segments; segment++) { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < rows_per_segment; ++i) { @@ -1191,6 +1199,8 @@ TEST_F(IndexBuilderTest, MultiSegmentBuildIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1328,7 +1338,7 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { // 4. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -1341,6 +1351,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1502,7 +1514,7 @@ TEST_F(IndexBuilderTest, RenameColumnIndexTest) { // 5. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -1515,6 +1527,8 @@ TEST_F(IndexBuilderTest, RenameColumnIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1656,7 +1670,7 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { // 5. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -1669,6 +1683,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1828,7 +1844,7 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { // 9. Write data to rowset { Block block = v1_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -1841,6 +1857,8 @@ TEST_F(IndexBuilderTest, AddNonExistentColumnIndexWhenOneExistsTestV1) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -1978,7 +1996,7 @@ TEST_F(IndexBuilderTest, NonNullIndexDataTest) { // 4. Write non-null data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns with no null values for (int i = 0; i < num_rows; ++i) { @@ -1991,6 +2009,8 @@ TEST_F(IndexBuilderTest, NonNullIndexDataTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2102,7 +2122,7 @@ TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { // 4. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -2115,6 +2135,8 @@ TEST_F(IndexBuilderTest, NonExistentColumnUniqueIdTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2233,7 +2255,7 @@ TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { // 9. Write data to the rowset { Block block = v1_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -2246,6 +2268,8 @@ TEST_F(IndexBuilderTest, DropIndexV1FormatTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2357,7 +2381,7 @@ TEST_F(IndexBuilderTest, ResourceCleanupTest) { // 4. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -2370,6 +2394,8 @@ TEST_F(IndexBuilderTest, ResourceCleanupTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2512,7 +2538,7 @@ TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { // 7. Create data block and write data { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Prepare columns for k1 and array_col for (int i = 0; i < 1000; i++) { @@ -2535,6 +2561,8 @@ TEST_F(IndexBuilderTest, ArrayTypeIndexTest) { array_col.insert(Field::create_field(arr)); } + block.set_columns(std::move(columns)); + // Add block to rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2618,7 +2646,7 @@ TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < 1000; ++i) { @@ -2631,6 +2659,8 @@ TEST_F(IndexBuilderTest, UniqueKeysTableIndexTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2776,7 +2806,7 @@ TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < 1000; ++i) { @@ -2789,6 +2819,8 @@ TEST_F(IndexBuilderTest, HandleSingleRowsetErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -2896,7 +2928,7 @@ TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { // Write data { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < 1000; ++i) { @@ -2909,6 +2941,8 @@ TEST_F(IndexBuilderTest, UpdateInvertedIndexInfoErrorTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); @@ -3011,7 +3045,7 @@ TEST_F(IndexBuilderTest, DropOneIndexNotAffectOtherIndexesOnSameColumnTest) { // 5. Write data to the rowset { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); // Add data for k1 and k2 columns for (int i = 0; i < num_rows; ++i) { @@ -3024,6 +3058,8 @@ TEST_F(IndexBuilderTest, DropOneIndexNotAffectOtherIndexesOnSameColumnTest) { columns[1]->insert_data((const char*)&k2, sizeof(k2)); } + block.set_columns(std::move(columns)); + // Add the block to the rowset Status s = rowset_writer->add_block(&block); ASSERT_TRUE(s.ok()) << s.to_string(); diff --git a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp index 1512212d6f24d0..ea7a71510f9086 100644 --- a/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp +++ b/be/test/storage/index/inverted/common/inverted_index_gc_binlogs_test.cpp @@ -148,12 +148,13 @@ TEST_F(IndexGcBinglogsTest, gc_binlogs_test) { const auto& rowset_writer = res.value(); Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); Field key = Field::create_field(10); Field v1 = Field::create_field("v1"); columns[0]->insert(key); columns[1]->insert(v1); + block.set_columns(std::move(columns)); EXPECT_TRUE(rowset_writer->add_block(&block).ok()); EXPECT_TRUE(rowset_writer->flush().ok()); diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index 2a59fb86acc5e8..6ca726770cfb5d 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -658,7 +658,7 @@ class IndexCompactionUtils { const auto& rowset_writer = res.value(); Block block = schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (const auto& row : data[i]) { if constexpr (std::is_same_v) { Field key = Field::create_field(int32_t(row.key)); @@ -697,6 +697,8 @@ class IndexCompactionUtils { } } + block.set_columns(std::move(columns)); + Status st = rowset_writer->add_block(&block); EXPECT_TRUE(st.ok()) << st.to_string(); st = rowset_writer->flush(); @@ -758,4 +760,4 @@ class IndexCompactionUtils { } }; -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/storage/iterator/block_reader_agg_flush_test.cpp b/be/test/storage/iterator/block_reader_agg_flush_test.cpp index a9c0a4a4818d82..77376285198fce 100644 --- a/be/test/storage/iterator/block_reader_agg_flush_test.cpp +++ b/be/test/storage/iterator/block_reader_agg_flush_test.cpp @@ -72,7 +72,8 @@ std::unique_ptr make_source_block(size_t n_rows, int64_t key_value) { // struct of the source block, pre-filled with `n_rows` default rows so that // non-variable-length agg columns can be written via replace_column_data. MutableColumns make_stored_columns(const Block& src_block, size_t n_rows) { - return src_block.create_same_struct_block(n_rows)->mutate_columns(); + auto block = src_block.create_same_struct_block(n_rows); + return std::move(*block).mutate_columns(); } MutableColumns make_target_columns() { diff --git a/be/test/storage/rowid_conversion_test.cpp b/be/test/storage/rowid_conversion_test.cpp index 3ec611ca430cf6..c8b3bad9336a04 100644 --- a/be/test/storage/rowid_conversion_test.cpp +++ b/be/test/storage/rowid_conversion_test.cpp @@ -192,7 +192,7 @@ class TestRowIdConversion : public testing::TestWithParamcreate_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (int rid = 0; rid < rowset_data[i].size(); ++rid) { int32_t c1 = std::get<0>(rowset_data[i][rid]); int32_t c2 = std::get<1>(rowset_data[i][rid]); @@ -205,6 +205,7 @@ class TestRowIdConversion : public testing::TestWithParamadd_block(&block); EXPECT_TRUE(s.ok()); s = rowset_writer->flush(); diff --git a/be/test/storage/segment/segment_cache_test.cpp b/be/test/storage/segment/segment_cache_test.cpp index 82bfe8242411e2..a48c50eaa0216d 100644 --- a/be/test/storage/segment/segment_cache_test.cpp +++ b/be/test/storage/segment/segment_cache_test.cpp @@ -183,7 +183,7 @@ static TDescriptorTable create_descriptor_tablet_with_sequence_col() { } static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { - auto columns = block->mutate_columns(); + auto columns = std::move(*block).mutate_columns(); int8_t c1 = k1; columns[0]->insert_data((const char*)&c1, sizeof(c1)); @@ -198,15 +198,16 @@ static void generate_data(Block* block, int8_t k1, int16_t k2, int32_t seq) { {"2020-07-16 19:39:43", 19}, c3, nullptr, p); } int64_t c3_int = c3.to_int64(); - columns[2]->insert_data((const char*)&c3_int, sizeof(c3)); + columns[2]->insert_data((const char*)&c3_int, sizeof(c3_int)); DateV2Value c4; c4.unchecked_set_time(2022, 6, 6, 0, 0, 0, 0); uint32_t c4_int = c4.to_date_int_val(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); int32_t c5 = seq; - columns[4]->insert_data((const char*)&c5, sizeof(c2)); + columns[4]->insert_data((const char*)&c5, sizeof(c5)); + block->set_columns(std::move(columns)); } class SegmentCacheTest : public ::testing::Test { diff --git a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp index 0236ac98c9f7be..10ee96e1c8e5d1 100644 --- a/be/test/storage/segment/segments_key_bounds_truncation_test.cpp +++ b/be/test/storage/segment/segments_key_bounds_truncation_test.cpp @@ -182,12 +182,13 @@ class SegmentsKeyBoundsTruncationTest : public testing::Test { int const_value = 999; for (const auto& segment_rows : data) { Block block = tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); for (const auto& row : segment_rows) { columns[0]->insert_data(row.data(), row.size()); columns[1]->insert_data(reinterpret_cast(&const_value), sizeof(const_value)); } + block.set_columns(std::move(columns)); ret.emplace_back(std::move(block)); } return ret; diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp index 3a644cc373f896..517469331c32e1 100644 --- a/be/test/storage/segment/variant_column_writer_reader_test.cpp +++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp @@ -82,6 +82,38 @@ static void construct_tablet_index(TabletIndexPB* tablet_index, int64_t index_id tablet_index->add_col_unique_id(col_unique_id); } +static void fill_nullable_variant_block(Block* block, + std::unordered_map* inserted_jsonstr, + variant_util::PathToNoneNullValues* path_with_size) { + MutableColumnPtr column = IColumn::mutate(block->get_by_position(0).column); + auto* nullable_object = assert_cast(column.get()); + for (int idx = 0; idx < 10; idx++) { + nullable_object->insert_default(); // insert null + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 80; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + nullable_object->insert_many_defaults(17); + { + auto column_object = nullable_object->get_nested_column_ptr(); + auto res = VariantUtil::fill_object_column_with_test_data(column_object, 2, + inserted_jsonstr); + path_with_size->insert(res.begin(), res.end()); + } + for (int j = 0; j < 2; ++j) { + Field f = Field::create_field(UInt8(0)); + nullable_object->get_null_map_column().insert(f); + } + } + block->replace_by_position(0, std::move(column)); +} + // MockColumnReaderCache class for testing class MockColumnReaderCache : public segment_v2::ColumnReaderCache { public: @@ -232,7 +264,7 @@ class VariantColumnWriterReaderTest : public testing::Test { for (const auto& batch : batches) { Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); auto variant_col = ColumnVariant::create( _tablet_schema->column(0).variant_max_subcolumns_count(), false); auto json_col = ColumnString::create(); @@ -273,7 +305,7 @@ class VariantColumnWriterReaderTest : public testing::Test { } Block block = _tablet_schema->create_block(); - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); auto variant_col = ColumnVariant::create( _tablet_schema->column(0).variant_max_subcolumns_count(), false); auto json_col = ColumnString::create(); @@ -2627,28 +2659,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2780,28 +2793,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable_without_finalize) auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2875,28 +2869,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -2970,28 +2945,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_bf_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3067,28 +3023,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_zm_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3164,28 +3101,9 @@ TEST_F(VariantColumnWriterReaderTest, test_write_inverted_with_finalize) { auto olap_data_convertor = std::make_unique(); // here is nullable variant auto block = _tablet_schema->create_block(); - auto nullable_object = assert_cast( - (*std::move(block.get_by_position(0).column)).mutate().get()); std::unordered_map inserted_jsonstr; - auto column_object = nullable_object->get_nested_column_ptr(); variant_util::PathToNoneNullValues path_with_size; - for (int idx = 0; idx < 10; idx++) { - nullable_object->insert_default(); // insert null - auto res = VariantUtil::fill_object_column_with_test_data(column_object, 80, - &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 80; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - nullable_object->insert_many_defaults(17); - res = VariantUtil::fill_object_column_with_test_data(column_object, 2, &inserted_jsonstr); - path_with_size.insert(res.begin(), res.end()); - for (int j = 0; j < 2; ++j) { - Field f = Field::create_field(UInt8(0)); - nullable_object->get_null_map_column_ptr()->insert(f); - } - } + fill_nullable_variant_block(&block, &inserted_jsonstr, &path_with_size); // sort path_with_size with value olap_data_convertor->add_column_data_convertor(column); olap_data_convertor->set_source_content(&block, 0, 1000); @@ -3712,7 +3630,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t n = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3723,8 +3641,8 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object12 = ColumnVariant::create(3, false); - MutableColumnPtr null_object12 = ColumnNullable::create( - new_column_object12->assume_mutable(), ColumnUInt8::create()); + MutableColumnPtr null_object12 = + ColumnNullable::create(std::move(new_column_object12), ColumnUInt8::create()); st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); st = nested_iter->next_batch(&n, null_object12, &has_null); @@ -3756,7 +3674,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object2 = ColumnVariant::create(3, false); MutableColumnPtr null_object2 = - ColumnNullable::create(new_column_object2->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object2), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter2->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); @@ -3866,7 +3784,7 @@ TEST_F(VariantColumnWriterReaderTest, test_nested_iter_nullable) { // fill with nullable ColumnVariant target MutableColumnPtr new_column_object1 = ColumnVariant::create(3, false); MutableColumnPtr null_object = - ColumnNullable::create(new_column_object1->assume_mutable(), ColumnUInt8::create()); + ColumnNullable::create(std::move(new_column_object1), ColumnUInt8::create()); size_t nrows = 1000; st = nested_iter->seek_to_ordinal(0); EXPECT_TRUE(st.ok()) << st.msg(); diff --git a/be/test/storage/segment/variant_util_test.cpp b/be/test/storage/segment/variant_util_test.cpp index 902bf9c843b115..9f09addabff29a 100644 --- a/be/test/storage/segment/variant_util_test.cpp +++ b/be/test/storage/segment/variant_util_test.cpp @@ -25,8 +25,11 @@ #include "common/config.h" #include "core/block/block.h" +#include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_variant.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_variant.h" #include "core/field.h" #include "core/value/jsonb_value.h" @@ -473,6 +476,37 @@ TEST(VariantUtilTest, ParseVariantColumns_ScalarJsonStringToSubcolumns) { EXPECT_EQ(f.field.get(), 2); } +TEST(VariantUtilTest, ParseNullableScalarVariantDetachesNestedAlias) { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field(*variant, Field::create_field(123)); + ColumnPtr variant_ptr = std::move(variant); + + auto null_map = ColumnUInt8::create(); + null_map->insert_value(0); + ColumnPtr nullable_variant = ColumnNullable::create(variant_ptr, null_map->get_ptr()); + variant_ptr.reset(); + ColumnPtr nullable_alias = nullable_variant; + + Block block; + block.insert( + {nullable_variant, make_nullable(std::make_shared(0, false)), "v"}); + + ParseConfig parse_cfg; + parse_cfg.deprecated_enable_flatten_nested = false; + parse_cfg.parse_to = ParseConfig::ParseTo::OnlySubcolumns; + Status st = + parse_and_materialize_variant_columns(block, std::vector {0}, {parse_cfg}); + EXPECT_TRUE(st.ok()) << st.to_string(); + + const auto& alias_nullable = assert_cast(*nullable_alias); + const auto& alias_variant = + assert_cast(alias_nullable.get_nested_column()); + EXPECT_TRUE(alias_variant.is_scalar_variant()); + EXPECT_EQ(alias_variant.get_root_type()->get_primitive_type(), PrimitiveType::TYPE_INT); + + EXPECT_TRUE(block.get_by_position(0).column->is_nullable()); +} + TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) { const std::vector jsons = { R"({"a":1,"b":"x"})", // diff --git a/be/test/storage/tablet/tablet_cooldown_test.cpp b/be/test/storage/tablet/tablet_cooldown_test.cpp index b919aa887834e7..182274341acdb2 100644 --- a/be/test/storage/tablet/tablet_cooldown_test.cpp +++ b/be/test/storage/tablet/tablet_cooldown_test.cpp @@ -334,7 +334,7 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep slot_desc->col_name())); } Status st; - auto columns = block.mutate_columns(); + auto columns = std::move(block).mutate_columns(); if (with_data) { int8_t c1 = 123; @@ -344,7 +344,7 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep columns[1]->insert_data((const char*)&c2, sizeof(c2)); int32_t c3 = 1; - columns[2]->insert_data((const char*)&c3, sizeof(c2)); + columns[2]->insert_data((const char*)&c3, sizeof(c3)); VecDateTimeValue c4; { @@ -354,8 +354,9 @@ static void write_rowset(TabletSharedPtr* tablet, PUniqueId load_id, int64_t rep {"2020-07-16 19:39:43", 19}, c4, nullptr, p); } int64_t c4_int = c4.to_int64(); - columns[3]->insert_data((const char*)&c4_int, sizeof(c4)); + columns[3]->insert_data((const char*)&c4_int, sizeof(c4_int)); + block.set_columns(std::move(columns)); st = delta_writer->write(&block, {0}); ASSERT_EQ(Status::OK(), st); } diff --git a/be/test/util/bit_util_test.cpp b/be/test/util/bit_util_test.cpp index af106e3a26011c..6bebd7289182ec 100644 --- a/be/test/util/bit_util_test.cpp +++ b/be/test/util/bit_util_test.cpp @@ -63,14 +63,14 @@ TEST(BitUtil, BigEndianToHost) { void insert_true(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(1); + assert_cast(column->get_nested_column()).insert_value(1); column->push_false_to_nullmap(1); } } void insert_false(ColumnNullable* column, size_t num = 1) { for (int i = 0; i < num; i++) { - assert_cast(column->get_nested_column_ptr().get())->insert_value(0); + assert_cast(column->get_nested_column()).insert_value(0); column->push_false_to_nullmap(1); } } @@ -102,16 +102,12 @@ TEST(BitUtil, CountZero) { insert_false(column.get(), 54); insert_true(column.get(), 1); insert_false(column.get(), 14); + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } @@ -131,16 +127,12 @@ TEST(BitUtil, CountZero) { } } } + const auto& nested_data = + assert_cast(column->get_nested_column()).get_data(); ASSERT_EQ( - brute_force_count_zero_num( - assert_cast(column->get_nested_column_ptr().get()) - ->get_data() - .data(), - column->get_null_map_data().data(), column->size()), - simd::count_zero_num((int8_t*)assert_cast( - column->get_nested_column_ptr().get()) - ->get_data() - .data(), + brute_force_count_zero_num(nested_data.data(), column->get_null_map_data().data(), + column->size()), + simd::count_zero_num((int8_t*)nested_data.data(), column->get_null_map_data().data(), (uint32_t)column->size())); } }