diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index b25ea8a37c3e3b..6aacc4ab4d0230 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -569,7 +569,7 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* if (local_state._build_side_mutable_block.empty()) { auto tmp_build_block = vectorized::VectorizedUtils::create_empty_columnswithtypename( _child->row_desc()); - tmp_build_block = *(tmp_build_block.create_same_struct_block(1, false)); + tmp_build_block = *(tmp_build_block.create_same_struct_block_with_type(1)); local_state._build_col_ids.resize(_build_expr_ctxs.size()); RETURN_IF_ERROR(local_state._do_evaluate(tmp_build_block, local_state._build_expr_ctxs, *local_state._build_expr_call_timer, diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index ec4b8585db3ec1..26d764c04ef2ea 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -61,9 +61,11 @@ namespace doris::vectorized { class Arena; class ColumnSorter; +class IDataType; using EqualFlags = std::vector; using EqualRange = std::pair; +using DataTypePtr = std::shared_ptr; /// Declares interface to store columns in memory. class IColumn : public COW { @@ -308,6 +310,15 @@ class IColumn : public COW { } } + /// for ColumnVector with type date/datetime, the default value depend on data type. + virtual void insert_default_with_type(DataTypePtr type) { insert_default(); } + + void insert_many_defaults_with_type(size_t length, DataTypePtr type) { + for (size_t i = 0; i < length; ++i) { + insert_default_with_type(type); + } + } + /** Removes last n elements. * Is used to support exception-safety of several operations. * For example, sometimes insertion should be reverted if we catch an exception during operation processing. diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index ca2b75d171b838..6e75e608866275 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -259,6 +259,13 @@ class ColumnNullable final : public COWHelper, public N _need_update_has_null = false; } + void insert_default_with_type(DataTypePtr type) override { + get_nested_column().insert_default_with_type(type); + get_null_map_data().push_back(1); + _has_null = true; + _need_update_has_null = false; + } + void insert_many_defaults(size_t length) override { get_nested_column().insert_many_defaults(length); get_null_map_data().resize_fill(get_null_map_data().size() + length, 1); diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 40cbefa85edf4d..375e7f2df43d1c 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -160,6 +160,12 @@ void ColumnStruct::insert_default() { } } +void ColumnStruct::insert_default_with_type(DataTypePtr type) { + for (auto& column : columns) { + column->insert_default_with_type(type); + } +} + void ColumnStruct::pop_back(size_t n) { for (auto& column : columns) { column->pop_back(n); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 8a86330b1db966..8ac68751afba6b 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -115,6 +115,7 @@ class ColumnStruct final : public COWHelper { void insert(const Field& x) override; void insert_from(const IColumn& src_, size_t n) override; void insert_default() override; + void insert_default_with_type(DataTypePtr type) override; void pop_back(size_t n) override; StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override; const char* deserialize_and_insert_from_arena(const char* pos) override; diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 3d3aa89243f866..d91d8ae2d8235a 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -27,6 +27,7 @@ #include #include +#include "util/binary_cast.hpp" #include "util/hash_util.hpp" #include "util/simd/bits.h" #include "vec/columns/column_impl.h" @@ -40,6 +41,7 @@ #include "vec/core/sort_block.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { @@ -505,6 +507,19 @@ void ColumnVector::replace_column_null_data(const uint8_t* __restrict null_ma } } +template +void ColumnVector::insert_default_with_type(DataTypePtr type) { + if (WhichDataType(type).is_date_or_datetime()) { + data.push_back(binary_cast(VecDateTimeValue::DEFAULT_VALUE)); + } else if (WhichDataType(type).is_date_v2()) { + data.push_back(DateV2Value::DEFAULT_VALUE.to_date_int_val()); + } else if (WhichDataType(type).is_date_time_v2()) { + data.push_back(DateV2Value::DEFAULT_VALUE.to_date_int_val()); + } else { + insert_default(); + } +} + /// Explicit template instantiations - to avoid code bloat in headers. template class ColumnVector; template class ColumnVector; diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index e3bf98118b1bc4..aef8b01f0f0aa1 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -53,14 +53,9 @@ class SipHash; -namespace doris { -namespace vectorized { +namespace doris::vectorized { class Arena; class ColumnSorter; -} // namespace vectorized -} // namespace doris - -namespace doris::vectorized { /** Stuff for comparing numbers. * Integer values are compared as usual. @@ -241,6 +236,8 @@ class ColumnVector final : public COWHelper> { void insert_default() override { data.push_back(T()); } + void insert_default_with_type(DataTypePtr type) override; + void insert_many_defaults(size_t length) override { size_t old_size = data.size(); data.resize(old_size + length); diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 5b4d858673f406..e3e81ef64db623 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -1193,6 +1193,16 @@ std::unique_ptr Block::create_same_struct_block(size_t size, bool is_rese return temp_block; } +std::unique_ptr Block::create_same_struct_block_with_type(size_t size) const { + auto temp_block = Block::create_unique(); + for (const auto& d : data) { + auto column = d.type->create_column(); + column->insert_many_defaults_with_type(size, d.type); + temp_block->insert({std::move(column), d.type, d.name}); + } + return temp_block; +} + void Block::shrink_char_type_column_suffix_zero(const std::vector& char_type_idx) { for (auto idx : char_type_idx) { if (idx < data.size()) { diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index ab670593115621..45525df1d39e86 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -312,6 +312,8 @@ class Block { Status deserialize(const PBlock& pblock); std::unique_ptr create_same_struct_block(size_t size, bool is_reserve = false) const; + // only used for hashjoin build side to mock a row into block. + std::unique_ptr create_same_struct_block_with_type(size_t size) const; /** Compares (*this) n-th row and rhs m-th row. * Returns negative number, 0, or positive number (*this) n-th row is less, equal, greater than rhs m-th row respectively. diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 83b38f2e5f92d2..5da9d62b4d6639 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -374,12 +374,16 @@ struct DateTimeOp { } static void vector_vector(const PaddedPODArray& vec_from0, const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to) { + PaddedPODArray& vec_to, const NullMap* nullmap0, + const NullMap* nullmap1) { size_t size = vec_from0.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if ((nullmap0 && (*nullmap0)[i]) || (nullmap1 && (*nullmap1)[i])) [[unlikely]] { + continue; + } // here reinterpret_cast is used to convert uint8& to bool&, // otherwise it will be implicitly converted to bool, causing the rvalue to fail to match the lvalue. // the same goes for the following. @@ -400,18 +404,23 @@ struct DateTimeOp { vec_to.resize(size); null_map.resize_fill(size, false); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], reinterpret_cast(null_map[i])); + } } static void vector_vector(const PaddedPODArray& vec_from0, const PaddedPODArray& vec_from1, - PaddedPODArray& vec_to) { + PaddedPODArray& vec_to, const NullMap* nullmap0, + const NullMap* nullmap1) { size_t size = vec_from0.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if ((nullmap0 && (*nullmap0)[i]) || (nullmap1 && (*nullmap1)[i])) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(vec_from0[i], vec_from1[i], invalid); if (UNLIKELY(invalid)) { @@ -434,12 +443,19 @@ struct DateTimeOp { } } static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, Int128& delta) { + PaddedPODArray& vec_to, Int128& delta, + const NullMap* nullmap0, const NullMap* nullmap1) { + if (nullmap1 && (*nullmap1)[0]) [[unlikely]] { + return; + } size_t size = vec_from.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if (nullmap0 && (*nullmap0)[i]) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(vec_from[i], delta, invalid); if (UNLIKELY(invalid)) { @@ -462,12 +478,19 @@ struct DateTimeOp { } } static void vector_constant(const PaddedPODArray& vec_from, - PaddedPODArray& vec_to, Int64 delta) { + PaddedPODArray& vec_to, Int64 delta, + const NullMap* nullmap0, const NullMap* nullmap1) { + if (nullmap1 && (*nullmap1)[0]) [[unlikely]] { + return; + } size_t size = vec_from.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if (nullmap0 && (*nullmap0)[i]) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(vec_from[i], delta, invalid); if (UNLIKELY(invalid)) { @@ -490,12 +513,19 @@ struct DateTimeOp { } } static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - const IColumn& delta) { + const IColumn& delta, const NullMap* nullmap0, + const NullMap* nullmap1) { + if (nullmap0 && (*nullmap0)[0]) [[unlikely]] { + return; + } size_t size = delta.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if (nullmap1 && (*nullmap1)[i]) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(from, delta.get_int(i), invalid); if (UNLIKELY(invalid)) { @@ -517,12 +547,19 @@ struct DateTimeOp { } static void constant_vector(const FromType1& from, PaddedPODArray& vec_to, - const PaddedPODArray& delta) { + const PaddedPODArray& delta, const NullMap* nullmap0, + const NullMap* nullmap1) { + if (nullmap0 && (*nullmap0)[0]) [[unlikely]] { + return; + } size_t size = delta.size(); vec_to.resize(size); bool invalid = true; for (size_t i = 0; i < size; ++i) { + if (nullmap1 && (*nullmap1)[i]) [[unlikely]] { + continue; + } vec_to[i] = Transform::execute(from, delta[i], invalid); if (UNLIKELY(invalid)) { @@ -540,11 +577,19 @@ struct DateTimeAddIntervalImpl { using ToType = typename Transform::ReturnType::FieldType; using Op = DateTimeOp; - const ColumnPtr source_col = remove_nullable(block.get_by_position(arguments[0]).column); + //ATTN: those null maps may be nullmap of ColumnConst(only 1 row) + // src column is always datelike type. + ColumnPtr& col0 = block.get_by_position(arguments[0]).column; + const NullMap* nullmap0 = VectorizedUtils::get_null_map(col0); + // the second column may be delta column(xx_add/sub) or datelike column(xxx_diff) + ColumnPtr& col1 = block.get_by_position(arguments[1]).column; + const NullMap* nullmap1 = VectorizedUtils::get_null_map(col1); + + const ColumnPtr source_col = remove_nullable(col0); const auto is_nullable = block.get_by_position(result).type->is_nullable(); if (const auto* sources = check_and_get_column>(source_col.get())) { auto col_to = ColumnVector::create(); - auto delta_column_ptr = remove_nullable(block.get_by_position(arguments[1]).column); + auto delta_column_ptr = remove_nullable(col1); const IColumn& delta_column = *delta_column_ptr; if (is_nullable) { @@ -581,8 +626,7 @@ struct DateTimeAddIntervalImpl { col_to->get_data(), null_map->get_data()); } } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[0]).column.get())) { + if (const auto* nullable_col = check_and_get_column(col0.get())) { NullMap& result_null_map = assert_cast(*null_map).get_data(); const NullMap& src_null_map = assert_cast(nullable_col->get_null_map_column()) @@ -590,8 +634,7 @@ struct DateTimeAddIntervalImpl { VectorizedUtils::update_null_map(result_null_map, src_null_map); } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[1]).column.get())) { + if (const auto* nullable_col = check_and_get_column(col1.get())) { NullMap& result_null_map = assert_cast(*null_map).get_data(); const NullMap& src_null_map = assert_cast(nullable_col->get_null_map_column()) @@ -606,28 +649,32 @@ struct DateTimeAddIntervalImpl { typeid_cast(&delta_column)) { if (delta_const_column->get_field().get_type() == Field::Types::Int128) { Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); + delta_const_column->get_field().get(), nullmap0, + nullmap1); } else if (delta_const_column->get_field().get_type() == Field::Types::Int64) { Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); + delta_const_column->get_field().get(), nullmap0, + nullmap1); } else if (delta_const_column->get_field().get_type() == Field::Types::UInt64) { Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); + delta_const_column->get_field().get(), nullmap0, + nullmap1); } else { Op::vector_constant(sources->get_data(), col_to->get_data(), - delta_const_column->get_field().get()); + delta_const_column->get_field().get(), nullmap0, + nullmap1); } } else { if (const auto* delta_vec_column0 = check_and_get_column>(delta_column)) { Op::vector_vector(sources->get_data(), delta_vec_column0->get_data(), - col_to->get_data()); + col_to->get_data(), nullmap0, nullmap1); } else { const auto* delta_vec_column1 = check_and_get_column>(delta_column); DCHECK(delta_vec_column1 != nullptr); Op::vector_vector(sources->get_data(), delta_vec_column1->get_data(), - col_to->get_data()); + col_to->get_data(), nullmap0, nullmap1); } } block.replace_by_position(result, std::move(col_to)); @@ -637,8 +684,7 @@ struct DateTimeAddIntervalImpl { auto col_to = ColumnVector::create(); if (is_nullable) { auto null_map = ColumnUInt8::create(input_rows_count, 0); - auto not_nullable_column_ptr_arg1 = - remove_nullable(block.get_by_position(arguments[1]).column); + auto not_nullable_column_ptr_arg1 = remove_nullable(col1); if (const auto* delta_vec_column = check_and_get_column>( *not_nullable_column_ptr_arg1)) { Op::constant_vector(sources_const->template get_value(), @@ -649,8 +695,7 @@ struct DateTimeAddIntervalImpl { col_to->get_data(), null_map->get_data(), *not_nullable_column_ptr_arg1); } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[0]).column.get())) { + if (const auto* nullable_col = check_and_get_column(col0.get())) { NullMap& result_null_map = assert_cast(*null_map).get_data(); const NullMap& src_null_map = assert_cast(nullable_col->get_null_map_column()) @@ -658,8 +703,7 @@ struct DateTimeAddIntervalImpl { VectorizedUtils::update_null_map(result_null_map, src_null_map); } - if (const auto* nullable_col = check_and_get_column( - block.get_by_position(arguments[1]).column.get())) { + if (const auto* nullable_col = check_and_get_column(col1.get())) { NullMap& result_null_map = assert_cast(*null_map).get_data(); const NullMap& src_null_map = assert_cast(nullable_col->get_null_map_column()) @@ -670,21 +714,20 @@ struct DateTimeAddIntervalImpl { block.get_by_position(result).column = ColumnNullable::create(std::move(col_to), std::move(null_map)); } else { - if (const auto* delta_vec_column = check_and_get_column>( - *block.get_by_position(arguments[1]).column)) { + if (const auto* delta_vec_column = + check_and_get_column>(*col1)) { Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), delta_vec_column->get_data()); + col_to->get_data(), delta_vec_column->get_data(), nullmap0, + nullmap1); } else { Op::constant_vector(sources_const->template get_value(), - col_to->get_data(), - *block.get_by_position(arguments[1]).column); + col_to->get_data(), *col1, nullmap0, nullmap1); } block.replace_by_position(result, std::move(col_to)); } } else { return Status::RuntimeError("Illegal column {} of first argument of function {}", - block.get_by_position(arguments[0]).column->get_name(), - Transform::name); + col0->get_name(), Transform::name); } return Status::OK(); } @@ -705,7 +748,9 @@ class FunctionDateOrDateTimeComputation : public IFunction { size_t get_number_of_arguments() const override { return 0; } DataTypes get_variadic_argument_types_impl() const override { - if constexpr (has_variadic_argument) return Transform::get_variadic_argument_types(); + if constexpr (has_variadic_argument) { + return Transform::get_variadic_argument_types(); + } return {}; } bool use_default_implementation_for_nulls() const override { return false; } diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 1c9bd9d126a511..6725a4bd9b34f7 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -264,6 +264,9 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes _month(0), // so this is a difference between Vectorization mode and Rowbatch mode with DateTimeValue; _year(0) {} // before int128 16 bytes ---> after int64 8 bytes + const static VecDateTimeValue FIRST_DAY; + const static VecDateTimeValue DEFAULT_VALUE; + // The data format of DATE/DATETIME is different in storage layer and execute layer. // So we should use different creator to get data from value. // We should use create_from_olap_xxx only at binary data scanned from storage engine and convert to typed data. @@ -764,6 +767,10 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes _year(year) {} }; +inline const VecDateTimeValue VecDateTimeValue::FIRST_DAY(false, TYPE_DATETIME, 0, 0, 0, 1, 1, 1); +inline const VecDateTimeValue VecDateTimeValue::DEFAULT_VALUE(false, TYPE_DATETIME, 0, 0, 0, 1970, + 1, 1); + template class DateV2Value { public: @@ -779,6 +786,9 @@ class DateV2Value { DateV2Value(const DateV2Value& other) = default; + const static DateV2Value FIRST_DAY; + const static DateV2Value DEFAULT_VALUE; + static DateV2Value create_from_olap_date(uint64_t value) { DateV2Value date; date.from_olap_date(value); @@ -1367,6 +1377,11 @@ class DateV2Value { : date_v2_value_(year, month, day, hour, minute, second, microsecond) {} }; +template +inline const DateV2Value DateV2Value::FIRST_DAY = DateV2Value(0001, 1, 1, 0, 0, 0, 0); +template +inline const DateV2Value DateV2Value::DEFAULT_VALUE = DateV2Value(1970, 1, 1, 0, 0, 0, 0); + // only support DATE - DATE (no support DATETIME - DATETIME) std::size_t operator-(const VecDateTimeValue& v1, const VecDateTimeValue& v2); diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 8d17b2787a53da..108e36b5eabed8 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -111,6 +111,21 @@ class VectorizedUtils { return columns_with_type_and_name; } + // Helper function to extract null map from column (including ColumnConst cases) + // won't expand for Const(Nullable) + static const NullMap* get_null_map(const ColumnPtr& col) { + if (col->is_nullable()) { + return &static_cast(*col).get_null_map_data(); + } + // Handle Const(Nullable) case + if (const auto* const_col = check_and_get_column(col.get()); + const_col != nullptr && const_col->get_data_column().is_nullable()) { + return &static_cast(const_col->get_data_column()) + .get_null_map_data(); + } + return nullptr; + }; + // is_single: whether src is null map of a ColumnConst static void update_null_map(NullMap& dst, const NullMap& src, bool is_single = false) { size_t size = dst.size(); diff --git a/regression-test/data/correctness_p0/test_join_date_default_value.out b/regression-test/data/correctness_p0/test_join_date_default_value.out new file mode 100644 index 00000000000000..6e549a19e114de --- /dev/null +++ b/regression-test/data/correctness_p0/test_join_date_default_value.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 2025-11-21T00:00 + diff --git a/regression-test/suites/correctness_p0/test_join_date_default_value.groovy b/regression-test/suites/correctness_p0/test_join_date_default_value.groovy new file mode 100644 index 00000000000000..b620ee93281930 --- /dev/null +++ b/regression-test/suites/correctness_p0/test_join_date_default_value.groovy @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_join_date_default_value") { + sql "drop table if exists testdt_l;" + sql "drop table if exists testdt_r;" + sql """ + create table testdt_l(f1 int, f2 datetimev2 not null) distributed by hash(f1) properties('replication_num' = '1'); + """ + sql """ insert into testdt_l values(1, '2025-11-21'); """ + sql """ + create table testdt_r(f1 int, f2 datetimev2 not null) distributed by hash(f1) properties('replication_num' = '1'); + """ + qt_sql """ + select * + from testdt_l + where not exists ( + select f1 + from testdt_r + where testdt_l.f2 = testdt_r.f2 + and ( + date_sub(testdt_l.f2, interval 8 hour) = testdt_r.f2 + or date_sub(testdt_l.f2, interval 16 hour) = testdt_r.f2 + ) + ); + """ +} \ No newline at end of file