diff --git a/be/src/cloud/delete_bitmap_file_reader.cpp b/be/src/cloud/delete_bitmap_file_reader.cpp index e693b1cb5b5622..20b5f19f31c84f 100644 --- a/be/src/cloud/delete_bitmap_file_reader.cpp +++ b/be/src/cloud/delete_bitmap_file_reader.cpp @@ -21,7 +21,6 @@ #include "common/status.h" #include "io/fs/file_reader.h" #include "util/coding.h" -#include "util/crc32c.h" namespace doris { #include "common/compile_check_begin.h" @@ -117,7 +116,7 @@ Status DeleteBitmapFileReader::read(DeleteBitmapPB& delete_bitmap) { offset, {checksum_len_buf, DeleteBitmapFileWriter::CHECKSUM_SIZE}, &bytes_read)); offset += DeleteBitmapFileWriter::CHECKSUM_SIZE; uint32_t checksum = decode_fixed32_le(checksum_len_buf); - uint32_t computed_checksum = crc32c::Value(delete_bitmap_buf.data(), delete_bitmap_len); + uint32_t computed_checksum = crc32c::Crc32c(delete_bitmap_buf.data(), delete_bitmap_len); if (computed_checksum != checksum) { return Status::InternalError("delete bitmap checksum failed from file=" + _path + ", computed checksum=" + std::to_string(computed_checksum) + diff --git a/be/src/cloud/delete_bitmap_file_writer.cpp b/be/src/cloud/delete_bitmap_file_writer.cpp index 5244d4dad7c77d..e1e5df23404a14 100644 --- a/be/src/cloud/delete_bitmap_file_writer.cpp +++ b/be/src/cloud/delete_bitmap_file_writer.cpp @@ -17,8 +17,9 @@ #include "cloud/delete_bitmap_file_writer.h" +#include + #include "io/fs/file_writer.h" -#include "util/crc32c.h" namespace doris { #include "common/compile_check_begin.h" @@ -86,7 +87,7 @@ Status DeleteBitmapFileWriter::write(const DeleteBitmapPB& delete_bitmap) { // 3. write checksum uint8_t checksum_buf[CHECKSUM_SIZE]; - uint32_t checksum = crc32c::Value(content.data(), delete_bitmap_len); + uint32_t checksum = crc32c::Crc32c(content.data(), delete_bitmap_len); encode_fixed32_le(checksum_buf, checksum); RETURN_IF_ERROR(_file_writer->append({checksum_buf, CHECKSUM_SIZE})); return Status::OK(); diff --git a/be/src/exec/lzo_decompressor.cpp b/be/src/exec/lzo_decompressor.cpp index a4decce3ac156d..42bff1a08b5811 100644 --- a/be/src/exec/lzo_decompressor.cpp +++ b/be/src/exec/lzo_decompressor.cpp @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. +#include + #include "common/cast_set.h" #include "common/logging.h" #include "exec/decompressor.h" #include "olap/utils.h" #include "orc/Exceptions.hh" -#include "util/crc32c.h" namespace orc { /** @@ -317,7 +318,7 @@ Status LzopDecompressor::parse_header_info(uint8_t* input, size_t input_len, uint32_t computed_checksum; if (_header_info.header_checksum_type == CHECK_CRC32) { computed_checksum = CRC32_INIT_VALUE; - computed_checksum = crc32c::Extend(computed_checksum, (const char*)header, cur - header); + computed_checksum = crc32c::Extend(computed_checksum, (const uint8_t*)header, cur - header); } else { computed_checksum = ADLER32_INIT_VALUE; computed_checksum = olap_adler32(computed_checksum, (const char*)header, cur - header); @@ -366,7 +367,7 @@ Status LzopDecompressor::checksum(LzoChecksum type, const std::string& source, u case CHECK_NONE: return Status::OK(); case CHECK_CRC32: - computed_checksum = crc32c::Extend(CRC32_INIT_VALUE, (const char*)ptr, len); + computed_checksum = crc32c::Extend(CRC32_INIT_VALUE, (const uint8_t*)ptr, len); break; case CHECK_ADLER: computed_checksum = olap_adler32(ADLER32_INIT_VALUE, (const char*)ptr, len); diff --git a/be/src/exprs/block_bloom_filter.hpp b/be/src/exprs/block_bloom_filter.hpp index 6736d1e18d4b5a..4470af90ad8bba 100644 --- a/be/src/exprs/block_bloom_filter.hpp +++ b/be/src/exprs/block_bloom_filter.hpp @@ -20,6 +20,8 @@ #pragma once +#include + #include "vec/common/string_ref.h" #ifdef __AVX2__ #include @@ -28,7 +30,6 @@ #endif #include "common/status.h" -#include "util/hash_util.hpp" #include "util/slice.h" namespace butil { @@ -76,7 +77,7 @@ class BlockBloomFilter { // Same as above with convenience of hashing the key. void insert(const StringRef& key) noexcept { if (key.data) { - insert(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed)); + insert(crc32c::Extend(_hash_seed, (const uint8_t*)key.data, uint32_t(key.size))); } } @@ -105,7 +106,7 @@ class BlockBloomFilter { // Same as above with convenience of hashing the key. bool find(const StringRef& key) const noexcept { if (key.data) { - return find(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed)); + return find(crc32c::Extend(_hash_seed, (const uint8_t*)key.data, uint32_t(key.size))); } return false; } diff --git a/be/src/io/cache/cache_lru_dumper.cpp b/be/src/io/cache/cache_lru_dumper.cpp index 64bbed44c1ce4c..1e4a3c2e0ce397 100644 --- a/be/src/io/cache/cache_lru_dumper.cpp +++ b/be/src/io/cache/cache_lru_dumper.cpp @@ -17,10 +17,11 @@ #include "io/cache/cache_lru_dumper.h" +#include + #include "io/cache/block_file_cache.h" #include "io/cache/lru_queue_recorder.h" #include "util/coding.h" -#include "util/crc32c.h" #include "vec/common/endian.h" namespace doris::io { @@ -186,7 +187,7 @@ Status CacheLRUDumper::flush_current_group(std::ofstream& out, std::string& file ::doris::io::cache::EntryGroupOffsetSizePb* group_info = _dump_meta.add_group_offset_size(); group_info->set_offset(group_start); group_info->set_size(serialized.size()); - uint32_t checksum = crc32c::Value(serialized.data(), serialized.size()); + uint32_t checksum = crc32c::Crc32c(serialized.data(), serialized.size()); group_info->set_checksum(checksum); // Reset for next group @@ -417,7 +418,7 @@ Status CacheLRUDumper::parse_one_lru_entry(std::ifstream& in, std::string& filen std::string group_serialized(group_info.size(), '\0'); in.read(&group_serialized[0], group_serialized.size()); RETURN_IF_ERROR(check_ifstream_status(in, filename)); - uint32_t checksum = crc32c::Value(group_serialized.data(), group_serialized.size()); + uint32_t checksum = crc32c::Crc32c(group_serialized.data(), group_serialized.size()); if (checksum != group_info.checksum()) { std::string warn_msg = fmt::format("restore lru failed as checksum not match, file={}", filename); diff --git a/be/src/io/cache/file_cache_lru_tool.cpp b/be/src/io/cache/file_cache_lru_tool.cpp index ce843c563b5f50..42a316b774039f 100644 --- a/be/src/io/cache/file_cache_lru_tool.cpp +++ b/be/src/io/cache/file_cache_lru_tool.cpp @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include @@ -27,7 +28,6 @@ #include "io/cache/file_cache_common.h" #include "io/cache/lru_queue_recorder.h" #include "util/coding.h" -#include "util/crc32c.h" using namespace doris; @@ -177,7 +177,7 @@ Status parse_one_lru_entry(std::ifstream& in, std::string& filename, io::UInt128 std::string group_serialized(group_info.size(), '\0'); in.read(&group_serialized[0], group_serialized.size()); RETURN_IF_ERROR(check_ifstream_status(in, filename)); - uint32_t checksum = crc32c::Value(group_serialized.data(), group_serialized.size()); + uint32_t checksum = crc32c::Crc32c(group_serialized.data(), group_serialized.size()); if (checksum != group_info.checksum()) { std::string warn_msg = fmt::format("restore lru failed as checksum not match, file={}", filename); diff --git a/be/src/io/fs/s3_file_bufferpool.cpp b/be/src/io/fs/s3_file_bufferpool.cpp index f1f90ea7f2ef06..0a23a1bc19065f 100644 --- a/be/src/io/fs/s3_file_bufferpool.cpp +++ b/be/src/io/fs/s3_file_bufferpool.cpp @@ -18,6 +18,7 @@ #include "s3_file_bufferpool.h" #include +#include #include #include @@ -100,7 +101,7 @@ Status UploadFileBuffer::append_data(const Slice& data) { data.get_size()); std::memcpy((void*)(_inner_data->data().get_data() + _size), data.get_data(), data.get_size()); _size += data.get_size(); - _crc_value = crc32c::Extend(_crc_value, data.get_data(), data.get_size()); + _crc_value = crc32c::Extend(_crc_value, (const uint8_t*)data.get_data(), data.get_size()); return Status::OK(); } @@ -146,7 +147,7 @@ std::string_view FileBuffer::get_string_view_data() const { void UploadFileBuffer::on_upload() { _stream_ptr = std::make_shared(_inner_data->data().get_data(), _size); - if (_crc_value != crc32c::Value(_inner_data->data().get_data(), _size)) { + if (_crc_value != crc32c::Crc32c(_inner_data->data().get_data(), _size)) { DCHECK(false); set_status(Status::IOError("Buffer checksum not match")); return; diff --git a/be/src/io/fs/s3_file_bufferpool.h b/be/src/io/fs/s3_file_bufferpool.h index 1b552850ae3af8..efe53dd622ab9b 100644 --- a/be/src/io/fs/s3_file_bufferpool.h +++ b/be/src/io/fs/s3_file_bufferpool.h @@ -17,6 +17,8 @@ #pragma once +#include + #include #include #include @@ -27,7 +29,6 @@ #include "common/status.h" #include "io/cache/file_block.h" -#include "util/crc32c.h" #include "util/slice.h" #include "util/threadpool.h" diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 2c858e557bb14b..774a27de19e805 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -18,6 +18,7 @@ #include "olap/base_tablet.h" #include +#include #include #include @@ -48,7 +49,6 @@ #include "olap/txn_manager.h" #include "service/point_query_executor.h" #include "util/bvar_helper.h" -#include "util/crc32c.h" #include "util/debug_points.h" #include "util/doris_metrics.h" #include "util/key_util.h" @@ -2034,7 +2034,7 @@ Status BaseTablet::calc_file_crc(uint32_t* crc_value, int64_t start_version, int return st; } // crc_value is calculated based on the crc_value of each rowset. - *crc_value = crc32c::Extend(*crc_value, reinterpret_cast(&rs_crc_value), + *crc_value = crc32c::Extend(*crc_value, reinterpret_cast(&rs_crc_value), sizeof(rs_crc_value)); *file_count += rs_file_count; } diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 3765ac24220314..e6b1beb7208fa3 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/beta_rowset.h" +#include #include #include #include @@ -46,7 +47,6 @@ #include "olap/segment_loader.h" #include "olap/tablet_schema.h" #include "olap/utils.h" -#include "util/crc32c.h" #include "util/debug_points.h" #include "util/doris_metrics.h" @@ -731,7 +731,7 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) { // 3. calculate the crc_value based on all_file_md5 DCHECK(file_paths.size() == all_file_md5.size()); for (auto& i : all_file_md5) { - *crc_value = crc32c::Extend(*crc_value, i.data(), i.size()); + *crc_value = crc32c::Extend(*crc_value, (const uint8_t*)i.data(), i.size()); } return Status::OK(); diff --git a/be/src/olap/rowset/segment_v2/page_io.cpp b/be/src/olap/rowset/segment_v2/page_io.cpp index 4654fe5c0fea13..9a9b557f3fcc57 100644 --- a/be/src/olap/rowset/segment_v2/page_io.cpp +++ b/be/src/olap/rowset/segment_v2/page_io.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/page_io.h" +#include #include #include @@ -41,7 +42,6 @@ #include "olap/rowset/segment_v2/page_handle.h" #include "util/block_compression.h" #include "util/coding.h" -#include "util/crc32c.h" #include "util/faststring.h" #include "util/runtime_profile.h" @@ -103,7 +103,10 @@ Status PageIO::write_page(io::FileWriter* writer, const std::vector& body // checksum uint8_t checksum_buf[sizeof(uint32_t)]; - uint32_t checksum = crc32c::Value(page); + uint32_t checksum = 0; + for (const auto& slice : page) { + checksum = crc32c::Extend(checksum, (const uint8_t*)slice.data, slice.size); + } encode_fixed32_le(checksum_buf, checksum); page.emplace_back(checksum_buf, sizeof(uint32_t)); @@ -175,7 +178,7 @@ Status PageIO::read_and_decompress_page_(const PageReadOptions& opts, PageHandle if (opts.verify_checksum) { uint32_t expect = decode_fixed32_le((uint8_t*)page_slice.data + page_slice.size - 4); - uint32_t actual = crc32c::Value(page_slice.data, page_slice.size - 4); + uint32_t actual = crc32c::Crc32c(page_slice.data, page_slice.size - 4); // here const_cast is used for testing. InjectionContext ctx = {&actual, const_cast(&opts)}; (void)ctx; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index e5a9b8a9d7f1d1..1586629b35f1bf 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/segment.h" +#include #include #include #include @@ -66,7 +67,6 @@ #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" #include "util/coding.h" -#include "util/crc32c.h" #include "util/slice.h" // Slice #include "vec/columns/column.h" #include "vec/common/schema_util.h" @@ -469,7 +469,7 @@ Status Segment::_parse_footer(std::shared_ptr& footer, // validate footer PB's checksum uint32_t expect_checksum = decode_fixed32_le(fixed_buf + 4); - uint32_t actual_checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t actual_checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); if (actual_checksum != expect_checksum) { Status st = _write_error_file(file_size, file_size - 12 - footer_length, bytes_read, footer_buf.data(), io_ctx); diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 16bc7c290309e5..94422dd69997aa 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -24,6 +24,8 @@ #include // IWYU pragma: no_include +#include + #include "cloud/config.h" #include "common/cast_set.h" #include "common/compiler_util.h" // IWYU pragma: keep @@ -62,7 +64,6 @@ #include "runtime/memory/mem_tracker.h" #include "service/point_query_executor.h" #include "util/coding.h" -#include "util/crc32c.h" #include "util/faststring.h" #include "util/key_util.h" #include "util/simd/bits.h" @@ -1155,7 +1156,7 @@ Status SegmentWriter::_write_footer() { // footer's size put_fixed32_le(&fixed_buf, cast_set(footer_buf.size())); // footer's checksum - uint32_t checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); put_fixed32_le(&fixed_buf, checksum); // Append magic number. we don't write magic number in the header because // that will need an extra seek when reading diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index e98ee8032556cd..282655ea01067b 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/vertical_segment_writer.h" +#include #include #include #include @@ -63,7 +64,6 @@ #include "runtime/memory/mem_tracker.h" #include "service/point_query_executor.h" #include "util/coding.h" -#include "util/crc32c.h" #include "util/debug_points.h" #include "util/faststring.h" #include "util/key_util.h" @@ -1371,7 +1371,7 @@ Status VerticalSegmentWriter::_write_footer() { // footer's size put_fixed32_le(&fixed_buf, cast_set(footer_buf.size())); // footer's checksum - uint32_t checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); put_fixed32_le(&fixed_buf, checksum); // Append magic number. we don't write magic number in the header because // that will need an extra seek when reading diff --git a/be/src/olap/wal/wal_reader.cpp b/be/src/olap/wal/wal_reader.cpp index ded3adcd78f884..af30480851e9fe 100644 --- a/be/src/olap/wal/wal_reader.cpp +++ b/be/src/olap/wal/wal_reader.cpp @@ -17,6 +17,8 @@ #include "olap/wal/wal_reader.h" +#include + #include #include @@ -25,7 +27,6 @@ #include "io/fs/file_system.h" #include "io/fs/path.h" #include "util/coding.h" -#include "util/crc32c.h" #include "util/string_util.h" #include "wal_writer.h" @@ -145,7 +146,7 @@ Status WalReader::read_header(uint32_t& version, std::string& col_ids) { } Status WalReader::_check_checksum(const char* binary, size_t size, uint32_t checksum) { - uint32_t computed_checksum = crc32c::Value(binary, size); + uint32_t computed_checksum = crc32c::Crc32c(binary, size); if (LIKELY(computed_checksum == checksum)) { return Status::OK(); } diff --git a/be/src/olap/wal/wal_writer.cpp b/be/src/olap/wal/wal_writer.cpp index 385aef5245b64d..5a69e636d16db4 100644 --- a/be/src/olap/wal/wal_writer.cpp +++ b/be/src/olap/wal/wal_writer.cpp @@ -17,6 +17,7 @@ #include "olap/wal/wal_writer.h" +#include #include #include @@ -29,7 +30,6 @@ #include "io/fs/path.h" #include "olap/storage_engine.h" #include "olap/wal/wal_manager.h" -#include "util/crc32c.h" #include "util/thrift_rpc_helper.h" namespace doris { @@ -124,7 +124,7 @@ Status WalWriter::append_blocks(const PBlockArray& blocks) { offset += block_length; uint8_t checksum_buf[sizeof(uint32_t)]; - uint32_t checksum = crc32c::Value(content.data(), block_length); + uint32_t checksum = crc32c::Crc32c(content.data(), block_length); encode_fixed32_le(checksum_buf, checksum); RETURN_IF_ERROR(_file_writer->append({checksum_buf, sizeof(uint32_t)})); offset += CHECKSUM_SIZE; diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 65e3f23fc6bcb7..01f37cf133bb02 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -705,7 +705,7 @@ struct SetSharedState : public BasicSharedState { enum class ExchangeType : uint8_t { NOOP = 0, - // Shuffle data by Crc32HashPartitioner. + // Shuffle data by Crc32CHashPartitioner HASH_SHUFFLE = 1, // Round-robin passthrough data blocks. PASSTHROUGH = 2, diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 1df0f1ef70961e..ff1a4a38e90e5b 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -119,13 +119,18 @@ Status ExchangeSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf if (_part_type == TPartitionType::HASH_PARTITIONED) { _partition_count = channels.size(); - _partitioner = - std::make_unique>( - channels.size()); + if (_state->query_options().__isset.enable_new_shuffle_hash_method && + _state->query_options().enable_new_shuffle_hash_method) { + _partitioner = std::make_unique(channels.size()); + } else { + _partitioner = std::make_unique< + vectorized::Crc32HashPartitioner>( + channels.size()); + } RETURN_IF_ERROR(_partitioner->init(p._texprs)); RETURN_IF_ERROR(_partitioner->prepare(state, p._row_desc)); custom_profile()->add_info_string( - "Partitioner", fmt::format("Crc32HashPartitioner({})", _partition_count)); + "Partitioner", fmt::format("Crc32CHashPartitioner({})", _partition_count)); } else if (_part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) { _partition_count = channels.size(); _partitioner = diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index b6328876008d95..34bd6be056c87e 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -614,7 +614,7 @@ class DataSinkOperatorXBase : public OperatorBase { virtual Status init(const TPlanNode& tnode, RuntimeState* state); Status init(const TDataSink& tsink) override; - [[nodiscard]] virtual Status init(ExchangeType type, const int num_buckets, + [[nodiscard]] virtual Status init(RuntimeState* state, ExchangeType type, const int num_buckets, const bool use_global_hash_shuffle, const std::map& shuffle_idx_to_instance_idx) { return Status::InternalError("init() is only implemented in local exchange!"); diff --git a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp index 494e638be527ca..8b76900bfdb519 100644 --- a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp @@ -17,6 +17,8 @@ #include "pipeline/local_exchange/local_exchange_sink_operator.h" +#include + #include "pipeline/local_exchange/local_exchanger.h" #include "vec/runtime/partitioner.h" #include "vec/sink/vdata_stream_sender.h" @@ -35,8 +37,8 @@ std::vector LocalExchangeSinkLocalState::dependencies() const { return deps; } -Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets, - const bool use_global_hash_shuffle, +Status LocalExchangeSinkOperatorX::init(RuntimeState* state, ExchangeType type, + const int num_buckets, const bool use_global_hash_shuffle, const std::map& shuffle_idx_to_instance_idx) { _name = "LOCAL_EXCHANGE_SINK_OPERATOR(" + get_exchange_type_name(type) + ")"; _type = type; @@ -54,13 +56,20 @@ Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets _shuffle_idx_to_instance_idx[i] = i; } } - _partitioner.reset(new vectorized::Crc32HashPartitioner( - _num_partitions)); + if (state->query_options().__isset.enable_new_shuffle_hash_method && + state->query_options().enable_new_shuffle_hash_method) { + _partitioner = std::make_unique(_num_partitions); + } else { + _partitioner = std::make_unique< + vectorized::Crc32HashPartitioner>( + _num_partitions); + } RETURN_IF_ERROR(_partitioner->init(_texprs)); } else if (_type == ExchangeType::BUCKET_HASH_SHUFFLE) { DCHECK_GT(num_buckets, 0); - _partitioner.reset( - new vectorized::Crc32HashPartitioner(num_buckets)); + _partitioner = + std::make_unique>( + num_buckets); RETURN_IF_ERROR(_partitioner->init(_texprs)); } return Status::OK(); diff --git a/be/src/pipeline/local_exchange/local_exchange_sink_operator.h b/be/src/pipeline/local_exchange/local_exchange_sink_operator.h index 7f4b46724cca59..c4723a9f5127b7 100644 --- a/be/src/pipeline/local_exchange/local_exchange_sink_operator.h +++ b/be/src/pipeline/local_exchange/local_exchange_sink_operator.h @@ -68,17 +68,6 @@ class LocalExchangeSinkLocalState final : public PipelineXSinkLocalState> SHIFT_BITS; - } -}; - class LocalExchangeSinkOperatorX final : public DataSinkOperatorX { public: using Base = DataSinkOperatorX; @@ -108,7 +97,8 @@ class LocalExchangeSinkOperatorX final : public DataSinkOperatorX& shuffle_idx_to_instance_idx) override; Status prepare(RuntimeState* state) override; diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 7d145550a945f8..fe073a0305288c 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -757,8 +757,9 @@ Status PipelineFragmentContext::_add_local_exchange_impl( data_distribution.distribution_type = ExchangeType::HASH_SHUFFLE; } RETURN_IF_ERROR(new_pip->set_sink(sink)); - RETURN_IF_ERROR(new_pip->sink()->init(data_distribution.distribution_type, num_buckets, - use_global_hash_shuffle, shuffle_idx_to_instance_idx)); + RETURN_IF_ERROR(new_pip->sink()->init(_runtime_state.get(), data_distribution.distribution_type, + num_buckets, use_global_hash_shuffle, + shuffle_idx_to_instance_idx)); // 2. Create and initialize LocalExchangeSharedState. std::shared_ptr shared_state = diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 5ee174acddbeb8..c580db4c78668b 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -37,7 +38,6 @@ #include "olap/tablet_meta.h" #include "olap/tablet_meta_manager.h" #include "util/coding.h" -#include "util/crc32c.h" using doris::DataDir; using doris::StorageEngine; @@ -263,7 +263,7 @@ Status get_segment_footer(doris::io::FileReader* file_reader, SegmentFooterPB* f // validate footer PB's checksum uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); - uint32_t actual_checksum = doris::crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t actual_checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); if (actual_checksum != expect_checksum) { return Status::Corruption( "Bad segment file {}: footer checksum not match, actual={} vs expect={}", file_name, diff --git a/be/src/util/crc32c.cpp b/be/src/util/crc32c.cpp deleted file mode 100644 index 3a2b91ddca9d73..00000000000000 --- a/be/src/util/crc32c.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "util/crc32c.h" - -#include - -namespace doris { -namespace crc32c { - -uint32_t Extend(uint32_t crc, const char* data, size_t n) { - return crc32c_extend(crc, (const uint8_t*)data, n); -} - -uint32_t Value(const char* data, size_t n) { - return crc32c_value((const uint8_t*)data, n); -} - -uint32_t Value(const std::vector& slices) { - uint32_t crc = 0; - for (const auto& slice : slices) { - crc = crc32c_extend(crc, (const uint8_t*)slice.get_data(), slice.get_size()); - } - return crc; -} - -} // namespace crc32c -} // namespace doris diff --git a/be/src/util/crc32c.h b/be/src/util/crc32c.h deleted file mode 100644 index 75ac6a59128f9d..00000000000000 --- a/be/src/util/crc32c.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "util/slice.h" - -namespace doris { -namespace crc32c { - -uint32_t Extend(uint32_t crc, const char* data, size_t n); - -// Return the crc32c of data[0,n-1] -uint32_t Value(const char* data, size_t n); - -// Return the crc32c of data content in all slices -uint32_t Value(const std::vector& slices); - -} // namespace crc32c -} // namespace doris diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index 11892fa4321496..9c5d4ef3aca539 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -20,6 +20,7 @@ #pragma once +#include #include #include #include @@ -30,7 +31,6 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "util/cpu_info.h" -#include "util/crc32c.h" #include "util/hash/city.h" #include "util/murmur_hash3.h" #include "util/sse_util.hpp" @@ -50,10 +50,25 @@ class HashUtil { return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4); } - // ATTN: crc32c's result is different with zlib_crc32 coz of different polynomial - // crc32c have better performance than zlib_crc32/crc_hash - static uint32_t crc32c_hash(const void* data, uint32_t bytes, uint32_t hash) { - return crc32c::Extend(hash, static_cast(data), bytes); + template + static uint32_t crc32c_fixed(const T& value, uint32_t hash) { + if constexpr (sizeof(T) == 1) { + return _mm_crc32_u8(hash, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 2) { + return _mm_crc32_u16(hash, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 4) { + return _mm_crc32_u32(hash, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 8) { + return (uint32_t)_mm_crc32_u64(hash, *reinterpret_cast(&value)); + } else { + return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T)); + } + } + + static uint32_t crc32c_null(uint32_t hash) { + // null is treat as 0 when hash + static const int INT_VALUE = 0; + return crc32c_fixed(INT_VALUE, hash); } // Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is @@ -65,7 +80,7 @@ class HashUtil { // NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn. // TODO: crc32 hashes with different seeds do not result in different hash functions. // The resulting hashes are correlated. - // ATTN: prefer do not use this function anymore, use crc32c_hash instead + // ATTN: prefer do not use this function anymore, use crc32c::Extend instead // This function is retained because it is not certain whether there are compatibility issues with historical data. static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) { if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) { diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 985fb19276e4b1..23aedbae751802 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -408,6 +408,19 @@ class IColumn : public COW { "Method update_crc_with_value is not supported for " + get_name()); } + virtual void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method update_crc32c_batch is not supported for " + get_name()); + } + + // use range for one hash value to avoid virtual function call in loop + virtual void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "Method update_crc32c_single is not supported for " + get_name()); + } + /** Removes elements that don't match the filter. * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; @@ -665,6 +678,9 @@ class IColumn : public COW { // usage: nested_column.replace_column_null_data(nested_null_map.data()) // only wrok on column_vector and column column decimal, there will be no behavior when other columns type call this method virtual void replace_column_null_data(const uint8_t* __restrict null_map) {} + // whether support replace null data, default return false + // column_vector and column_decimal override this method to return true + virtual bool support_replace_column_null_data() const { return false; } // For float/double types, replace -0.0 with 0.0, set NaN to quiet NaN, // used to ensure data hash equality for -0.0 and +0.0, e.g. aggregate and join diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index fbccd299aa760d..0ee4ef0a07af76 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -428,6 +428,50 @@ void ColumnArray::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveTyp } } +void ColumnArray::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + update_crc32c_single(i, i + 1, hashes[i], nullptr); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + update_crc32c_single(i, i + 1, hashes[i], nullptr); + } + } +} + +void ColumnArray::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + const auto& offsets_column = get_offsets(); + if (null_map) { + for (size_t i = start; i < end; ++i) { + if (null_map[i] == 0) { + size_t elem_size = offsets_column[i] - offsets_column[i - 1]; + if (elem_size == 0) { + hash = HashUtil::crc32c_null(hash); + } else { + get_data().update_crc32c_single(offsets_column[i - 1], offsets_column[i], hash, + nullptr); + } + } + } + } else { + for (size_t i = start; i < end; ++i) { + size_t elem_size = offsets_column[i] - offsets_column[i - 1]; + if (elem_size == 0) { + hash = HashUtil::crc32c_null(hash); + } else { + get_data().update_crc32c_single(offsets_column[i - 1], offsets_column[i], hash, + nullptr); + } + } + } +} + void ColumnArray::insert(const Field& x) { DCHECK_EQ(x.get_type(), PrimitiveType::TYPE_ARRAY); if (x.is_null()) { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 44df03773f1a86..2bf2a36f9f4f8a 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -149,6 +149,11 @@ class ColumnArray final : public COWHelper { void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, uint32_t offset = 0, const uint8_t* __restrict null_data = nullptr) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; void insert_range_from(const IColumn& src, size_t start, size_t length) override; void insert_range_from_ignore_overflow(const IColumn& src, size_t start, diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index d78387fb385735..7843c0dfece66c 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -192,16 +192,6 @@ class ColumnComplexType final : public COWHelper> __builtin_unreachable(); } - // maybe we do not need to impl the function - void update_hash_with_value(size_t n, SipHash& hash) const override { - // TODO add hash function - } - - void update_hashes_with_value(uint64_t* __restrict hashes, - const uint8_t* __restrict null_data = nullptr) const override { - // TODO add hash function - } - StringRef get_raw_data() const override { return StringRef(reinterpret_cast(data.data()), data.size()); } diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 72f9fcde421863..782514fcc6f185 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -20,6 +20,7 @@ #include "vec/columns/column_decimal.h" +#include #include #include @@ -208,6 +209,40 @@ void ColumnDecimal::update_crcs_with_value(uint32_t* __restrict hashes, Primi } } +template +void ColumnDecimal::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + hashes[i] = HashUtil::crc32c_fixed(data[i], hashes[i]); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + hashes[i] = HashUtil::crc32c_fixed(data[i], hashes[i]); + } + } +} + +template +void ColumnDecimal::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + hash = HashUtil::crc32c_fixed(data[i], hash); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + hash = HashUtil::crc32c_fixed(data[i], hash); + } + } +} + template void ColumnDecimal::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const { diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index aa11ba342d9910..46d5663b6aeae9 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -173,6 +173,12 @@ class ColumnDecimal final : public COWHelper> { uint32_t offset, const uint8_t* __restrict null_data) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + void update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, const uint8_t* __restrict null_data) const override; void update_crc_with_value(size_t start, size_t end, uint32_t& hash, @@ -222,6 +228,8 @@ class ColumnDecimal final : public COWHelper> { void replace_column_null_data(const uint8_t* __restrict null_map) override; + bool support_replace_column_null_data() const override { return true; } + void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms, EqualRange& range, bool last_column) const override; diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 63dc41bfd51036..2659ec38a82f0e 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -345,7 +345,8 @@ class ColumnDictI32 final : public COWHelper { if (type == FieldType::OLAP_FIELD_TYPE_CHAR) { len = strnlen(sv.data, sv.size); } - uint32_t hash_val = HashUtil::crc32c_hash(sv.data, static_cast(len), 0); + uint32_t hash_val = + crc32c::Extend(0, (const uint8_t*)sv.data, static_cast(len)); _hash_values[code] = hash_val; _compute_hash_value_flags[code] = 1; return _hash_values[code]; diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index 8b7ce9e3d0a0c4..700ed09fbf4261 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -153,6 +153,8 @@ class IColumnDummy : public IColumn { __builtin_unreachable(); } + // dummy column do not need to hash, so these functions are empty + // do not throw exception void update_hash_with_value(size_t n, SipHash& hash) const override {} void update_hashes_with_value(uint64_t* __restrict hashes, @@ -168,6 +170,12 @@ class IColumnDummy : public IColumn { void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override {} + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override {} + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override {} + protected: size_t s; }; diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 65aff62d5ed706..5e5f3a841d5f30 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -398,6 +398,50 @@ void ColumnMap::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType } } +void ColumnMap::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + update_crc32c_single(i, i + 1, hashes[i], nullptr); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + update_crc32c_single(i, i + 1, hashes[i], nullptr); + } + } +} + +void ColumnMap::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + const auto& offsets = get_offsets(); + if (null_map) { + for (size_t i = start; i < end; ++i) { + if (null_map[i] == 0) { + size_t kv_size = offsets[i] - offsets[i - 1]; + if (kv_size == 0) { + hash = HashUtil::crc32c_null(hash); + } else { + get_keys().update_crc32c_single(offsets[i - 1], offsets[i], hash, nullptr); + get_values().update_crc32c_single(offsets[i - 1], offsets[i], hash, nullptr); + } + } + } + } else { + for (size_t i = start; i < end; ++i) { + size_t kv_size = offsets[i] - offsets[i - 1]; + if (kv_size == 0) { + hash = HashUtil::crc32c_null(hash); + } else { + get_keys().update_crc32c_single(offsets[i - 1], offsets[i], hash, nullptr); + get_values().update_crc32c_single(offsets[i - 1], offsets[i], hash, nullptr); + } + } + } +} + void ColumnMap::insert_range_from(const IColumn& src, size_t start, size_t length) { if (length == 0) { return; diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 7ef91f9dd385da..925f43da1d3c10 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -166,6 +166,12 @@ class ColumnMap final : public COWHelper { uint32_t offset = 0, const uint8_t* __restrict null_data = nullptr) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + /******************** keys and values ***************/ const ColumnPtr& get_keys_ptr() const { return keys_column; } ColumnPtr& get_keys_ptr() { return keys_column; } diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 35e5ca6e1cc39c..5c78fa456c7820 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -114,6 +114,41 @@ void ColumnNullable::update_crcs_with_value(uint32_t* __restrict hashes, doris:: } } +void ColumnNullable::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + DCHECK(null_map == nullptr); + const auto* __restrict real_null_data = + assert_cast(get_null_map_column()).get_data().data(); + if (_nested_column->support_replace_column_null_data()) { + // nullmap process is slow, replace null data to default value to avoid nullmap process + _nested_column->assume_mutable()->replace_column_null_data(real_null_data); + _nested_column->update_crc32c_batch(hashes, nullptr); + } else { + auto s = size(); + for (int i = 0; i < s; ++i) { + if (real_null_data[i] != 0) { + hashes[i] = HashUtil::crc32c_null(hashes[i]); + } + } + _nested_column->update_crc32c_batch(hashes, real_null_data); + } +} + +void ColumnNullable::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + DCHECK(null_map == nullptr); + const auto* __restrict real_null_data = + assert_cast(get_null_map_column()).get_data().data(); + constexpr int NULL_VALUE = 0; + auto s = size(); + for (int i = 0; i < s; ++i) { + if (real_null_data[i] != 0) { + hash = HashUtil::crc32c_fixed(NULL_VALUE, hash); + } + } + _nested_column->update_crc32c_single(start, end, hash, real_null_data); +} + void ColumnNullable::update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const { DCHECK(null_data == nullptr); diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 1c027522015a6c..83ad71ad400eaa 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -229,6 +229,11 @@ class ColumnNullable final : public COWHelper { void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const override; diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index ad2a6e472eb7ca..57ed137cc3ff67 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -20,6 +20,8 @@ #include "vec/columns/column_string.h" +#include + #include #include #include @@ -318,6 +320,44 @@ void ColumnStr::update_crcs_with_value(uint32_t* __restrict hashes, doris::Pr } } +template +void ColumnStr::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; i++) { + if (null_map[i] == 0) { + auto data_ref = get_data_at(i); + hashes[i] = + crc32c_extend(hashes[i], (const uint8_t*)(data_ref.data), data_ref.size); + } + } + } else { + for (size_t i = 0; i < s; i++) { + auto data_ref = get_data_at(i); + hashes[i] = crc32c_extend(hashes[i], (const uint8_t*)(data_ref.data), data_ref.size); + } + } +} + +template +void ColumnStr::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + if (null_map) { + for (size_t i = start; i < end; i++) { + if (null_map[i] == 0) { + auto data_ref = get_data_at(i); + hash = crc32c_extend(hash, (const uint8_t*)(data_ref.data), data_ref.size); + } + } + } else { + for (size_t i = start; i < end; i++) { + auto data_ref = get_data_at(i); + hash = crc32c_extend(hash, (const uint8_t*)(data_ref.data), data_ref.size); + } + } +} + template ColumnPtr ColumnStr::filter(const IColumn::Filter& filt, ssize_t result_size_hint) const { if constexpr (std::is_same_v) { diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index d14d1b5fc17bae..03051c8cb903e8 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -427,6 +427,12 @@ class ColumnStr final : public COWHelper> { uint32_t offset, const uint8_t* __restrict null_data) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const override { auto s = size(); diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 3621857dee9df4..415f08701a539b 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -257,6 +257,20 @@ void ColumnStruct::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveTy } } +void ColumnStruct::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + for (const auto& column : columns) { + column->update_crc32c_batch(hashes, nullptr); + } +} + +void ColumnStruct::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + for (const auto& column : columns) { + column->update_crc32c_single(start, end, hash, nullptr); + } +} + void ColumnStruct::insert_indices_from(const IColumn& src, const uint32_t* indices_begin, const uint32_t* indices_end) { const auto& src_concrete = assert_cast(src); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 8bc4601ce90899..6e8ffdf69e8535 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -125,6 +125,12 @@ class ColumnStruct final : public COWHelper { uint32_t offset = 0, const uint8_t* __restrict null_data = nullptr) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + void insert_indices_from(const IColumn& src, const uint32_t* indices_begin, const uint32_t* indices_end) override; diff --git a/be/src/vec/columns/column_variant.cpp b/be/src/vec/columns/column_variant.cpp index 0d25fbec6668d2..1eb789b1c8216c 100644 --- a/be/src/vec/columns/column_variant.cpp +++ b/be/src/vec/columns/column_variant.cpp @@ -2120,6 +2120,19 @@ void ColumnVariant::update_crc_with_value(size_t start, size_t end, uint32_t& ha }); } +void ColumnVariant::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + for_each_imutable_column( + [&](const ColumnPtr column) { column->update_crc32c_batch(hashes, nullptr); }); +} + +void ColumnVariant::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + for_each_imutable_column([&](const ColumnPtr column) { + column->update_crc32c_single(start, end, hash, nullptr); + }); +} + std::string ColumnVariant::debug_string() const { std::stringstream res; res << get_name() << "(num_row = " << num_rows; diff --git a/be/src/vec/columns/column_variant.h b/be/src/vec/columns/column_variant.h index 2bc8c3f00d8214..d32bc578906b8b 100644 --- a/be/src/vec/columns/column_variant.h +++ b/be/src/vec/columns/column_variant.h @@ -496,6 +496,12 @@ class ColumnVariant final : public COWHelper { void update_crc_with_value(size_t start, size_t end, uint32_t& hash, const uint8_t* __restrict null_data) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + // Not implemented StringRef get_data_at(size_t) const override { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 8947a4cc025bd6..a5367ab8634545 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -20,6 +20,7 @@ #include "vec/columns/column_vector.h" +#include #include #include #include @@ -237,6 +238,52 @@ void ColumnVector::update_crcs_with_value(uint32_t* __restrict hashes, Primit } } +template +uint32_t ColumnVector::_crc32c_hash(uint32_t hash, size_t idx) const { + if constexpr (is_date_or_datetime(T)) { + char buf[64]; + const auto& date_val = (const VecDateTimeValue&)data[idx]; + auto len = date_val.to_buffer(buf); + return crc32c_extend(hash, (const uint8_t*)buf, len); + } else { + return HashUtil::crc32c_fixed(data[idx], hash); + } +} + +template +void ColumnVector::update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + hashes[i] = _crc32c_hash(hashes[i], i); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + hashes[i] = _crc32c_hash(hashes[i], i); + } + } +} + +template +void ColumnVector::update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const { + auto s = size(); + if (null_map) { + for (size_t i = 0; i < s; ++i) { + if (null_map[i] == 0) { + hash = _crc32c_hash(hash, i); + } + } + } else { + for (size_t i = 0; i < s; ++i) { + hash = _crc32c_hash(hash, i); + } + } +} + template struct ColumnVector::less { const Self& parent; diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index bf50ebc0062f2e..6478eb6192c83a 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -234,12 +234,19 @@ class ColumnVector final : public COWHelper> { } } } + + void update_crc32c_single(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_map) const override; + void update_hash_with_value(size_t n, SipHash& hash) const override; void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows, uint32_t offset, const uint8_t* __restrict null_data) const override; + void update_crc32c_batch(uint32_t* __restrict hashes, + const uint8_t* __restrict null_map) const override; + void update_hashes_with_value(uint64_t* __restrict hashes, const uint8_t* __restrict null_data) const override; @@ -327,6 +334,8 @@ class ColumnVector final : public COWHelper> { void replace_column_null_data(const uint8_t* __restrict null_map) override; + bool support_replace_column_null_data() const override { return true; } + void replace_float_special_values() override; void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms, @@ -351,6 +360,7 @@ class ColumnVector final : public COWHelper> { size_t serialize_size_at(size_t row) const override { return sizeof(value_type); } protected: + uint32_t _crc32c_hash(uint32_t hash, size_t idx) const; // when run function which need_replace_null_data_to_default, use the value far from 0 to avoid // raise errors for null cell. static value_type default_value() { diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 011fadbf9ce370..6a29bac2b658b1 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -4601,7 +4601,7 @@ class FunctionNgramSearch : public IFunction { uint32_t sub_str_hash(const char* data, int32_t length) const { constexpr static uint32_t seed = 0; - return HashUtil::crc32c_hash(data, length, seed); + return crc32c::Extend(seed, (const uint8_t*)data, length); } template diff --git a/be/src/vec/runtime/partitioner.cpp b/be/src/vec/runtime/partitioner.cpp index 367ff2bded2e6b..7e9410fd29f47b 100644 --- a/be/src/vec/runtime/partitioner.cpp +++ b/be/src/vec/runtime/partitioner.cpp @@ -18,6 +18,7 @@ #include "partitioner.h" #include "common/cast_set.h" +#include "common/status.h" #include "pipeline/local_exchange/local_exchange_sink_operator.h" #include "runtime/thread_context.h" #include "vec/columns/column_const.h" @@ -37,10 +38,9 @@ Status Crc32HashPartitioner::do_partitioning(RuntimeState* state, Bl int result_size = cast_set(_partition_expr_ctxs.size()); std::vector result(result_size); - _hash_vals.resize(rows); - std::fill(_hash_vals.begin(), _hash_vals.end(), 0); + _initialize_hash_vals(rows); auto* __restrict hashes = _hash_vals.data(); - { RETURN_IF_ERROR(_get_partition_column_result(block, result)); } + RETURN_IF_ERROR(_get_partition_column_result(block, result)); for (int j = 0; j < result_size; ++j) { const auto& [col, is_const] = unpack_if_const(block->get_by_position(result[j]).column); if (is_const) { @@ -53,7 +53,7 @@ Status Crc32HashPartitioner::do_partitioning(RuntimeState* state, Bl hashes[i] = ChannelIds()(hashes[i], _partition_count); } - { Block::erase_useless_column(block, column_to_keep); } + Block::erase_useless_column(block, column_to_keep); } return Status::OK(); } @@ -70,14 +70,20 @@ template Status Crc32HashPartitioner::clone(RuntimeState* state, std::unique_ptr& partitioner) { auto* new_partitioner = new Crc32HashPartitioner(cast_set(_partition_count)); + partitioner.reset(new_partitioner); + return _clone_expr_ctxs(state, new_partitioner->_partition_expr_ctxs); +} + +void Crc32CHashPartitioner::_do_hash(const ColumnPtr& column, uint32_t* __restrict result, + int idx) const { + column->update_crc32c_batch(result, nullptr); +} +Status Crc32CHashPartitioner::clone(RuntimeState* state, + std::unique_ptr& partitioner) { + auto* new_partitioner = new Crc32CHashPartitioner(cast_set(_partition_count)); partitioner.reset(new_partitioner); - new_partitioner->_partition_expr_ctxs.resize(_partition_expr_ctxs.size()); - for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) { - RETURN_IF_ERROR( - _partition_expr_ctxs[i]->clone(state, new_partitioner->_partition_expr_ctxs[i])); - } - return Status::OK(); + return _clone_expr_ctxs(state, new_partitioner->_partition_expr_ctxs); } template class Crc32HashPartitioner; diff --git a/be/src/vec/runtime/partitioner.h b/be/src/vec/runtime/partitioner.h index 29afab157b1972..031a97dc2bd9e2 100644 --- a/be/src/vec/runtime/partitioner.h +++ b/be/src/vec/runtime/partitioner.h @@ -17,7 +17,8 @@ #pragma once -#include "util/runtime_profile.h" +#include + #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" @@ -81,7 +82,9 @@ class Crc32HashPartitioner : public PartitionerBase { Status do_partitioning(RuntimeState* state, Block* block, bool eos, bool* already_sent) const override; - ChannelField get_channel_ids() const override { return {_hash_vals.data(), sizeof(uint32_t)}; } + ChannelField get_channel_ids() const override { + return {.channel_id = _hash_vals.data(), .len = sizeof(uint32_t)}; + } Status clone(RuntimeState* state, std::unique_ptr& partitioner) override; @@ -94,7 +97,19 @@ class Crc32HashPartitioner : public PartitionerBase { return Status::OK(); } - void _do_hash(const ColumnPtr& column, uint32_t* __restrict result, int idx) const; + Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const { + new_partition_expr_ctxs.resize(_partition_expr_ctxs.size()); + for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) { + RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i])); + } + return Status::OK(); + } + + virtual void _do_hash(const ColumnPtr& column, uint32_t* __restrict result, int idx) const; + virtual void _initialize_hash_vals(size_t rows) const { + _hash_vals.resize(rows); + std::ranges::fill(_hash_vals, 0); + } VExprContextSPtrs _partition_expr_ctxs; mutable std::vector _hash_vals; @@ -113,5 +128,44 @@ struct SpillPartitionChannelIds { return ((l >> 16) | (l << 16)) % r; } }; + +static inline uint32_t crc32c_shuffle_mix(uint32_t h) { + // Step 1: fold high entropy into low bits + h ^= h >> 16; + // Step 2: odd multiplicative scramble (cheap avalanche) + h *= 0xA5B35705U; + // Step 3: final fold to break remaining linearity + h ^= h >> 13; + return h; +} + +// use high 16 bits as channel id to avoid conflict with crc32c hash table +// shuffle hash function same with crc32c hash table(eg join hash table) will lead bad performance +// hash table offten use low 16 bits as bucket index, so we shift 16 bits to high bits to avoid conflict +struct ShiftChannelIds { + template + HashValueType operator()(HashValueType l, size_t r) { + return crc32c_shuffle_mix(l) % r; + } +}; + +class Crc32CHashPartitioner : public Crc32HashPartitioner { +public: + Crc32CHashPartitioner(int partition_count) + : Crc32HashPartitioner(partition_count) {} + + Status clone(RuntimeState* state, std::unique_ptr& partitioner) override; + +private: + void _do_hash(const ColumnPtr& column, uint32_t* __restrict result, int idx) const override; + + void _initialize_hash_vals(size_t rows) const override { + _hash_vals.resize(rows); + // use golden ratio to initialize hash values to avoid collision with hash table's hash function + constexpr uint32_t CRC32C_SHUFFLE_SEED = 0x9E3779B9U; + std::ranges::fill(_hash_vals, CRC32C_SHUFFLE_SEED); + } +}; + #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/test/olap/rowset/segment_v2/column_meta_accessor_test.cpp b/be/test/olap/rowset/segment_v2/column_meta_accessor_test.cpp index b118968a589128..a1f599cb7070c7 100644 --- a/be/test/olap/rowset/segment_v2/column_meta_accessor_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_meta_accessor_test.cpp @@ -17,6 +17,7 @@ #include "olap/rowset/segment_v2/column_meta_accessor.h" +#include #include #include @@ -27,7 +28,6 @@ #include "olap/rowset/segment_v2/segment.h" #include "olap/rowset/segment_v2/segment_writer.h" #include "util/coding.h" -#include "util/crc32c.h" using namespace doris; using namespace doris::segment_v2; @@ -52,7 +52,7 @@ Status append_footer_trailer(io::FileWriter* fw, SegmentFooterPB* footer) { } faststring fixed_buf; put_fixed32_le(&fixed_buf, static_cast(footer_buf.size())); - uint32_t checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); put_fixed32_le(&fixed_buf, checksum); fixed_buf.append("D0R1", 4); std::vector slices {Slice(footer_buf), Slice(fixed_buf)}; diff --git a/be/test/olap/rowset/segment_v2/external_col_meta_util_test.cpp b/be/test/olap/rowset/segment_v2/external_col_meta_util_test.cpp index 6b7a4c583c8861..b210a09ec67f7a 100644 --- a/be/test/olap/rowset/segment_v2/external_col_meta_util_test.cpp +++ b/be/test/olap/rowset/segment_v2/external_col_meta_util_test.cpp @@ -17,6 +17,8 @@ #include "olap/rowset/segment_v2/external_col_meta_util.h" +#include + #include #include #include @@ -31,7 +33,6 @@ #include "olap/tablet_schema_helper.h" #include "olap/types.h" #include "util/coding.h" -#include "util/crc32c.h" #include "vec/json/path_in_data.h" using namespace doris; @@ -63,7 +64,7 @@ Status append_footer_trailer(io::FileWriter* fw, SegmentFooterPB* footer) { // footer size (4 bytes) put_fixed32_le(&fixed_buf, static_cast(footer_buf.size())); // footer checksum (4 bytes) - uint32_t checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + uint32_t checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); put_fixed32_le(&fixed_buf, checksum); // magic number (4 bytes) fixed_buf.append("D0R1", 4); @@ -100,7 +101,7 @@ Status read_footer_from_file(const io::FileReaderSPtr& fr, SegmentFooterPB* foot footer_length); } const uint32_t expect_checksum = decode_fixed32_le(fixed_buf + 4); - const uint32_t actual_checksum = crc32c::Value(footer_buf.data(), footer_buf.size()); + const uint32_t actual_checksum = crc32c::Crc32c(footer_buf.data(), footer_buf.size()); if (actual_checksum != expect_checksum) { return Status::Corruption("footer checksum mismatch, actual={}, expect={}", actual_checksum, expect_checksum); diff --git a/be/test/util/crc32c_test.cpp b/be/test/util/crc32c_test.cpp index b20ac7e887e13c..5a6a7faa3a57ea 100644 --- a/be/test/util/crc32c_test.cpp +++ b/be/test/util/crc32c_test.cpp @@ -18,8 +18,7 @@ // the following code are modified from RocksDB: // https://github.com/facebook/rocksdb/blob/master/util/crc32c_test.cc -#include "util/crc32c.h" - +#include #include #include #include @@ -30,9 +29,6 @@ #include "util/slice.h" namespace doris { -namespace crc32c { - -class CRC {}; TEST(CRC, StandardResults) { // Original Fast_CRC32 tests. @@ -40,20 +36,20 @@ TEST(CRC, StandardResults) { char buf[32]; memset(buf, 0, sizeof(buf)); - EXPECT_EQ(0x8a9136aaU, Value(buf, sizeof(buf))); + EXPECT_EQ(0x8a9136aaU, crc32c::Crc32c(buf, sizeof(buf))); memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0x62a8ab43U, Value(buf, sizeof(buf))); + EXPECT_EQ(0x62a8ab43U, crc32c::Crc32c(buf, sizeof(buf))); for (int i = 0; i < 32; i++) { buf[i] = static_cast(i); } - EXPECT_EQ(0x46dd794eU, Value(buf, sizeof(buf))); + EXPECT_EQ(0x46dd794eU, crc32c::Crc32c(buf, sizeof(buf))); for (int i = 0; i < 32; i++) { buf[i] = static_cast(31 - i); } - EXPECT_EQ(0x113fdb5cU, Value(buf, sizeof(buf))); + EXPECT_EQ(0x113fdb5cU, crc32c::Crc32c(buf, sizeof(buf))); unsigned char data[48] = { 0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -61,19 +57,21 @@ TEST(CRC, StandardResults) { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; - EXPECT_EQ(0xd9963a56, Value(reinterpret_cast(data), sizeof(data))); + EXPECT_EQ(0xd9963a56, crc32c::Crc32c(reinterpret_cast(data), sizeof(data))); } TEST(CRC, Values) { - EXPECT_NE(Value("a", 1), Value("foo", 3)); + EXPECT_NE(crc32c::Crc32c(std::string("a")), crc32c::Crc32c(std::string("foo"))); } TEST(CRC, Extend) { - EXPECT_EQ(Value("hello world", 11), Extend(Value("hello ", 6), "world", 5)); - - std::vector slices = {Slice("hello "), Slice("world")}; - EXPECT_EQ(Value("hello world", 11), Value(slices)); + auto s1 = std::string("hello "); + auto s2 = std::string("world"); + EXPECT_EQ(crc32c::Crc32c(std::string("hello world")), + crc32c::Extend(crc32c::Crc32c(s1), (const uint8_t*)s2.data(), s2.size())); + std::vector slices = {s1, s2}; + EXPECT_EQ(crc32c::Crc32c(std::string("hello world")), + crc32c::Extend(crc32c::Crc32c(slices[0]), (const uint8_t*)s2.data(), s2.size())); } -} // namespace crc32c } // namespace doris diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5e51b0ee374277..919e9c6a894761 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -306,6 +306,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_PARALLEL_SCAN = "enable_parallel_scan"; + public static final String ENABLE_NEW_SHUFFLE_HASH_METHOD = "enable_new_shuffle_hash_method"; + // Force the number of scanners to equal the number of segments in OLAP scan when parallel scan is enabled. public static final String OPTIMIZE_INDEX_SCAN_PARALLELISM = "optimize_index_scan_parallelism"; @@ -955,6 +957,9 @@ public static double getHotValueThreshold() { "Enable pushing common sub-expressions as virtual columns into OlapScan (experimental)"}) public boolean experimentalEnableVirtualSlotForCse = false; + @VariableMgr.VarAttr(name = ENABLE_NEW_SHUFFLE_HASH_METHOD) + public boolean enableNewShffleHashMethod = true; + @VariableMgr.VarAttr(name = JDBC_CLICKHOUSE_QUERY_FINAL, needForward = true, description = {"是否在查询 ClickHouse JDBC 外部表时,对查询 SQL 添加 FINAL 关键字。", "Whether to add the FINAL keyword to the query SQL when querying ClickHouse JDBC external tables."}) @@ -4816,6 +4821,7 @@ public TQueryOptions toThrift() { tResult.setCheckOverflowForDecimal(checkOverflowForDecimal); tResult.setFragmentTransmissionCompressionCodec(fragmentTransmissionCompressionCodec.trim().toLowerCase()); tResult.setEnableLocalExchange(enableLocalExchange); + tResult.setEnableNewShuffleHashMethod(enableNewShffleHashMethod); tResult.setSkipStorageEngineMerge(skipStorageEngineMerge); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 8aa5d2bd1a595f..ef36feaf0099ce 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -178,7 +178,7 @@ struct TQueryOptions { // For debug purpose, skip delete predicates when reading data 50: optional bool skip_delete_predicate = false - 51: optional bool enable_new_shuffle_hash_method // deprecated + 51: optional bool enable_new_shuffle_hash_method 52: optional i32 be_exec_version = 0