Skip to content

Commit 0902e17

Browse files
committed
Revert "[Chore](thirdparty) add crc32c-1.1.2 to thirdparty (#58462)"
This reverts commit 066b69e.
1 parent 727e126 commit 0902e17

24 files changed

+285
-199
lines changed

be/cmake/thirdparty.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ add_thirdparty(curl)
6868
add_thirdparty(lz4)
6969
add_thirdparty(thrift)
7070
add_thirdparty(thriftnb)
71-
add_thirdparty(crc32c)
7271

7372
add_thirdparty(libevent_core LIBNAME "lib/libevent_core.a")
7473
add_thirdparty(libevent_openssl LIBNAME "lib/libevent_openssl.a")

be/src/exprs/block_bloom_filter.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class BlockBloomFilter {
7676
// Same as above with convenience of hashing the key.
7777
void insert(const StringRef& key) noexcept {
7878
if (key.data) {
79-
insert(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed));
79+
insert(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed));
8080
}
8181
}
8282

@@ -105,7 +105,7 @@ class BlockBloomFilter {
105105
// Same as above with convenience of hashing the key.
106106
bool find(const StringRef& key) const noexcept {
107107
if (key.data) {
108-
return find(HashUtil::crc32c_hash(key.data, uint32_t(key.size), _hash_seed));
108+
return find(HashUtil::crc_hash(key.data, uint32_t(key.size), _hash_seed));
109109
}
110110
return false;
111111
}

be/src/pipeline/dependency.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ struct SetSharedState : public BasicSharedState {
714714

715715
enum class ExchangeType : uint8_t {
716716
NOOP = 0,
717-
// Shuffle data by Crc32CHashPartitioner
717+
// Shuffle data by Crc32HashPartitioner<LocalExchangeChannelIds>.
718718
HASH_SHUFFLE = 1,
719719
// Round-robin passthrough data blocks.
720720
PASSTHROUGH = 2,

be/src/pipeline/exec/exchange_sink_operator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,13 @@ Status ExchangeSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf
119119

120120
if (_part_type == TPartitionType::HASH_PARTITIONED) {
121121
_partition_count = channels.size();
122-
_partitioner = std::make_unique<vectorized::Crc32CHashPartitioner>(channels.size());
122+
_partitioner =
123+
std::make_unique<vectorized::Crc32HashPartitioner<vectorized::ShuffleChannelIds>>(
124+
channels.size());
123125
RETURN_IF_ERROR(_partitioner->init(p._texprs));
124126
RETURN_IF_ERROR(_partitioner->prepare(state, p._row_desc));
125127
custom_profile()->add_info_string(
126-
"Partitioner", fmt::format("Crc32CHashPartitioner({})", _partition_count));
128+
"Partitioner", fmt::format("Crc32HashPartitioner({})", _partition_count));
127129
} else if (_part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
128130
_partition_count = channels.size();
129131
_partitioner =

be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets
5454
_shuffle_idx_to_instance_idx[i] = i;
5555
}
5656
}
57-
_partitioner.reset(new vectorized::Crc32CHashPartitioner(_num_partitions));
57+
_partitioner.reset(new vectorized::Crc32HashPartitioner<vectorized::ShuffleChannelIds>(
58+
_num_partitions));
5859
RETURN_IF_ERROR(_partitioner->init(_texprs));
5960
} else if (_type == ExchangeType::BUCKET_HASH_SHUFFLE) {
6061
DCHECK_GT(num_buckets, 0);

be/src/util/crc32c.cpp

Lines changed: 243 additions & 11 deletions
Large diffs are not rendered by default.

be/src/util/crc32c.h

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,39 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
// The following code is modified from RocksDB:
19+
// https://github.com/facebook/rocksdb/blob/master/util/crc32c.h
20+
1821
#pragma once
1922

23+
#include <stddef.h>
24+
#include <stdint.h>
25+
2026
#include <vector>
2127

2228
#include "util/slice.h"
2329

2430
namespace doris {
2531
namespace crc32c {
2632

27-
uint32_t Extend(uint32_t crc, const char* data, size_t n);
33+
// Return the crc32c of concat(A, data[0,n-1]) where init_crc is the
34+
// crc32c of some string A. Extend() is often used to maintain the
35+
// crc32c of a stream of data.
36+
extern uint32_t Extend(uint32_t init_crc, const char* data, size_t n);
2837

2938
// Return the crc32c of data[0,n-1]
30-
uint32_t Value(const char* data, size_t n);
39+
inline uint32_t Value(const char* data, size_t n) {
40+
return Extend(0, data, n);
41+
}
3142

3243
// Return the crc32c of data content in all slices
33-
uint32_t Value(const std::vector<Slice>& slices);
44+
inline uint32_t Value(const std::vector<Slice>& slices) {
45+
uint32_t crc = 0;
46+
for (auto& slice : slices) {
47+
crc = Extend(crc, slice.get_data(), slice.get_size());
48+
}
49+
return crc;
50+
}
3451

3552
} // namespace crc32c
3653
} // namespace doris

be/src/util/hash_util.hpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
#include "common/compiler_util.h" // IWYU pragma: keep
3131
#include "util/cpu_info.h"
32-
#include "util/crc32c.h"
3332
#include "util/hash/city.h"
3433
#include "util/murmur_hash3.h"
3534
#include "util/sse_util.hpp"
@@ -49,12 +48,7 @@ class HashUtil {
4948
return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4);
5049
}
5150

52-
// ATTN: crc32c's result differs from zlib_crc32's because it uses a different polynomial
53-
// crc32c has better performance than zlib_crc32/crc_hash
54-
static uint32_t crc32c_hash(const void* data, uint32_t bytes, uint32_t hash) {
55-
return crc32c::Extend(hash, static_cast<const char*>(data), bytes);
56-
}
57-
51+
#if defined(__SSE4_2__) || defined(__aarch64__)
5852
// Compute the Crc32 hash for data using SSE4 instructions. The input hash parameter is
5953
// the current hash/seed value.
6054
// This should only be called if SSE is supported.
@@ -64,8 +58,6 @@ class HashUtil {
6458
// NOTE: Any changes made to this function need to be reflected in Codegen::GetHashFn.
6559
// TODO: crc32 hashes with different seeds do not result in different hash functions.
6660
// The resulting hashes are correlated.
67-
// ATTN: prefer not to use this function anymore; use crc32c_hash instead
68-
// This function is retained because it is not certain whether there are compatibility issues with historical data.
6961
static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
7062
if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
7163
return zlib_crc_hash(data, bytes, hash);
@@ -124,6 +116,11 @@ class HashUtil {
124116

125117
return converter.u64;
126118
}
119+
#else
120+
static uint32_t crc_hash(const void* data, uint32_t bytes, uint32_t hash) {
121+
return zlib_crc_hash(data, bytes, hash);
122+
}
123+
#endif
127124

128125
// refer to https://github.com/apache/commons-codec/blob/master/src/main/java/org/apache/commons/codec/digest/MurmurHash3.java
129126
static const uint32_t MURMUR3_32_SEED = 104729;

be/src/vec/columns/column.h

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -401,14 +401,6 @@ class IColumn : public COW<IColumn> {
401401
"Method update_crcs_with_value is not supported for " + get_name());
402402
}
403403

404-
virtual void update_crc32cs_with_value(uint32_t* __restrict hashes, uint32_t rows,
405-
uint32_t offset,
406-
const uint8_t* __restrict null_data = nullptr) const {
407-
throw doris::Exception(
408-
ErrorCode::NOT_IMPLEMENTED_ERROR,
409-
"Method update_crc32cs_with_value is not supported for " + get_name());
410-
}
411-
412404
// use range for one hash value to avoid virtual function call in loop
413405
virtual void update_crc_with_value(size_t start, size_t end, uint32_t& hash,
414406
const uint8_t* __restrict null_data) const {

be/src/vec/columns/column_decimal.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
#include "vec/columns/column_decimal.h"
2222

23-
#include <crc32c/crc32c.h>
2423
#include <fmt/format.h>
2524

2625
#include <limits>
@@ -209,19 +208,6 @@ void ColumnDecimal<T>::update_crcs_with_value(uint32_t* __restrict hashes, Primi
209208
}
210209
}
211210

212-
template <PrimitiveType T>
213-
void ColumnDecimal<T>::update_crc32cs_with_value(uint32_t* __restrict hashes, uint32_t rows,
214-
uint32_t offset,
215-
const uint8_t* __restrict null_data) const {
216-
auto s = rows;
217-
DCHECK(s == size());
218-
219-
for (size_t i = 0; i < s; i++) {
220-
hashes[i] = crc32c_extend(hashes[i], (const uint8_t*)&data[i],
221-
sizeof(typename PrimitiveTypeTraits<T>::ColumnItemType));
222-
}
223-
}
224-
225211
template <PrimitiveType T>
226212
void ColumnDecimal<T>::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
227213
const uint8_t* __restrict null_data) const {

0 commit comments

Comments
 (0)