From cbe0ed98716888668a6a2ca7c6dc2db50e3e0ecc Mon Sep 17 00:00:00 2001 From: felixwluo Date: Mon, 3 Mar 2025 21:44:10 +0800 Subject: [PATCH] [fix](bug) Resolve the crash issue during string hash computation --- be/src/vec/common/string_ref.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index abc745b1db2b42..701229327852cc 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -21,6 +21,7 @@ #pragma once // IWYU pragma: no_include +#include #include #include @@ -386,16 +387,24 @@ struct CRC32Hash { const char* end = pos + size; size_t res = -1ULL; - do { - doris::vectorized::UInt64 word = unaligned_load(pos); + // process complete 8-byte blocks + while (pos + 8 <= end) { + auto word = unaligned_load(pos); res = _mm_crc32_u64(res, word); - pos += 8; - } while (pos + 8 < end); + } - doris::vectorized::UInt64 word = unaligned_load( - end - 8); /// I'm not sure if this is normal. - res = _mm_crc32_u64(res, word); + // process the remaining bytes (if any) + if (pos < end) { + char buffer[8] = {0}; + memcpy(buffer, pos, end - pos); + auto word = unaligned_load(buffer); + res = _mm_crc32_u64(res, word); + } else if (size >= 8) { + // ensure that at least the last 8 bytes are processed (when the size is a multiple of 8) + auto word = unaligned_load(end - 8); + res = _mm_crc32_u64(res, word); + } return res; }