@@ -291,10 +291,6 @@ struct MethodSerialized : public MethodBase<TData> {
291291 }
292292};
293293
294- inline size_t get_bitmap_size (size_t key_number) {
295- return (key_number + BITSIZE - 1 ) / BITSIZE;
296- }
297-
298294template <typename TData>
299295struct MethodStringNoCache : public MethodBase <TData> {
300296 using Base = MethodBase<TData>;
@@ -562,7 +558,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
562558 template <typename T>
563559 void pack_fixeds (size_t row_numbers, const ColumnRawPtrs& key_columns,
564560 const ColumnRawPtrs& nullmap_columns, DorisVector<T>& result) {
565- size_t bitmap_size = get_bitmap_size ( nullmap_columns.size ()) ;
561+ size_t bitmap_size = nullmap_columns.empty () ? 0 : 1 ;
566562 if (bitmap_size) {
567563 // Set the size to 0 first, then use resize so that the default constructor runs on every element in [0, row_numbers) and resets its memory.
568564 // Only the memory used for the bitmap actually needs to be reset.
@@ -584,19 +580,15 @@ struct MethodKeysFixed : public MethodBase<TData> {
584580 const uint8_t * __restrict data =
585581 assert_cast<const ColumnUInt8&>(*nullmap_columns[j]).get_data ().data ();
586582
587- has_null_column[j] = simd::contain_one (data, row_numbers);
583+ has_null_column[j] = simd::contain_non_zero (data, row_numbers);
588584 if (has_null_column[j]) {
589585 nullmap_datas.emplace_back (data);
590586 bit_offsets.emplace_back (j % BITSIZE);
591587 }
592588 }
593- for (size_t j = 0 , bucket = 0 ; j < nullmap_datas.size (); j += BITSIZE, bucket++) {
594- int column_batch = std::min (BITSIZE, (int )(nullmap_datas.size () - j));
595- constexpr_int_match<1 , BITSIZE, PackNullmapsReducer>::run (
596- column_batch, nullmap_datas.data () + j, bit_offsets.data () + j, row_numbers,
597- sizeof (T), reinterpret_cast <uint8_t *>(result_data + bucket));
598- }
599-
589+ constexpr_int_match<1 , BITSIZE, PackNullmapsReducer>::run (
590+ int (nullmap_datas.size ()), nullmap_datas.data (), bit_offsets.data (),
591+ row_numbers, sizeof (T), reinterpret_cast <uint8_t *>(result_data));
600592 offset += bitmap_size;
601593 }
602594
@@ -657,6 +649,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
657649 void init_serialized_keys (const ColumnRawPtrs& key_columns, uint32_t num_rows,
658650 const uint8_t * null_map = nullptr , bool is_join = false ,
659651 bool is_build = false , uint32_t bucket_size = 0 ) override {
652+ CHECK (key_columns.size () <= BITSIZE);
660653 ColumnRawPtrs actual_columns;
661654 ColumnRawPtrs null_maps;
662655 actual_columns.reserve (key_columns.size ());
@@ -694,14 +687,7 @@ struct MethodKeysFixed : public MethodBase<TData> {
694687
695688 void insert_keys_into_columns (std::vector<typename Base::Key>& input_keys,
696689 MutableColumns& key_columns, const uint32_t num_rows) override {
697- // In any hash key value, column values to be read start just after the bitmap, if it exists.
698690 size_t pos = 0 ;
699- for (size_t i = 0 ; i < key_columns.size (); ++i) {
700- if (key_columns[i]->is_nullable ()) {
701- pos = get_bitmap_size (key_columns.size ());
702- break ;
703- }
704- }
705691
706692 for (size_t i = 0 ; i < key_columns.size (); ++i) {
707693 size_t size = key_sizes[i];
@@ -720,11 +706,8 @@ struct MethodKeysFixed : public MethodBase<TData> {
720706
721707 // The current column is nullable. Check whether the value of the
722708 // corresponding key is null in each row and update the null map accordingly.
723- size_t bucket = i / BITSIZE;
724- size_t offset = i % BITSIZE;
725709 for (size_t j = 0 ; j < num_rows; j++) {
726- nullmap[j] =
727- (reinterpret_cast <const UInt8*>(&input_keys[j])[bucket] >> offset) & 1 ;
710+ nullmap[j] = (*reinterpret_cast <const UInt8*>(&input_keys[j]) >> i) & 1 ;
728711 }
729712 } else {
730713 // key_columns is a mutable element. However, when accessed through get_raw_data().data,
0 commit comments