Skip to content

Commit 468b1e6

Browse files
gongxun0928 authored and my-ship-it committed
Optimize null bitmap handling for better append performance
1. Pre-allocate the null bitmap capacity to pax_max_tuples_per_group to avoid capacity checks. 2. Initialize the null bitmap with all bits set to 1 (0xff) by default, on the assumption that non-null values are more frequent than null values.
1 parent c680b29 commit 468b1e6

File tree

3 files changed

+65
-24
lines changed

3 files changed

+65
-24
lines changed

contrib/pax_storage/src/cpp/comm/bitmap.h

Lines changed: 44 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -134,12 +134,28 @@ struct BitmapRaw final {
134134
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
135135
return (index >> BM_WORD_SHIFTS) < size;
136136
}
137-
inline bool Empty() const {
137+
138+
inline bool Empty(uint32 end_index) const {
138139
if (!bitmap) return true;
139-
for (size_t i = 0; i < size; i++)
140-
if (bitmap[i]) return false;
140+
141+
uint32 end_word = BM_INDEX_WORD_OFF(end_index);
142+
uint32 end_bit_offset = BM_INDEX_BIT_OFF(end_index);
143+
144+
for (uint32 i = 0; i < end_word && i < size; i++) {
145+
if (bitmap[i] != 0) return false;
146+
}
147+
148+
// Check partial word at end
149+
if (end_word < size && end_bit_offset > 0) {
150+
T mask = (T(1) << end_bit_offset) - 1;
151+
if (bitmap[end_word] & mask) return false;
152+
}
153+
141154
return true;
142155
}
156+
157+
inline bool Empty() const { return Empty(size * sizeof(T) * 8ULL); }
158+
143159
BitmapRaw() = default;
144160
BitmapRaw(T *buffer, size_t size) : bitmap(buffer), size(size) {}
145161
BitmapRaw(const BitmapRaw &) = delete;
@@ -160,13 +176,14 @@ struct BitmapRaw final {
160176
template <typename T>
161177
class BitmapTpl final {
162178
public:
163-
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32);
164-
explicit BitmapTpl(uint32 initial_size = 16) {
179+
using BitmapMemoryPolicy = void (*)(BitmapRaw<T> &, uint32, uint8);
180+
explicit BitmapTpl(uint32 initial_size = 16, uint8 init_value = 0) {
165181
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
166182
sizeof(T) == 8);
167183
static_assert(BM_WORD_BITS == (1 << BM_WORD_SHIFTS));
168184
policy_ = DefaultBitmapMemoryPolicy;
169-
policy_(raw_, Max(initial_size, 16));
185+
policy_(raw_, Max(initial_size, 16), init_value);
186+
init_value_ = init_value;
170187
}
171188
explicit BitmapTpl(const BitmapRaw<T> &raw) {
172189
static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
@@ -177,8 +194,7 @@ class BitmapTpl final {
177194
raw_.size = raw.size;
178195
}
179196
BitmapTpl(const BitmapTpl &tpl) = delete;
180-
BitmapTpl(BitmapTpl &&tpl)
181-
: raw_(std::move(tpl.raw_)), policy_(tpl.policy_) {
197+
BitmapTpl(BitmapTpl &&tpl) : raw_(std::move(tpl.raw_)), policy_(tpl.policy_) {
182198
tpl.raw_.bitmap = nullptr;
183199
tpl.policy_ = ReadOnlyRefBitmap;
184200
}
@@ -188,8 +204,7 @@ class BitmapTpl final {
188204
BitmapTpl &operator=(BitmapTpl &&tpl) = delete;
189205
~BitmapTpl() {
190206
// Reference doesn't free the memory
191-
if (policy_ == DefaultBitmapMemoryPolicy)
192-
PAX_DELETE_ARRAY(raw_.bitmap);
207+
if (policy_ == DefaultBitmapMemoryPolicy) PAX_DELETE_ARRAY(raw_.bitmap);
193208
raw_.bitmap = nullptr;
194209
}
195210

@@ -205,11 +220,13 @@ class BitmapTpl final {
205220

206221
inline size_t WordBits() const { return BM_WORD_BITS; }
207222
inline void Set(uint32 index) {
208-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
223+
if (unlikely(!raw_.HasEnoughSpace(index)))
224+
policy_(raw_, index, init_value_);
209225
raw_.Set(index);
210226
}
211227
inline void SetN(uint32 index) {
212-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
228+
if (unlikely(!raw_.HasEnoughSpace(index)))
229+
policy_(raw_, index, init_value_);
213230
raw_.SetN(index);
214231
}
215232
inline void Clear(uint32 index) {
@@ -228,7 +245,8 @@ class BitmapTpl final {
228245
}
229246
// invert the bit and return the old value.
230247
inline bool Toggle(uint32 index) {
231-
if (unlikely(!raw_.HasEnoughSpace(index))) policy_(raw_, index);
248+
if (unlikely(!raw_.HasEnoughSpace(index)))
249+
policy_(raw_, index, init_value_);
232250
return raw_.Toggle(index);
233251
}
234252
// count bits in range [0, index]
@@ -248,23 +266,28 @@ class BitmapTpl final {
248266

249267
inline bool Empty() const { return raw_.Empty(); }
250268

269+
// Check whether every bit in the range [0, end_index) is zero.
270+
inline bool Empty(uint32 end_index) const { return raw_.Empty(end_index); }
271+
251272
BitmapMemoryPolicy Policy() const { return policy_; }
252273

253274
const BitmapRaw<T> &Raw() const { return raw_; }
254275
BitmapRaw<T> &Raw() { return raw_; }
255276

256-
static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index) {
277+
static void DefaultBitmapMemoryPolicy(BitmapRaw<T> &raw, uint32 index,
278+
uint8 init_value = 0) {
257279
auto old_bitmap = raw.bitmap;
258280
auto old_size = raw.size;
259281
auto size = Max(BM_INDEX_WORD_OFF(index) + 1, old_size * 2);
260282
auto p = PAX_NEW_ARRAY<T>(size);
261283
if (old_size > 0) memcpy(p, old_bitmap, sizeof(T) * old_size);
262-
memset(&p[old_size], 0, sizeof(T) * (size - old_size));
284+
memset(&p[old_size], init_value, sizeof(T) * (size - old_size));
263285
raw.bitmap = p;
264286
raw.size = size;
265287
PAX_DELETE_ARRAY(old_bitmap);
266288
}
267-
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/) {
289+
static void ReadOnlyRefBitmap(BitmapRaw<T> & /*raw*/, uint32 /*index*/,
290+
uint8 /*init_value*/) {
268291
// raise
269292
CBDB_RAISE(cbdb::CException::kExTypeInvalidMemoryOperation);
270293
}
@@ -280,12 +303,14 @@ class BitmapTpl final {
280303
return nwords * sizeof(T);
281304
}
282305

283-
static std::unique_ptr<BitmapTpl<T>> BitmapTplCopy(const BitmapTpl<T> *bitmap) {
306+
static std::unique_ptr<BitmapTpl<T>> BitmapTplCopy(
307+
const BitmapTpl<T> *bitmap) {
284308
if (bitmap == nullptr) return nullptr;
285309
return bitmap->Clone();
286310
}
287311

288-
static std::unique_ptr<BitmapTpl<T>> Union(const BitmapTpl<T> *a, const BitmapTpl<T> *b) {
312+
static std::unique_ptr<BitmapTpl<T>> Union(const BitmapTpl<T> *a,
313+
const BitmapTpl<T> *b) {
289314
std::unique_ptr<BitmapTpl<T>> result;
290315
const BitmapTpl<T> *large;
291316
const BitmapTpl<T> *small;
@@ -315,6 +340,7 @@ class BitmapTpl final {
315340

316341
BitmapRaw<T> raw_;
317342
BitmapMemoryPolicy policy_;
343+
uint8 init_value_ = 0;
318344
};
319345

320346
using Bitmap8 = BitmapTpl<uint8>;

contrib/pax_storage/src/cpp/storage/columns/pax_column.cc

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -83,13 +83,21 @@ size_t PaxColumn::GetRangeNonNullRows(size_t start_pos, size_t len) {
8383

8484
void PaxColumn::CreateNulls(size_t cap) {
8585
Assert(!null_bitmap_);
86-
null_bitmap_ = std::make_unique<Bitmap8>(cap);
87-
null_bitmap_->SetN(total_rows_);
86+
// By default, initialize every bit in the null bitmap to 1.
87+
// This is based on the assumption that null values are much less frequent
88+
// than non-null values in most datasets. As a result, when appending non-null
89+
// values, we can simply skip setting the bit to 1, since it is already set.
90+
// Only when appending a null value do we need to explicitly clear the
91+
// corresponding bit.
92+
null_bitmap_ = std::make_unique<Bitmap8>(cap, 0xff);
8893
}
8994

9095
void PaxColumn::AppendNull() {
9196
if (!null_bitmap_) {
92-
CreateNulls(DEFAULT_CAPACITY);
97+
// Ensure that the capacity of null_bitmap_ is pax_max_tuples_per_group.
98+
// This design allows the use of raw_bitmap in normal cases without
99+
// incurring the overhead of checking the bitmap's capacity.
100+
CreateNulls(pax::pax_max_tuples_per_group);
93101
}
94102
null_bitmap_->Clear(total_rows_);
95103
++total_rows_;
@@ -111,7 +119,6 @@ void PaxColumn::AppendToast(char *buffer, size_t size) {
111119
}
112120

113121
void PaxColumn::Append(char * /*buffer*/, size_t /*size*/) {
114-
if (null_bitmap_) null_bitmap_->Set(total_rows_);
115122
++total_rows_;
116123
++non_null_rows_;
117124
}

contrib/pax_storage/src/cpp/storage/columns/pax_column.h

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,8 @@
4646

4747
namespace pax {
4848

49-
#define DEFAULT_CAPACITY MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))
49+
#define DEFAULT_CAPACITY \
50+
MIN(2048, MAX(16, MAXALIGN(pax::pax_max_tuples_per_group)))
5051

5152
// Used to map pg_type
5253
enum PaxColumnTypeInMem {
@@ -230,7 +231,14 @@ class PaxColumn {
230231
inline bool HasNull() { return null_bitmap_ != nullptr; }
231232

232233
// Are all values null?
233-
inline bool AllNull() const { return null_bitmap_ && null_bitmap_->Empty(); }
234+
// Check whether all bits in the specified range are zero.
235+
// In pax_column, to avoid checking the capacity of the null bitmap, we
236+
// allocate memory based on pax_max_tuples_per_group. As a result, the last
237+
// group may contain fewer tuples than pax_max_tuples_per_group, so we need to
238+
// check whether all bits in the range [0, total_rows_) are zero.
239+
inline bool AllNull() const {
240+
return null_bitmap_ && null_bitmap_->Empty(total_rows_);
241+
}
234242

235243
// Set the null bitmap
236244
inline void SetBitmap(std::unique_ptr<Bitmap8> null_bitmap) {

0 commit comments

Comments
 (0)