@@ -89,13 +89,22 @@ static slab_t *create_slab(bucket_t *bucket) {

     slab->num_chunks_total =
         utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
-    slab->chunks =
-        umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total);
-    if (slab->chunks == NULL) {
-        LOG_ERR("allocation of slab chunks failed!");
-        goto free_slab;
+    size_t chunks_size_in_64_increments =
+        slab->num_chunks_total / 64 + (slab->num_chunks_total % 64 != 0);
+
+    if (slab->num_chunks_total > 64) {
+        slab->chunks.chunks = umf_ba_global_alloc(chunks_size_in_64_increments *
+                                                  sizeof(*slab->chunks.chunks));
+        if (slab->chunks.chunks == NULL) {
+            LOG_ERR("allocation of slab chunks failed!");
+            goto free_slab;
+        }
+
+        memset(slab->chunks.chunks, 0,
+               chunks_size_in_64_increments * sizeof(*slab->chunks.chunks));
+    } else {
+        slab->chunks.chunk = 0;
     }
-    memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total);

     // if slab_min_size is not a multiple of bucket size, we would have some
     // padding at the end of the slab
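Note: the slab_t change itself is outside the hunks shown here. From the accesses above (slab->chunks.chunk vs. slab->chunks.chunks), the patch appears to assume a union along these lines; the type name and field placement below are a sketch, not the authoritative definition:

#include <stdint.h>

// Sketch of the assumed bitset storage; the real struct change is made
// elsewhere in this patch and may differ in naming and ordering.
typedef union slab_chunks_t {
    uint64_t chunk;    // inline 64-bit bitset, used when num_chunks_total <= 64
    uint64_t *chunks;  // heap-allocated array of 64-bit words otherwise
} slab_chunks_t;

With this layout, a slab tracking 200 chunks needs (200 + 63) / 64 = 4 words, while any slab with at most 64 chunks pays no extra allocation at all.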
@@ -117,7 +126,9 @@ static slab_t *create_slab(bucket_t *bucket) {
     return slab;

 free_slab_chunks:
-    umf_ba_global_free(slab->chunks);
+    if (slab->num_chunks_total > 64) {
+        umf_ba_global_free(slab->chunks.chunks);
+    }

 free_slab:
     umf_ba_global_free(slab);
@@ -135,26 +146,55 @@ static void destroy_slab(slab_t *slab) {
         LOG_ERR("deallocation of slab data failed!");
     }

-    umf_ba_global_free(slab->chunks);
+    if (slab->num_chunks_total > 64) {
+        umf_ba_global_free(slab->chunks.chunks);
+    }
+
     umf_ba_global_free(slab);
 }

-// return the index of the first available chunk, SIZE_MAX otherwise
 static size_t slab_find_first_available_chunk_idx(const slab_t *slab) {
-    // use the first free chunk index as a hint for the search
-    for (bool *chunk = slab->chunks + slab->first_free_chunk_idx;
-         chunk != slab->chunks + slab->num_chunks_total; chunk++) {
-
-        // false means not used
-        if (*chunk == false) {
-            size_t idx = chunk - slab->chunks;
-            LOG_DEBUG("idx: %zu", idx);
-            return idx;
+    if (slab->num_chunks_total <= 64) {
+        // Invert the bits so that free (0) bits become 1.
+        uint64_t word = ~(slab->chunks.chunk);
+
+        // Mask out any bits beyond num_chunks_total if necessary.
+        if (slab->num_chunks_total < 64) {
+            word &= (((uint64_t)1 << slab->num_chunks_total) - 1);
         }
-    }

-    LOG_DEBUG("idx: SIZE_MAX");
-    return SIZE_MAX;
+        if (word == 0) {
+            return SIZE_MAX; // No free chunk found.
+        }
+
+        unsigned int bit_index = utils_get_rightmost_set_bit_pos(word);
+
+        return (size_t)bit_index;
+    } else {
+        // Calculate the number of 64-bit words needed.
+        size_t num_words = (slab->num_chunks_total + 63) / 64;
+        for (size_t i = 0; i < num_words; i++) {
+            // Invert the word: free bits (0 in the allocated mask) become 1.
+            uint64_t word = ~(slab->chunks.chunks[i]);
+
+            // For the final word, clear out bits that exceed num_chunks_total.
+            if (i == num_words - 1) {
+                size_t bits_in_last_word = slab->num_chunks_total - (i * 64);
+                if (bits_in_last_word < 64) {
+                    word &= (((uint64_t)1 << bits_in_last_word) - 1);
+                }
+            }
+            if (word != 0) {
+                unsigned bit_index = utils_get_rightmost_set_bit_pos(word);
+                size_t free_chunk = i * 64 + bit_index;
+                if (free_chunk < slab->num_chunks_total) {
+                    return free_chunk;
+                }
+            }
+        }
+        // No free chunk was found.
+        return SIZE_MAX;
+    }
 }

 static void *slab_get_chunk(slab_t *slab) {
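The core of the new scan is a standard find-first-zero-bit trick: invert the occupancy word so free slots become 1s, mask off padding bits past num_chunks_total, and take the position of the rightmost set bit. A self-contained sketch of the same idea, using GCC/Clang's __builtin_ctzll as a stand-in for utils_get_rightmost_set_bit_pos, whose definition is not part of this diff:

#include <stdint.h>
#include <stddef.h>

// Index of the first 0 bit among the low `num_bits` bits of `word`,
// or SIZE_MAX if they are all set.
size_t first_zero_bit(uint64_t word, size_t num_bits) {
    uint64_t free_bits = ~word;                       // free (0) bits become 1
    if (num_bits < 64) {
        free_bits &= (((uint64_t)1 << num_bits) - 1); // drop padding bits
    }
    if (free_bits == 0) {
        return SIZE_MAX;
    }
    return (size_t)__builtin_ctzll(free_bits);        // count trailing zeros
}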
@@ -166,7 +206,7 @@ static void *slab_get_chunk(slab_t *slab) {
         (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size);

     // mark chunk as used
-    slab->chunks[chunk_idx] = true;
+    slab_set_chunk_bit(slab, chunk_idx, true);
     slab->num_chunks_allocated += 1;

     // use the found index as the next hint
@@ -194,8 +234,8 @@ static void slab_free_chunk(slab_t *slab, void *ptr) {
     size_t chunk_idx = ptr_diff / slab->bucket->size;

     // Make sure that the chunk was allocated
-    assert(slab->chunks[chunk_idx] && "double free detected");
-    slab->chunks[chunk_idx] = false;
+    assert(slab_read_chunk_bit(slab, chunk_idx) && "double free detected");
+    slab_set_chunk_bit(slab, chunk_idx, false);
     slab->num_chunks_allocated -= 1;

     if (chunk_idx < slab->first_free_chunk_idx) {
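slab_set_chunk_bit and slab_read_chunk_bit are introduced elsewhere in the patch and are not shown in this excerpt. A hypothetical sketch of what such helpers could look like, assuming they sit next to the slab_t definition and the union layout sketched after the first hunk (chunks.chunk for at most 64 chunks, chunks.chunks otherwise); the real definitions may differ:

// Hypothetical helpers; for num_chunks_total <= 64, chunk_idx / 64 == 0 and
// chunk_idx % 64 == chunk_idx, so both branches address the same bit layout.
static void slab_set_chunk_bit(slab_t *slab, size_t chunk_idx, bool used) {
    uint64_t mask = (uint64_t)1 << (chunk_idx % 64);
    uint64_t *word = (slab->num_chunks_total <= 64)
                         ? &slab->chunks.chunk
                         : &slab->chunks.chunks[chunk_idx / 64];
    if (used) {
        *word |= mask;  // mark the chunk as allocated
    } else {
        *word &= ~mask; // mark the chunk as free
    }
}

static bool slab_read_chunk_bit(const slab_t *slab, size_t chunk_idx) {
    uint64_t word = (slab->num_chunks_total <= 64)
                        ? slab->chunks.chunk
                        : slab->chunks.chunks[chunk_idx / 64];
    return (word >> (chunk_idx % 64)) & 1;
}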
@@ -467,7 +507,7 @@ static size_t size_to_idx(disjoint_pool_t *pool, size_t size) {
     }

     // get the position of the leftmost set bit
-    size_t position = getLeftmostSetBitPos(size);
+    size_t position = utils_get_leftmost_set_bit_pos(size);

     bool is_power_of_2 = 0 == (size & (size - 1));
     bool larger_than_halfway_between_powers_of_2 =
@@ -623,7 +663,8 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider,
     Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE);

     // Calculate the exponent for min_bucket_size used for finding buckets.
-    disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1);
+    disjoint_pool->min_bucket_size_exp =
+        (size_t)utils_get_leftmost_set_bit_pos(Size1);
     disjoint_pool->default_shared_limits =
         umfDisjointPoolSharedLimitsCreate(SIZE_MAX);

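Both renamed call sites rely on the same identity: for x > 0, the position of the leftmost set bit is floor(log2(x)), which is why it can replace log2Utils here. A minimal sketch of that behavior, using __builtin_clzll as a stand-in for the utils implementation, which is not shown in this diff:

#include <stdint.h>

// Position of the most significant set bit, i.e. floor(log2(x)) for x > 0.
unsigned leftmost_set_bit_pos(uint64_t x) {
    return 63u - (unsigned)__builtin_clzll(x); // caller must ensure x != 0
}
// e.g. leftmost_set_bit_pos(8) == 3, leftmost_set_bit_pos(96) == 6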