Skip to content

Commit b2666ec

Browse files
nordic-krchnordicjm
authored and committed
[nrf fromtree] soc: nordic: common: dmm: Optimize by using a micro heap
Add micro heap implementation which is using one or more 32 bit masks to allocate quickly blocks. It is significantly better than using sys_heap. Difference is especially big on RAM3 heap because heap control data is in RAM3 space so operations there were extremely slowly (15 us to allocate a buffer). Simplified implementation of the heap requires DMM API change as release functions need to know the length of the allocated buffer as simple heap requires that (buffer address is enough for the standard heap). Signed-off-by: Krzysztof Chruściński <[email protected]> (cherry picked from commit decdb30)
1 parent ecb6fbc commit b2666ec

File tree

2 files changed

+157
-21
lines changed

2 files changed

+157
-21
lines changed

soc/nordic/common/Kconfig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,19 @@ source "subsys/logging/Kconfig.template.log_config"
4848

4949
endif # MRAM_LATENCY
5050

51+
if HAS_NORDIC_DMM
52+
53+
config DMM_HEAP_CHUNKS
54+
int "Number of chunks in the DMM heap"
55+
default 32
56+
help
57+
DMM uses a simplified heap which uses a 32-bit mask to allocate the
58+
required buffer which consists of contiguous chunks. If there are many
59+
small buffers used with DMM it is possible that allocation will fail.
60+
Number of chunks is a trade-off between performance and granularity.
61+
Must be a multiple of 32.
62+
63+
endif # HAS_NORDIC_DMM
64+
5165
rsource "vpr/Kconfig"
5266
rsource "uicr/Kconfig"

soc/nordic/common/dmm.c

Lines changed: 143 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#include <string.h>
77
#include <zephyr/cache.h>
88
#include <zephyr/kernel.h>
9-
#include <zephyr/sys/sys_heap.h>
9+
#include <zephyr/sys/bitarray.h>
1010
#include <zephyr/mem_mgmt/mem_attr.h>
1111
#include "dmm.h"
1212

@@ -26,6 +26,9 @@
2626
.dt_align = DMM_REG_ALIGN_SIZE(node_id), \
2727
.dt_allc = &_BUILD_LINKER_END_VAR(node_id)},
2828

29+
#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32)
30+
BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32));
31+
2932
/* Generate declarations of linker variables used to determine size of preallocated variables
3033
* stored in memory sections spanning over memory regions.
3134
* These are used to determine memory left for dynamic bounce buffer allocator to work with.
@@ -42,9 +45,13 @@ struct dmm_region {
4245
};
4346

4447
/* Per-region bounce-buffer heap. The managed memory is split into equally
 * sized chunks (@a blk_size bytes each) and the allocation state is tracked
 * in a bit array with one bit per chunk.
 */
struct dmm_heap {
	/* Allocation bitmap backing @a bitarray: a set bit means the chunk is in use. */
	uint32_t mask[HEAP_NUM_WORDS];
	/* Tail bits of each allocation (all chunks except the first one). Used by
	 * the free path to recover the allocation length from the buffer address.
	 */
	atomic_t tail_mask[HEAP_NUM_WORDS];
	/* Start address of the managed area; 0 until dmm_init() has set it up. */
	uintptr_t ptr;
	/* End address of the managed area (ptr + chunk count * blk_size). */
	uintptr_t ptr_end;
	/* Size of a single chunk in bytes, rounded up to the region alignment. */
	size_t blk_size;
	/* Static, devicetree-derived description of the region this heap manages. */
	const struct dmm_region *region;
	/* Bit array view over @a mask used for chunk allocation. */
	sys_bitarray_t bitarray;
};
4956

5057
static const struct dmm_region dmm_regions[] = {
@@ -55,7 +62,6 @@ struct {
5562
struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)];
5663
} dmm_heaps_data;
5764

58-
5965
static struct dmm_heap *dmm_heap_find(void *region)
6066
{
6167
struct dmm_heap *dh;
@@ -103,37 +109,144 @@ static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_
103109
return false;
104110
}
105111

106-
static size_t dmm_heap_start_get(struct dmm_heap *dh)
112+
/* Update the tail bits mask after an allocation. Tail bits are all bits of the
 * allocated buffer except the head (the first chunk). The tail bits mask,
 * together with the known index of the first chunk (freeing receives the buffer
 * address), allows determining the size of the buffer (how many chunks it
 * included). Because tail_mask is updated after the allocation succeeded, the
 * bits that represent the allocated buffer can be modified safely; an atomic
 * operation is only needed because other bits of the same mask word may be
 * modified concurrently.
 *
 * @param tail_mask Pointer to the tail mask word array (HEAP_NUM_WORDS words).
 * @param num_bits  Number of chunks in the allocation (head included).
 * @param off       Index of the first chunk of the allocation.
 */
static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off)
{
	size_t tail_bits = num_bits - 1;
	size_t tail_off = off + 1;

	/* A single-chunk allocation has no tail bits. */
	if (tail_bits == 0) {
		return;
	}

	if (HEAP_NUM_WORDS == 1) {
		atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off);
		return;
	}

	/* If the bit mask exceeds a single word then the tail may spill to the
	 * adjacent word.
	 */
	size_t idx = tail_off / 32;

	tail_off = tail_off - 32 * idx;
	if ((tail_off + tail_bits) <= 32) {
		/* Tail mask fits in a single word.
		 * NOTE(review): when tail_off == 0 and tail_bits == 32 this evaluates
		 * BIT_MASK(32) — confirm the macro yields a full-word mask without an
		 * out-of-range shift on this platform.
		 */
		atomic_or(&tail_mask[idx], BIT_MASK(tail_bits) << tail_off);
		return;
	}

	/* Tail spilled. The remainder is set in the next word(s). Since the number
	 * of tail mask words matches the number of words in the bitarray, we don't
	 * need to check if we are exceeding the array boundary.
	 */
	atomic_or(&tail_mask[idx], BIT_MASK(32 - tail_off) << tail_off);

	size_t rem_tail = tail_bits - (32 - tail_off);
	atomic_t *mask = &tail_mask[idx + 1];

	/* Fill whole words first, then the final partial word (BIT_MASK(0) == 0,
	 * so a zero remainder is a harmless no-op).
	 */
	while (rem_tail >= 32) {
		atomic_or(mask, UINT32_MAX);
		mask++;
		rem_tail -= 32;
	}
	atomic_or(mask, BIT_MASK(rem_tail));
}
110160

111-
static size_t dmm_heap_size_get(struct dmm_heap *dh)
161+
/* Determine how many chunks were used for the allocated buffer. The count is
 * derived from the tail bits mask and the index of the starting chunk (@p off).
 * The function is called before the bits are freed in the bitarray, so the bits
 * that belong to that buffer can be modified safely. The consumed tail bits are
 * cleared as a side effect.
 *
 * @param tail_mask Pointer to tail_mask array.
 * @param off Index of the start of the buffer.
 *
 * @return Number of chunks that form the buffer that will be freed.
 */
static uint32_t num_bits_get(atomic_t *tail_mask, size_t off)
{
	uint32_t mask;
	uint32_t num_bits;

	if (HEAP_NUM_WORDS == 1) {
		/* Treat the head bit as set, then count the run of consecutive set
		 * bits starting at @p off: head + tail bits = allocation length.
		 */
		mask = (*tail_mask | BIT(off)) >> off;
		num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask);
		if (num_bits > 1) {
			/* Clear the tail bits that belonged to this buffer. */
			mask = BIT_MASK(num_bits - 1) << (off + 1);
			atomic_and(tail_mask, ~mask);
		}

		return num_bits;
	}

	/* In a multiword bit array we need to check if the tail is spilling over to
	 * the next word.
	 */
	size_t idx = off / 32;
	size_t w_off = off - 32 * idx;
	atomic_t *t_mask = &tail_mask[idx];

	mask = (*t_mask | BIT(w_off)) >> w_off;
	num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask);
	if (num_bits == 1) {
		/* Single-chunk buffer: no tail bits to clear. */
		return num_bits;
	}

	/* Clear this word's share of the tail. */
	mask = BIT_MASK(num_bits - 1) << (w_off + 1);
	atomic_and(t_mask, ~mask);
	if (((w_off + num_bits) == 32) && (idx < (HEAP_NUM_WORDS - 1))) {
		size_t tmp_bits;

		/* If we are at the end of one mask word we need to check the
		 * beginning of the next one, as there might be a remaining part of
		 * the tail.
		 */
		do {
			t_mask++;
			tmp_bits = (*t_mask == UINT32_MAX) ? 32 : __builtin_ctz(~(*t_mask));
			mask = (tmp_bits == 32) ? UINT32_MAX : BIT_MASK(tmp_bits);
			atomic_and(t_mask, ~mask);
			num_bits += tmp_bits;
		} while ((tmp_bits == 32) && (t_mask != &tail_mask[HEAP_NUM_WORDS - 1]));
	}

	return num_bits;
}
115217

116218
static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length)
117219
{
118-
void *ret;
119-
k_spinlock_key_t key;
220+
size_t num_bits, off;
221+
int rv;
222+
223+
if (dh->ptr == 0) {
224+
/* Not initialized. */
225+
return NULL;
226+
}
120227

121228
length = ROUND_UP(length, dh->region->dt_align);
229+
num_bits = DIV_ROUND_UP(length, dh->blk_size);
230+
231+
rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off);
232+
if (rv < 0) {
233+
return NULL;
234+
}
122235

123-
key = k_spin_lock(&dh->lock);
124-
ret = sys_heap_aligned_alloc(&dh->heap, dh->region->dt_align, length);
125-
k_spin_unlock(&dh->lock, key);
236+
tail_mask_set(dh->tail_mask, num_bits, off);
126237

127-
return ret;
238+
return (void *)(dh->ptr + dh->blk_size * off);
128239
}
129240

130241
static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
131242
{
132-
k_spinlock_key_t key;
243+
size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size;
244+
size_t num_bits = num_bits_get(dh->tail_mask, offset);
245+
int rv;
133246

134-
key = k_spin_lock(&dh->lock);
135-
sys_heap_free(&dh->heap, buffer);
136-
k_spin_unlock(&dh->lock, key);
247+
rv = sys_bitarray_free(&dh->bitarray, num_bits, offset);
248+
(void)rv;
249+
__ASSERT_NO_MSG(rv == 0);
137250
}
138251

139252
static void dmm_memcpy(void *dst, const void *src, size_t len)
@@ -222,7 +335,7 @@ int dmm_buffer_out_release(void *region, void *buffer_out)
222335
/* Check if output buffer is contained within memory area
223336
* managed by dynamic memory allocator
224337
*/
225-
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
338+
if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end)) {
226339
/* If yes, free the buffer */
227340
dmm_buffer_free(dh, buffer_out);
228341
}
@@ -309,7 +422,7 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
309422
/* Check if input buffer is contained within memory area
310423
* managed by dynamic memory allocator
311424
*/
312-
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
425+
if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end)) {
313426
/* If yes, free the buffer */
314427
dmm_buffer_free(dh, buffer_in);
315428
}
@@ -321,11 +434,20 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
321434
int dmm_init(void)
322435
{
323436
struct dmm_heap *dh;
437+
int blk_cnt;
438+
int heap_space;
324439

325440
for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) {
326441
dh = &dmm_heaps_data.dmm_heaps[idx];
327442
dh->region = &dmm_regions[idx];
328-
sys_heap_init(&dh->heap, (void *)dmm_heap_start_get(dh), dmm_heap_size_get(dh));
443+
dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
444+
heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr);
445+
dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align);
446+
blk_cnt = heap_space / dh->blk_size;
447+
dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size;
448+
dh->bitarray.num_bits = blk_cnt;
449+
dh->bitarray.num_bundles = HEAP_NUM_WORDS;
450+
dh->bitarray.bundles = dh->mask;
329451
}
330452

331453
return 0;

0 commit comments

Comments
 (0)