17 changes: 17 additions & 0 deletions soc/nordic/common/Kconfig
@@ -48,5 +48,22 @@ source "subsys/logging/Kconfig.template.log_config"

endif # MRAM_LATENCY

if HAS_NORDIC_DMM

config DMM_HEAP_CHUNKS
int "Number of chunks in the DMM heap"
default 32
help
DMM uses a simplified heap that allocates buffers as runs of contiguous
chunks tracked in 32-bit masks. If many small buffers are used with DMM,
allocation may fail because each buffer consumes at least one chunk.
The number of chunks is a trade-off between performance and granularity.
Must be a multiple of 32.

config DMM_STATS
bool "Usage statistics"

endif # HAS_NORDIC_DMM

rsource "vpr/Kconfig"
rsource "uicr/Kconfig"
205 changes: 185 additions & 20 deletions soc/nordic/common/dmm.c
@@ -6,7 +6,7 @@
#include <string.h>
#include <zephyr/cache.h>
#include <zephyr/kernel.h>
#include <zephyr/sys/sys_heap.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/mem_mgmt/mem_attr.h>
#include "dmm.h"

@@ -26,6 +26,9 @@
.dt_align = DMM_REG_ALIGN_SIZE(node_id), \
.dt_allc = &_BUILD_LINKER_END_VAR(node_id)},

#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32)
BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32));

/* Generate declarations of linker variables used to determine size of preallocated variables
* stored in memory sections spanning over memory regions.
* These are used to determine memory left for dynamic bounce buffer allocator to work with.
@@ -42,9 +45,18 @@ struct dmm_region {
};

struct dmm_heap {
struct sys_heap heap;
uint32_t mask[HEAP_NUM_WORDS];
atomic_t tail_mask[HEAP_NUM_WORDS];
uintptr_t ptr;
uintptr_t ptr_end;
size_t blk_size;
const struct dmm_region *region;
sys_bitarray_t bitarray;
#ifdef CONFIG_DMM_STATS
atomic_t curr_use;
uint32_t max_use;
struct k_spinlock lock;
#endif
};

static const struct dmm_region dmm_regions[] = {
@@ -55,7 +67,6 @@ struct {
struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)];
} dmm_heaps_data;


static struct dmm_heap *dmm_heap_find(void *region)
{
struct dmm_heap *dh;
@@ -103,37 +114,154 @@ static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_
return false;
}

static size_t dmm_heap_start_get(struct dmm_heap *dh)
/* Update the tail bits mask after an allocation. Tail bits are all bits of the
 * allocated buffer except the head bit. The tail bits mask, combined with the
 * known index of the buffer's first chunk (recovered from the buffer address at
 * free time), allows determining the buffer size (how many chunks it spans).
 * Because tail_mask is updated after the allocation, the bits representing the
 * allocated buffer can be modified safely; atomic operations are only needed
 * because other bits in the same mask word may be modified concurrently.
 */
static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off)
{
return ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
size_t tail_bits = num_bits - 1;
size_t tail_off = off + 1;

if (tail_bits == 0) {
return;
}

if (HEAP_NUM_WORDS == 1) {
atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off);
return;
}

size_t idx = tail_off / 32;
atomic_t *t_mask = &tail_mask[idx];

tail_off = tail_off % 32;
while (tail_bits > 0) {
uint32_t bits = MIN(32 - tail_off, tail_bits);
uint32_t mask = (bits == 32) ? UINT32_MAX : (BIT_MASK(bits) << tail_off);

atomic_or(t_mask, mask);
t_mask++;
tail_off = 0;
tail_bits -= bits;
}
}
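
A minimal sketch of the encoding (hypothetical values, HEAP_NUM_WORDS == 1):
allocating three chunks starting at offset 4 sets only the two tail bits,
offsets 5 and 6; the head bit is tracked by the bitarray alone.

atomic_t mask[1] = { ATOMIC_INIT(0) };

/* num_bits = 3, off = 4, so tail_bits = 2 and tail_off = 5. */
tail_mask_set(mask, 3, 4);
/* BIT_MASK(2) << 5 == 0x60: bits 5 and 6 are now set. */
__ASSERT_NO_MSG((uint32_t)atomic_get(&mask[0]) == 0x60);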

static size_t dmm_heap_size_get(struct dmm_heap *dh)
/* Determine how many chunks were used for an allocated buffer, based on the
 * tail bits mask and the index of the buffer's first chunk (@p off). The
 * function is called before the bits are freed in the bitarray, so bits
 * belonging to that buffer can be modified safely.
 *
 * @param tail_mask Pointer to the tail_mask array.
 * @param off Index of the first chunk of the buffer.
 *
 * @return Number of chunks forming the buffer that is about to be freed.
 */
static uint32_t num_bits_get(atomic_t *tail_mask, size_t off)
{
return (dh->region->dt_size - (dmm_heap_start_get(dh) - dh->region->dt_addr));
uint32_t num_bits = 1;
size_t tail_off = off + 1;
size_t idx = tail_off / 32;
atomic_t *t_mask = &tail_mask[idx];

tail_off = tail_off % 32;
do {
uint32_t mask = (uint32_t)*t_mask >> tail_off;

if (mask == UINT32_MAX) {
num_bits += 32;
atomic_set(t_mask, 0);
} else {
uint32_t bits = __builtin_ctz(~mask);

if (bits == 0) {
break;
}

num_bits += bits;
atomic_and(t_mask, ~(BIT_MASK(bits) << tail_off));

if (bits + tail_off < 32) {
break;
}

tail_off = 0;
}

t_mask++;
} while ((HEAP_NUM_WORDS > 1) && (t_mask != &tail_mask[HEAP_NUM_WORDS]));

return num_bits;
}
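
Continuing the sketch: at free time only the buffer address, and hence the
head index, is known, so num_bits_get() walks the tail bits, clears them and
reports the total chunk count.

/* mask[0] == 0x60 after the allocation above; the head index is 4. */
uint32_t n = num_bits_get(mask, 4);

/* (0x60 >> 5) == 0x3 has two trailing set bits, so n == 1 + 2 == 3. */
__ASSERT_NO_MSG(n == 3 && atomic_get(&mask[0]) == 0);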

static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length)
{
void *ret;
k_spinlock_key_t key;
size_t num_bits, off;
int rv;

if (dh->ptr == 0) {
/* Not initialized. */
return NULL;
}

length = ROUND_UP(length, dh->region->dt_align);
num_bits = DIV_ROUND_UP(length, dh->blk_size);

rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off);
if (rv < 0) {
return NULL;
}

tail_mask_set(dh->tail_mask, num_bits, off);

#ifdef CONFIG_DMM_STATS
k_spinlock_key_t key;

key = k_spin_lock(&dh->lock);
ret = sys_heap_aligned_alloc(&dh->heap, dh->region->dt_align, length);
dh->curr_use += num_bits;
dh->max_use = MAX(dh->max_use, dh->curr_use);
k_spin_unlock(&dh->lock, key);
#endif

return ret;
return (void *)(dh->ptr + dh->blk_size * off);
}
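
To make the rounding concrete, a sketch with hypothetical region parameters
(dt_align = 4, blk_size = 64):

size_t length = ROUND_UP(100, 4);           /* -> 100 bytes */
size_t num_bits = DIV_ROUND_UP(length, 64); /* -> 2 chunks, 128 bytes reserved */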

static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
{
k_spinlock_key_t key;
size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size;
size_t num_bits = num_bits_get(dh->tail_mask, offset);
int rv;

#ifdef CONFIG_DMM_STATS
atomic_sub(&dh->curr_use, num_bits);
#endif
rv = sys_bitarray_free(&dh->bitarray, num_bits, offset);
(void)rv;
__ASSERT_NO_MSG(rv == 0);
}

key = k_spin_lock(&dh->lock);
sys_heap_free(&dh->heap, buffer);
k_spin_unlock(&dh->lock, key);
static void dmm_memcpy(void *dst, const void *src, size_t len)
{
#define IS_ALIGNED32(x) IS_ALIGNED(x, sizeof(uint32_t))
#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
if (IS_ALIGNED64(len) && IS_ALIGNED64(dst) && IS_ALIGNED64(src)) {
for (uint32_t i = 0; i < len / sizeof(uint64_t); i++) {
((uint64_t *)dst)[i] = ((uint64_t *)src)[i];
}
return;
}

if (IS_ALIGNED32(len) && IS_ALIGNED32(dst) && IS_ALIGNED32(src)) {
for (uint32_t i = 0; i < len / sizeof(uint32_t); i++) {
((uint32_t *)dst)[i] = ((uint32_t *)src)[i];
}
return;
}

memcpy(dst, src, len);
}
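
A usage sketch (buffers invented for illustration): when the length and both
pointers are at least 32-bit aligned, one of the word-copy fast paths runs
instead of the plain memcpy() fallback.

uint32_t src[16] = { 0xdeadbeef };
uint32_t dst[16];

/* 64 bytes, both arrays naturally 32-bit aligned. */
dmm_memcpy(dst, src, sizeof(src));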

int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_length,
@@ -172,7 +300,7 @@ int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_le
return -ENOMEM;
}
/* - copy user buffer contents into allocated buffer */
memcpy(*buffer_out, user_buffer, user_length);
dmm_memcpy(*buffer_out, user_buffer, user_length);
}

/* Check if device memory region is cacheable
@@ -201,7 +329,7 @@ int dmm_buffer_out_release(void *region, void *buffer_out)
/* Check if output buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_out);
}
@@ -281,14 +409,14 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
* If no, copy allocated buffer to the user buffer
*/
if (buffer_in != user_buffer) {
memcpy(user_buffer, buffer_in, user_length);
dmm_memcpy(user_buffer, buffer_in, user_length);
}
/* If yes, no action is needed */

/* Check if input buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_in);
}
@@ -297,14 +425,51 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
return 0;
}

int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use)
{
#ifdef CONFIG_DMM_STATS
struct dmm_heap *dh;

dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}

if (start_addr) {
*start_addr = dh->ptr;
}

if (curr_use) {
*curr_use = (100 * dh->curr_use) / dh->bitarray.num_bits;
}

if (max_use) {
*max_use = (100 * dh->max_use) / dh->bitarray.num_bits;
}

return 0;
#else
return -ENOTSUP;
#endif
}

int dmm_init(void)
{
struct dmm_heap *dh;
int blk_cnt;
int heap_space;

for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) {
dh = &dmm_heaps_data.dmm_heaps[idx];
dh->region = &dmm_regions[idx];
sys_heap_init(&dh->heap, (void *)dmm_heap_start_get(dh), dmm_heap_size_get(dh));
dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr);
dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align);
blk_cnt = heap_space / dh->blk_size;
dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size;
dh->bitarray.num_bits = blk_cnt;
dh->bitarray.num_bundles = HEAP_NUM_WORDS;
dh->bitarray.bundles = dh->mask;
}

return 0;
31 changes: 29 additions & 2 deletions soc/nordic/common/dmm.h
@@ -35,13 +35,13 @@
* Cache line alignment is required if region is cacheable and data cache is enabled.
*/
#define DMM_REG_ALIGN_SIZE(node_id) \
(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint8_t))
(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint32_t))

#else

#define DMM_IS_REG_CACHEABLE(node_id) 0
#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint8_t))
#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint32_t))

#endif /* CONFIG_DCACHE */

/* Determine required alignment of the data buffers in memory region
@@ -163,6 +163,22 @@
*/
int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, void *buffer_in);

/**
* @brief Get statistics.
*
* Must be enabled with CONFIG_DMM_STATS.
*
* @param[in] region DMM memory region.
* @param[out] start_addr Location where the start address of the memory region is written.
*                        Can be NULL.
* @param[out] curr_use Location where the current use in percent is written. Can be NULL.
* @param[out] max_use Location where the maximum use in percent is written. Can be NULL.
*
* @retval 0 on success.
* @retval -EINVAL Invalid region.
* @retval -ENOTSUP Feature is disabled.
*/
int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use);
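
A usage sketch, assuming CONFIG_DMM_STATS=y and a region pointer obtained the
usual way (e.g. via DMM_DEV_TO_REG()):

uintptr_t start;
uint32_t curr, max;

if (dmm_stats_get(region, &start, &curr, &max) == 0) {
	printk("DMM region 0x%lx: %u%% in use, %u%% peak\n",
	       (unsigned long)start, curr, max);
}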

/**
* @brief Initialize DMM.
*
@@ -210,6 +226,17 @@
return 0;
}

static ALWAYS_INLINE int dmm_stats_get(void *region, uintptr_t *start_addr,
uint32_t *curr_use, uint32_t *max_use)
{
ARG_UNUSED(region);
ARG_UNUSED(start_addr);
ARG_UNUSED(curr_use);
ARG_UNUSED(max_use);

return 0;
}

static ALWAYS_INLINE int dmm_init(void)
{
return 0;
10 changes: 10 additions & 0 deletions tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay
@@ -1,3 +1,9 @@
/*
* Copyright (c) 2024 Nordic Semiconductor ASA
*
* SPDX-License-Identifier: Apache-2.0
*/

/ {
aliases {
dut-cache = &spi1;
@@ -52,3 +58,7 @@
pinctrl-1 = <&spi3_sleep_alt>;
pinctrl-names = "default", "sleep";
};

cycle_timer: &timer1 {
status = "okay";
};