17 changes: 17 additions & 0 deletions soc/nordic/common/Kconfig
@@ -48,5 +48,22 @@ source "subsys/logging/Kconfig.template.log_config"

endif # MRAM_LATENCY

if HAS_NORDIC_DMM

config DMM_HEAP_CHUNKS
int "Number of chunks in the DMM heap"
default 32
help
DMM uses a simplified heap that allocates buffers as runs of contiguous
chunks tracked in 32-bit masks. If many small buffers are used with DMM,
allocation may fail because each buffer consumes at least one chunk.
The number of chunks is a trade-off between performance and granularity.
Must be a multiple of 32.

config DMM_STATS
bool "Usage statistics"

endif # HAS_NORDIC_DMM

rsource "vpr/Kconfig"
rsource "uicr/Kconfig"
205 changes: 185 additions & 20 deletions soc/nordic/common/dmm.c
@@ -6,7 +6,7 @@
#include <string.h>
#include <zephyr/cache.h>
#include <zephyr/kernel.h>
#include <zephyr/sys/sys_heap.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/mem_mgmt/mem_attr.h>
#include "dmm.h"

@@ -26,6 +26,9 @@
.dt_align = DMM_REG_ALIGN_SIZE(node_id), \
.dt_allc = &_BUILD_LINKER_END_VAR(node_id)},

#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32)
BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32));

/* Generate declarations of linker variables used to determine size of preallocated variables
* stored in memory sections spanning over memory regions.
* These are used to determine memory left for dynamic bounce buffer allocator to work with.
@@ -42,9 +45,18 @@ struct dmm_region {
};

struct dmm_heap {
struct sys_heap heap;
uint32_t mask[HEAP_NUM_WORDS];
atomic_t tail_mask[HEAP_NUM_WORDS];
uintptr_t ptr;
uintptr_t ptr_end;
size_t blk_size;
const struct dmm_region *region;
sys_bitarray_t bitarray;
#ifdef CONFIG_DMM_STATS
atomic_t curr_use;
uint32_t max_use;
struct k_spinlock lock;
#endif
};

static const struct dmm_region dmm_regions[] = {
@@ -55,7 +67,6 @@ struct {
struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)];
} dmm_heaps_data;


static struct dmm_heap *dmm_heap_find(void *region)
{
struct dmm_heap *dh;
@@ -103,37 +114,154 @@ static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_
return false;
}

static size_t dmm_heap_start_get(struct dmm_heap *dh)
/* Update the tail bits mask after an allocation. Tail bits are all bits of the
 * allocated buffer except the head bit. The tail bits mask, combined with the
 * known index of the buffer's first chunk (recovered from the buffer address at
 * free time), allows determining the buffer size (how many chunks it spans).
 * Because tail_mask is updated after the allocation, the bits representing the
 * allocated buffer can be modified safely; atomic operations are only needed
 * because other bits in the same mask word may be modified concurrently.
 */
static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off)
{
return ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
size_t tail_bits = num_bits - 1;
size_t tail_off = off + 1;

if (tail_bits == 0) {
return;
}

if (HEAP_NUM_WORDS == 1) {
atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off);
return;
}

size_t idx = tail_off / 32;
atomic_t *t_mask = &tail_mask[idx];

tail_off = tail_off % 32;
while (tail_bits > 0) {
uint32_t bits = MIN(32 - tail_off, tail_bits);
uint32_t mask = (bits == 32) ? UINT32_MAX : (BIT_MASK(bits) << tail_off);

atomic_or(t_mask, mask);
t_mask++;
tail_off = 0;
tail_bits -= bits;
}
}
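
A minimal sketch of the encoding (hypothetical values, HEAP_NUM_WORDS == 1):
allocating three chunks starting at offset 4 sets only the two tail bits,
offsets 5 and 6; the head bit is tracked by the bitarray alone.

atomic_t mask[1] = { ATOMIC_INIT(0) };

/* num_bits = 3, off = 4, so tail_bits = 2 and tail_off = 5. */
tail_mask_set(mask, 3, 4);
/* BIT_MASK(2) << 5 == 0x60: bits 5 and 6 are now set. */
__ASSERT_NO_MSG((uint32_t)atomic_get(&mask[0]) == 0x60);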

static size_t dmm_heap_size_get(struct dmm_heap *dh)
/* Determine how many chunks were used for an allocated buffer, based on the
 * tail bits mask and the index of the buffer's first chunk (@p off). The
 * function is called before the bits are freed in the bitarray, so bits
 * belonging to that buffer can be modified safely.
 *
 * @param tail_mask Pointer to the tail_mask array.
 * @param off Index of the first chunk of the buffer.
 *
 * @return Number of chunks forming the buffer that is about to be freed.
 */
static uint32_t num_bits_get(atomic_t *tail_mask, size_t off)
{
return (dh->region->dt_size - (dmm_heap_start_get(dh) - dh->region->dt_addr));
uint32_t num_bits = 1;
size_t tail_off = off + 1;
size_t idx = tail_off / 32;
atomic_t *t_mask = &tail_mask[idx];

tail_off = tail_off % 32;
do {
uint32_t mask = (uint32_t)*t_mask >> tail_off;

if (mask == UINT32_MAX) {
num_bits += 32;
atomic_set(t_mask, 0);
} else {
uint32_t bits = __builtin_ctz(~mask);

if (bits == 0) {
break;
}

num_bits += bits;
atomic_and(t_mask, ~(BIT_MASK(bits) << tail_off));

if (bits + tail_off < 32) {
break;
}

tail_off = 0;
}

t_mask++;
} while ((HEAP_NUM_WORDS > 1) && (t_mask != &tail_mask[HEAP_NUM_WORDS]));

return num_bits;
}
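
Continuing the sketch: at free time only the buffer address, and hence the
head index, is known, so num_bits_get() walks the tail bits, clears them and
reports the total chunk count.

/* mask[0] == 0x60 after the allocation above; the head index is 4. */
uint32_t n = num_bits_get(mask, 4);

/* (0x60 >> 5) == 0x3 has two trailing set bits, so n == 1 + 2 == 3. */
__ASSERT_NO_MSG(n == 3 && atomic_get(&mask[0]) == 0);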

static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length)
{
void *ret;
k_spinlock_key_t key;
size_t num_bits, off;
int rv;

if (dh->ptr == 0) {
/* Not initialized. */
return NULL;
}

length = ROUND_UP(length, dh->region->dt_align);
num_bits = DIV_ROUND_UP(length, dh->blk_size);

rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off);
if (rv < 0) {
return NULL;
}

tail_mask_set(dh->tail_mask, num_bits, off);

#ifdef CONFIG_DMM_STATS
k_spinlock_key_t key;

key = k_spin_lock(&dh->lock);
ret = sys_heap_aligned_alloc(&dh->heap, dh->region->dt_align, length);
dh->curr_use += num_bits;
dh->max_use = MAX(dh->max_use, dh->curr_use);
k_spin_unlock(&dh->lock, key);
#endif

return ret;
return (void *)(dh->ptr + dh->blk_size * off);
}
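
To make the rounding concrete, a sketch with hypothetical region parameters
(dt_align = 4, blk_size = 64):

size_t length = ROUND_UP(100, 4);           /* -> 100 bytes */
size_t num_bits = DIV_ROUND_UP(length, 64); /* -> 2 chunks, 128 bytes reserved */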

static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
{
k_spinlock_key_t key;
size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size;
size_t num_bits = num_bits_get(dh->tail_mask, offset);
int rv;

#ifdef CONFIG_DMM_STATS
atomic_sub(&dh->curr_use, num_bits);
#endif
rv = sys_bitarray_free(&dh->bitarray, num_bits, offset);
(void)rv;
__ASSERT_NO_MSG(rv == 0);
}

key = k_spin_lock(&dh->lock);
sys_heap_free(&dh->heap, buffer);
k_spin_unlock(&dh->lock, key);
static void dmm_memcpy(void *dst, const void *src, size_t len)
{
#define IS_ALIGNED32(x) IS_ALIGNED(x, sizeof(uint32_t))
#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
if (IS_ALIGNED64(len) && IS_ALIGNED64(dst) && IS_ALIGNED64(src)) {
for (uint32_t i = 0; i < len / sizeof(uint64_t); i++) {
((uint64_t *)dst)[i] = ((uint64_t *)src)[i];
}
return;
}

if (IS_ALIGNED32(len) && IS_ALIGNED32(dst) && IS_ALIGNED32(src)) {
for (uint32_t i = 0; i < len / sizeof(uint32_t); i++) {
((uint32_t *)dst)[i] = ((uint32_t *)src)[i];
}
return;
}

memcpy(dst, src, len);
}
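
A usage sketch (buffers invented for illustration): when the length and both
pointers are at least 32-bit aligned, one of the word-copy fast paths runs
instead of the plain memcpy() fallback.

uint32_t src[16] = { 0xdeadbeef };
uint32_t dst[16];

/* 64 bytes, both arrays naturally 32-bit aligned. */
dmm_memcpy(dst, src, sizeof(src));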

int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_length,
@@ -172,7 +300,7 @@ int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_le
return -ENOMEM;
}
/* - copy user buffer contents into allocated buffer */
memcpy(*buffer_out, user_buffer, user_length);
dmm_memcpy(*buffer_out, user_buffer, user_length);
}

/* Check if device memory region is cacheable
@@ -201,7 +329,7 @@ int dmm_buffer_out_release(void *region, void *buffer_out)
/* Check if output buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_out);
}
@@ -281,14 +409,14 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
* If no, copy allocated buffer to the user buffer
*/
if (buffer_in != user_buffer) {
memcpy(user_buffer, buffer_in, user_length);
dmm_memcpy(user_buffer, buffer_in, user_length);
}
/* If yes, no action is needed */

/* Check if input buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_in);
}
@@ -297,14 +425,51 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
return 0;
}

int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use)
{
#ifdef CONFIG_DMM_STATS
struct dmm_heap *dh;

dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}

if (start_addr) {
*start_addr = dh->ptr;
}

if (curr_use) {
*curr_use = (100 * dh->curr_use) / dh->bitarray.num_bits;
}

if (max_use) {
*max_use = (100 * dh->max_use) / dh->bitarray.num_bits;
}

return 0;
#else
return -ENOTSUP;
#endif
}

int dmm_init(void)
{
struct dmm_heap *dh;
int blk_cnt;
int heap_space;

for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) {
dh = &dmm_heaps_data.dmm_heaps[idx];
dh->region = &dmm_regions[idx];
sys_heap_init(&dh->heap, (void *)dmm_heap_start_get(dh), dmm_heap_size_get(dh));
dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr);
dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align);
blk_cnt = heap_space / dh->blk_size;
dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size;
dh->bitarray.num_bits = blk_cnt;
dh->bitarray.num_bundles = HEAP_NUM_WORDS;
dh->bitarray.bundles = dh->mask;
}

return 0;
31 changes: 29 additions & 2 deletions soc/nordic/common/dmm.h
@@ -35,13 +35,13 @@
* Cache line alignment is required if region is cacheable and data cache is enabled.
*/
#define DMM_REG_ALIGN_SIZE(node_id) \
(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint8_t))
(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint32_t))

#else

#define DMM_IS_REG_CACHEABLE(node_id) 0
#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint8_t))
#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint32_t))

#endif /* CONFIG_DCACHE */

/* Determine required alignment of the data buffers in memory region
@@ -163,6 +163,22 @@
*/
int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, void *buffer_in);

/**
* @brief Get statistics.
*
* Must be enabled with CONFIG_DMM_STATS.
*
* @param[in] region DMM memory region.
* @param[out] start_addr Location where the start address of the memory region is written.
*                        Can be NULL.
* @param[out] curr_use Location where the current use in percent is written. Can be NULL.
* @param[out] max_use Location where the maximum use in percent is written. Can be NULL.
*
* @retval 0 on success.
* @retval -EINVAL Invalid region.
* @retval -ENOTSUP Feature is disabled.
*/
int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use);
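
A usage sketch, assuming CONFIG_DMM_STATS=y and a region pointer obtained the
usual way (e.g. via DMM_DEV_TO_REG()):

uintptr_t start;
uint32_t curr, max;

if (dmm_stats_get(region, &start, &curr, &max) == 0) {
	printk("DMM region 0x%lx: %u%% in use, %u%% peak\n",
	       (unsigned long)start, curr, max);
}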

/**
* @brief Initialize DMM.
*
@@ -210,6 +226,17 @@
return 0;
}

static ALWAYS_INLINE int dmm_stats_get(void *region, uintptr_t *start_addr,
uint32_t *curr_use, uint32_t *max_use)
{
ARG_UNUSED(region);
ARG_UNUSED(start_addr);
ARG_UNUSED(curr_use);
ARG_UNUSED(max_use);

return 0;
}

static ALWAYS_INLINE int dmm_init(void)
{
return 0;
10 changes: 10 additions & 0 deletions tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay
@@ -1,3 +1,9 @@
/*
* Copyright (c) 2024 Nordic Semiconductor ASA
*
* SPDX-License-Identifier: Apache-2.0
*/

/ {
aliases {
dut-cache = &spi1;
@@ -52,3 +58,7 @@
pinctrl-1 = <&spi3_sleep_alt>;
pinctrl-names = "default", "sleep";
};

cycle_timer: &timer1 {
status = "okay";
};