From a1333053a8f806780acbbd9312c69d689a5b0c5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Mon, 1 Sep 2025 15:57:55 +0200 Subject: [PATCH 1/9] [nrf fromtree] tests: boards: nrf: dmm: Add timing measurements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add timing measurement to the test to allow DMM profiling. Signed-off-by: Krzysztof Chruściński (cherry picked from commit f06e05097f1b2b0f92a20630b93fc504c6ec7e6f) --- .../boards/nrf5340dk_nrf5340_cpuapp.overlay | 10 ++ .../boards/nrf54h20dk_nrf54h20_cpuapp.overlay | 10 ++ tests/boards/nrf/dmm/prj.conf | 4 + tests/boards/nrf/dmm/src/main.c | 153 ++++++++++++++++-- 4 files changed, 162 insertions(+), 15 deletions(-) diff --git a/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay b/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay index 3e0b1b4d535..48a4e8adc26 100644 --- a/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay +++ b/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2024 Nordic Semiconductor ASA + * + * SPDX-License-Identifier: Apache-2.0 + */ + / { aliases { dut-cache = &spi1; @@ -52,3 +58,7 @@ pinctrl-1 = <&spi3_sleep_alt>; pinctrl-names = "default", "sleep"; }; + +cycle_timer: &timer1 { + status = "okay"; +}; diff --git a/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay b/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay index e3924657b86..2507dd83dfe 100644 --- a/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay +++ b/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2024 Nordic Semiconductor ASA + * + * SPDX-License-Identifier: Apache-2.0 + */ + / { aliases { dut-cache = &spi120; @@ -58,3 +64,7 @@ pinctrl-names = "default", "sleep"; memory-regions = <&dma_fast_region>; }; + +cycle_timer: &timer120 { + status = "okay"; +}; diff --git a/tests/boards/nrf/dmm/prj.conf b/tests/boards/nrf/dmm/prj.conf index 9467c292689..0b99d72b0c3 100644 --- a/tests/boards/nrf/dmm/prj.conf +++ b/tests/boards/nrf/dmm/prj.conf @@ -1 +1,5 @@ CONFIG_ZTEST=y +CONFIG_ASSERT=n +CONFIG_SPIN_VALIDATE=n +CONFIG_TEST_EXTRA_STACK_SIZE=512 +CONFIG_COUNTER=y diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c index 214a9069752..06f4ace0e60 100644 --- a/tests/boards/nrf/dmm/src/main.c +++ b/tests/boards/nrf/dmm/src/main.c @@ -9,9 +9,12 @@ #include #include #include +#include #include +#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t)) + #define DUT_CACHE DT_ALIAS(dut_cache) #define DUT_NOCACHE DT_ALIAS(dut_nocache) @@ -57,13 +60,49 @@ static const struct dmm_test_region dmm_test_regions[DMM_TEST_REGION_COUNT] = { .size = DMM_TEST_GET_REG_SIZE(DUT_NOCACHE) }, }; +static const struct device *counter = DEVICE_DT_GET(DT_NODELABEL(cycle_timer)); +static uint32_t t_delta; + +static uint32_t ts_get(void) +{ + uint32_t t; + + (void)counter_get_value(counter, &t); + return t; +} + +static uint32_t ts_from_get(uint32_t from) +{ + return ts_get() - from; +} + +static uint32_t cyc_to_us(uint32_t cyc) +{ + return counter_ticks_to_us(counter, cyc); +} + +static uint32_t cyc_to_rem_ns(uint32_t cyc) +{ + uint32_t us = counter_ticks_to_us(counter, cyc); + uint32_t ns; + + cyc = cyc - counter_us_to_ticks(counter, (uint64_t)us); + ns = counter_ticks_to_us(counter, 1000 * cyc); + + return ns; +} static void *test_setup(void) { static struct dmm_fixture fixture; + uint32_t t; + counter_start(counter); 
+ t = ts_get(); + t_delta = ts_get() - t; memcpy(fixture.regions, dmm_test_regions, sizeof(dmm_test_regions)); fixture.fill_value = 0x1; + return &fixture; } @@ -79,13 +118,25 @@ static bool dmm_buffer_in_region_check(struct dmm_test_region *dtr, void *buf, s } static void dmm_check_output_buffer(struct dmm_test_region *dtr, uint32_t *fill_value, - void *data, size_t size, bool was_prealloc, bool is_cached) + void *data, size_t size, bool was_prealloc, + bool is_cached, bool print_report) { void *buf; int retval; + uint32_t t; + bool aligned; memset(data, (*fill_value)++, size); + t = ts_get(); retval = dmm_buffer_out_prepare(dtr->mem_reg, data, size, &buf); + t = ts_from_get(t); + aligned = IS_ALIGNED64(data) && IS_ALIGNED64(buf) && IS_ALIGNED64(size); + + if (print_report) { + TC_PRINT("%saligned buffer out prepare size:%d buf:%p took %d.%dus (%d cycles)\n", + aligned ? "" : "not ", size, buf, cyc_to_us(t), cyc_to_rem_ns(t), t); + } + zassert_ok(retval); if (IS_ENABLED(CONFIG_DCACHE) && is_cached) { zassert_true(IS_ALIGNED(buf, CONFIG_DCACHE_LINE_SIZE)); @@ -104,21 +155,37 @@ static void dmm_check_output_buffer(struct dmm_test_region *dtr, uint32_t *fill_ sys_cache_data_invd_range(buf, size); zassert_mem_equal(buf, data, size); + t = ts_get(); retval = dmm_buffer_out_release(dtr->mem_reg, buf); + t = ts_from_get(t); + if (print_report) { + TC_PRINT("buffer out release buf:%p size:%d took %d.%dus (%d cycles)\n", + buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t); + } zassert_ok(retval); } static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_value, - void *data, size_t size, bool was_prealloc, bool is_cached) + void *data, size_t size, bool was_prealloc, + bool is_cached, bool print_report) { void *buf; int retval; + uint32_t t; uint8_t intermediate_buf[128]; + bool aligned; - zassert_true(size < sizeof(intermediate_buf)); + zassert_true(size <= sizeof(intermediate_buf)); + t = ts_get(); retval = dmm_buffer_in_prepare(dtr->mem_reg, data, size, &buf); + t = ts_from_get(t); + aligned = IS_ALIGNED64(data) && IS_ALIGNED64(buf) && IS_ALIGNED64(size); zassert_ok(retval); + if (print_report) { + TC_PRINT("%saligned buffer in prepare buf:%p size:%d took %d.%dus (%d cycles)\n", + aligned ? 
"" : "not ", buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t); + } if (IS_ENABLED(CONFIG_DCACHE) && is_cached) { zassert_true(IS_ALIGNED(buf, CONFIG_DCACHE_LINE_SIZE)); } @@ -144,7 +211,13 @@ static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_v memset(buf, (*fill_value)++, size); } + t = ts_get(); retval = dmm_buffer_in_release(dtr->mem_reg, data, size, buf); + t = ts_from_get(t); + if (print_report) { + TC_PRINT("buffer in release buf:%p size:%d took %d.%dus (%d cycles)\n", + buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t); + } zassert_ok(retval); zassert_mem_equal(data, intermediate_buf, size); @@ -152,10 +225,14 @@ static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_v ZTEST_USER_F(dmm, test_check_dev_cache_in_allocate) { - uint8_t user_data[16]; + uint8_t user_data[128] __aligned(sizeof(uint64_t)); dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), false, true); + user_data, 16, false, true, false); + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, true); + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, sizeof(user_data), false, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_in_preallocate) @@ -163,15 +240,30 @@ ZTEST_USER_F(dmm, test_check_dev_cache_in_preallocate) static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_CACHE); dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), true, true); + user_data, sizeof(user_data), true, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_out_allocate) { - uint8_t user_data[16]; + uint8_t user_data[129] __aligned(sizeof(uint64_t)); + + /* First run to get code into ICACHE so that following runs has consistent timing. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, false); + /* Aligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, true); + /* Unaligned user buffer. */ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), false, true); + &user_data[1], 16, false, true, true); + + /* Aligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, sizeof(user_data) - 1, false, true, true); + /* Unaligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + &user_data[1], sizeof(user_data) - 1, false, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_out_preallocate) @@ -179,15 +271,31 @@ ZTEST_USER_F(dmm, test_check_dev_cache_out_preallocate) static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_CACHE); dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), true, true); + user_data, sizeof(user_data), true, true, true); } ZTEST_USER_F(dmm, test_check_dev_nocache_in_allocate) { - uint8_t user_data[16]; + uint8_t user_data[129] __aligned(sizeof(uint64_t)); + + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value, + user_data, 16, false, false, false); + + /* Aligned user buffer. 
*/
+ dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ user_data, 16, false, false, true);
+
+ /* Unaligned user buffer. */
+ dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ &user_data[1], 16, false, false, true);

+ /* Aligned user buffer. */
 dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
- user_data, sizeof(user_data), false, false);
+ user_data, sizeof(user_data) - 1, false, false, true);
+
+ /* Unaligned user buffer. */
+ dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ &user_data[1], sizeof(user_data) - 1, false, false, true);
 }

 ZTEST_USER_F(dmm, test_check_dev_nocache_in_preallocate)
@@ -195,15 +303,30 @@ ZTEST_USER_F(dmm, test_check_dev_nocache_in_preallocate)
 static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_NOCACHE);

 dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
- user_data, sizeof(user_data), true, false);
+ user_data, sizeof(user_data), true, false, true);
 }

 ZTEST_USER_F(dmm, test_check_dev_nocache_out_allocate)
 {
- uint8_t user_data[16];
+ uint8_t user_data[129] __aligned(sizeof(uint64_t));

+ /* First run to get code into ICACHE so that following results are consistent. */
+ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ user_data, 16, false, false, false);
+
+ /* Aligned user buffer. */
+ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ user_data, 16, false, false, true);

+ /* Unaligned user buffer. */
+ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ &user_data[1], 16, false, false, true);
+
+ /* Aligned user buffer. */
+ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+ user_data, sizeof(user_data) - 1, false, false, true);

+ /* Unaligned user buffer. */
 dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
- user_data, sizeof(user_data), false, false);
+ &user_data[1], sizeof(user_data) - 1, false, false, true);
 }

 ZTEST_USER_F(dmm, test_check_dev_nocache_out_preallocate)
@@ -211,7 +334,7 @@ ZTEST_USER_F(dmm, test_check_dev_nocache_out_preallocate)
 static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_NOCACHE);

 dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
- user_data, sizeof(user_data), true, false);
+ user_data, sizeof(user_data), true, false, true);
 }

 ZTEST_SUITE(dmm, NULL, test_setup, NULL, test_cleanup, NULL);

From 42daed9aba76526e21dca56074c995a9dda46ad9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?=
Date: Tue, 2 Sep 2025 07:57:29 +0200
Subject: [PATCH 2/9] [nrf fromtree] soc: nordic: common: dmm: Optimize memcpy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The default memcpy used in Zephyr is not optimized and performs simple
byte-by-byte copying. Using double-word or word access can significantly
reduce copying time, especially for RAM3 (slow peripheral RAM).
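
As a rough illustration of the gain: copying 512 bytes byte-by-byte takes
512 bus accesses, while double-word accesses need only 64, so on slow
memory the copy can be close to 8x faster when the source, destination
and length are all 8-byte aligned. A hypothetical caller-side sketch
(mem_reg and send_prepare are illustrative names, not part of this
change):

  static uint8_t tx_data[64] __aligned(sizeof(uint64_t));

  static int send_prepare(void *mem_reg, void **dma_buf)
  {
          /* Buffer address and length are multiples of 8 bytes, so the
           * copy inside dmm_buffer_out_prepare() can use the 64-bit
           * access path introduced by this patch.
           */
          return dmm_buffer_out_prepare(mem_reg, tx_data, sizeof(tx_data), dma_buf);
  }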
Signed-off-by: Krzysztof Chruściński
(cherry picked from commit ff3e0180adcf65ce62d365167c8cf7c5144fe692)
---
 soc/nordic/common/dmm.c | 25 +++++++++++++++++++++++--
 soc/nordic/common/dmm.h | 4 ++--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/soc/nordic/common/dmm.c b/soc/nordic/common/dmm.c
index 0b4e42f8c6d..78b43e7a4b9 100644
--- a/soc/nordic/common/dmm.c
+++ b/soc/nordic/common/dmm.c
@@ -136,6 +136,27 @@ static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
 k_spin_unlock(&dh->lock, key);
 }

+static void dmm_memcpy(void *dst, const void *src, size_t len)
+{
+#define IS_ALIGNED32(x) IS_ALIGNED(x, sizeof(uint32_t))
+#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
+ if (IS_ALIGNED64(len) && IS_ALIGNED64(dst) && IS_ALIGNED64(src)) {
+ for (uint32_t i = 0; i < len / sizeof(uint64_t); i++) {
+ ((uint64_t *)dst)[i] = ((uint64_t *)src)[i];
+ }
+ return;
+ }
+
+ if (IS_ALIGNED32(len) && IS_ALIGNED32(dst) && IS_ALIGNED32(src)) {
+ for (uint32_t i = 0; i < len / sizeof(uint32_t); i++) {
+ ((uint32_t *)dst)[i] = ((uint32_t *)src)[i];
+ }
+ return;
+ }
+
+ memcpy(dst, src, len);
+}
+
 int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_length,
 void **buffer_out)
 {
@@ -172,7 +193,7 @@ int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_le
 return -ENOMEM;
 }
 /* - copy user buffer contents into allocated buffer */
- memcpy(*buffer_out, user_buffer, user_length);
+ dmm_memcpy(*buffer_out, user_buffer, user_length);
 }

 /* Check if device memory region is cacheable
@@ -281,7 +302,7 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
 * If no, copy allocated buffer to the user buffer
 */
 if (buffer_in != user_buffer) {
- memcpy(user_buffer, buffer_in, user_length);
+ dmm_memcpy(user_buffer, buffer_in, user_length);
 }

 /* If yes, no action is needed */
diff --git a/soc/nordic/common/dmm.h b/soc/nordic/common/dmm.h
index 34b517c92df..ca627fbd55f 100644
--- a/soc/nordic/common/dmm.h
+++ b/soc/nordic/common/dmm.h
@@ -35,12 +35,12 @@ extern "C" {
 * Cache line alignment is required if region is cacheable and data cache is enabled.
 */
 #define DMM_REG_ALIGN_SIZE(node_id) \
- (DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint8_t))
+ (DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint32_t))

 #else

 #define DMM_IS_REG_CACHEABLE(node_id) 0

-#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint8_t))
+#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint32_t))

 #endif /* CONFIG_DCACHE */

From 4a54f1335f0e1d315fae240a40975885ef621125 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?=
Date: Tue, 2 Sep 2025 07:57:53 +0200
Subject: [PATCH 3/9] [nrf fromtree] soc: nordic: common: dmm: Optimize by using a micro heap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a micro heap implementation that uses one or more 32-bit masks to
quickly allocate blocks. It is significantly faster than sys_heap. The
difference is especially big for the RAM3 heap, because the heap control
data is itself located in RAM3 space, so operations there were extremely
slow (15 us to allocate a buffer).

The simplified heap requires a DMM API change, as the release functions
now need to know the length of the allocated buffer (the buffer address
alone is enough for the standard heap).
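
For illustration, a simplified single-word sketch of the allocation idea
(the actual implementation below uses sys_bitarray for the allocation
mask plus a separate tail mask to remember allocation sizes):

  /* Bit i set in the mask means chunk i is in use. */
  static int chunk_alloc(uint32_t *mask, size_t n_chunks)
  {
          uint32_t want = (n_chunks >= 32) ? UINT32_MAX : BIT_MASK(n_chunks);

          for (size_t off = 0; off + n_chunks <= 32; off++) {
                  if ((*mask & (want << off)) == 0) {
                          *mask |= want << off;
                          return (int)off; /* index of the first chunk */
                  }
          }

          return -ENOMEM;
  }

  /* Freeing must know the allocation length, hence the API change. */
  static void chunk_free(uint32_t *mask, size_t off, size_t n_chunks)
  {
          uint32_t want = (n_chunks >= 32) ? UINT32_MAX : BIT_MASK(n_chunks);

          *mask &= ~(want << off);
  }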
Signed-off-by: Krzysztof Chruściński (cherry picked from commit decdb30b05fdfcae677f5f98872b84afbb80fdb9) --- soc/nordic/common/Kconfig | 14 ++++ soc/nordic/common/dmm.c | 164 +++++++++++++++++++++++++++++++++----- 2 files changed, 157 insertions(+), 21 deletions(-) diff --git a/soc/nordic/common/Kconfig b/soc/nordic/common/Kconfig index 782d9452b67..b3a8f13089d 100644 --- a/soc/nordic/common/Kconfig +++ b/soc/nordic/common/Kconfig @@ -48,5 +48,19 @@ source "subsys/logging/Kconfig.template.log_config" endif # MRAM_LATENCY +if HAS_NORDIC_DMM + +config DMM_HEAP_CHUNKS + int "Number of chunks in the DMM heap" + default 32 + help + DMM is using a simplified heap which is using 32 bit mask to allocate + required buffer which consists of contiguous chunks. If there are many + small buffers used with DMM it is possible that allocation will fail. + Number of chunks is a trade-off between performance and granularity. + Must be multiply of 32. + +endif # HAS_NORDIC_DMM + rsource "vpr/Kconfig" rsource "uicr/Kconfig" diff --git a/soc/nordic/common/dmm.c b/soc/nordic/common/dmm.c index 78b43e7a4b9..411b8be14a7 100644 --- a/soc/nordic/common/dmm.c +++ b/soc/nordic/common/dmm.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include "dmm.h" @@ -26,6 +26,9 @@ .dt_align = DMM_REG_ALIGN_SIZE(node_id), \ .dt_allc = &_BUILD_LINKER_END_VAR(node_id)}, +#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32) +BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32)); + /* Generate declarations of linker variables used to determine size of preallocated variables * stored in memory sections spanning over memory regions. * These are used to determine memory left for dynamic bounce buffer allocator to work with. @@ -42,9 +45,13 @@ struct dmm_region { }; struct dmm_heap { - struct sys_heap heap; + uint32_t mask[HEAP_NUM_WORDS]; + atomic_t tail_mask[HEAP_NUM_WORDS]; + uintptr_t ptr; + uintptr_t ptr_end; + size_t blk_size; const struct dmm_region *region; - struct k_spinlock lock; + sys_bitarray_t bitarray; }; static const struct dmm_region dmm_regions[] = { @@ -55,7 +62,6 @@ struct { struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)]; } dmm_heaps_data; - static struct dmm_heap *dmm_heap_find(void *region) { struct dmm_heap *dh; @@ -103,37 +109,144 @@ static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_ return false; } -static size_t dmm_heap_start_get(struct dmm_heap *dh) +/* Function updates the tail bits mask after the allocation. Tail bits are all bits + * except the head. Tail bits mask together with a known index of the start of + * chunk (because freeing has a buffer address) allows to determine the size of the + * buffer (how many chunks were included. Because tail_mask is updated after allocation + * we can safely modify bits that represents allocated buffer, we only need to use + * atomic operation on the mask since mask may be modified (but different bits). + */ +static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off) { - return ROUND_UP(dh->region->dt_allc, dh->region->dt_align); + size_t tail_bits = num_bits - 1; + size_t tail_off = off + 1; + + if (tail_bits == 0) { + return; + } + + if (HEAP_NUM_WORDS == 1) { + atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off); + return; + } + + /* If bit mask exceeds a single word then tail may spill to the adjacent word. */ + size_t idx = tail_off / 32; + + tail_off = tail_off - 32 * idx; + if ((tail_off + tail_bits) <= 32) { + /* Tail mask fits in a single word. 
*/ + atomic_or(&tail_mask[idx], BIT_MASK(tail_bits) << tail_off); + return; + } + + /* Tail spilled. Remainder is set in the next word. Since number of tail_masks + * match number of words in bitarray we don't need to check if we are exceeding + * the array boundary. + */ + atomic_or(&tail_mask[idx], BIT_MASK(32 - tail_off) << tail_off); + + + size_t rem_tail = tail_bits - (32 - tail_off); + atomic_t *mask = &tail_mask[idx + 1]; + + while (rem_tail >= 32) { + atomic_or(mask, UINT32_MAX); + mask++; + rem_tail -= 32; + } + atomic_or(mask, BIT_MASK(rem_tail)); } -static size_t dmm_heap_size_get(struct dmm_heap *dh) +/* Function determines how many chunks were used for the allocated buffer. It is + * determined from tail bits mask and index of the starting chunk (%p off). + * Function is called before bits are freed in the bitarray so we can safely modify + * bits that belong to that buffer. + * + * @param tail_mask Pointer to tail_mask array. + * @param off Index of the start of the buffer. + * + * @return Number of chunks that forms the buffer that will be freed. + */ +static uint32_t num_bits_get(atomic_t *tail_mask, size_t off) { - return (dh->region->dt_size - (dmm_heap_start_get(dh) - dh->region->dt_addr)); + uint32_t mask; + uint32_t num_bits; + + if (HEAP_NUM_WORDS == 1) { + mask = (*tail_mask | BIT(off)) >> off; + num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask); + if (num_bits > 1) { + mask = BIT_MASK(num_bits - 1) << (off + 1); + atomic_and(tail_mask, ~mask); + } + + return num_bits; + } + + /* In multiword bit array we need to check if tail is spilling over to the next word. */ + size_t idx = off / 32; + size_t w_off = off - 32 * idx; + atomic_t *t_mask = &tail_mask[idx]; + + mask = (*t_mask | BIT(w_off)) >> w_off; + num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask); + if (num_bits == 1) { + return num_bits; + } + + mask = BIT_MASK(num_bits - 1) << (w_off + 1); + atomic_and(t_mask, ~mask); + if (((w_off + num_bits) == 32) && (idx < (HEAP_NUM_WORDS - 1))) { + size_t tmp_bits; + + /* If we are at the end of the one mask we need to check the beginning of the + * next one as there might be remaining part of the tail. + */ + do { + t_mask++; + tmp_bits = (*t_mask == UINT32_MAX) ? 32 : __builtin_ctz(~(*t_mask)); + mask = (tmp_bits == 32) ? UINT32_MAX : BIT_MASK(tmp_bits); + atomic_and(t_mask, ~mask); + num_bits += tmp_bits; + } while ((tmp_bits == 32) && (t_mask != &tail_mask[HEAP_NUM_WORDS - 1])); + } + + return num_bits; } static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length) { - void *ret; - k_spinlock_key_t key; + size_t num_bits, off; + int rv; + + if (dh->ptr == 0) { + /* Not initialized. 
*/ + return NULL; + } length = ROUND_UP(length, dh->region->dt_align); + num_bits = DIV_ROUND_UP(length, dh->blk_size); + + rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off); + if (rv < 0) { + return NULL; + } - key = k_spin_lock(&dh->lock); - ret = sys_heap_aligned_alloc(&dh->heap, dh->region->dt_align, length); - k_spin_unlock(&dh->lock, key); + tail_mask_set(dh->tail_mask, num_bits, off); - return ret; + return (void *)(dh->ptr + dh->blk_size * off); } static void dmm_buffer_free(struct dmm_heap *dh, void *buffer) { - k_spinlock_key_t key; + size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size; + size_t num_bits = num_bits_get(dh->tail_mask, offset); + int rv; - key = k_spin_lock(&dh->lock); - sys_heap_free(&dh->heap, buffer); - k_spin_unlock(&dh->lock, key); + rv = sys_bitarray_free(&dh->bitarray, num_bits, offset); + (void)rv; + __ASSERT_NO_MSG(rv == 0); } static void dmm_memcpy(void *dst, const void *src, size_t len) @@ -222,7 +335,7 @@ int dmm_buffer_out_release(void *region, void *buffer_out) /* Check if output buffer is contained within memory area * managed by dynamic memory allocator */ - if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) { + if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end)) { /* If yes, free the buffer */ dmm_buffer_free(dh, buffer_out); } @@ -309,7 +422,7 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v /* Check if input buffer is contained within memory area * managed by dynamic memory allocator */ - if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) { + if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end)) { /* If yes, free the buffer */ dmm_buffer_free(dh, buffer_in); } @@ -321,11 +434,20 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v int dmm_init(void) { struct dmm_heap *dh; + int blk_cnt; + int heap_space; for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) { dh = &dmm_heaps_data.dmm_heaps[idx]; dh->region = &dmm_regions[idx]; - sys_heap_init(&dh->heap, (void *)dmm_heap_start_get(dh), dmm_heap_size_get(dh)); + dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align); + heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr); + dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align); + blk_cnt = heap_space / dh->blk_size; + dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size; + dh->bitarray.num_bits = blk_cnt; + dh->bitarray.num_bundles = HEAP_NUM_WORDS; + dh->bitarray.bundles = dh->mask; } return 0; From bf7b08066e20bd80ffd0506ce0965a15d6dbc764 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Mon, 1 Sep 2025 15:58:37 +0200 Subject: [PATCH 4/9] [nrf fromtree] tests: boards: nrf: dmm: Align test to changes in DMM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align to API changes. 
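
Concretely, DMM_ALIGN_SIZE(DUT_NOCACHE) now evaluates to sizeof(uint32_t)
instead of 1, so the BUILD_ASSERT in the test is updated to match.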
Signed-off-by: Krzysztof Chruściński (cherry picked from commit 8cc4da31ca72507ccada7414f58ed6f5af4c6f8f) --- tests/boards/nrf/dmm/src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c index 06f4ace0e60..6cfeb1372ec 100644 --- a/tests/boards/nrf/dmm/src/main.c +++ b/tests/boards/nrf/dmm/src/main.c @@ -28,7 +28,7 @@ #if CONFIG_DCACHE BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_CACHE) == CONFIG_DCACHE_LINE_SIZE); -BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_NOCACHE) == 1); +BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_NOCACHE) == sizeof(uint32_t)); #endif struct dmm_test_region { From 121f27ccd63e41357207d17959c18099791eb3c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Tue, 9 Sep 2025 21:45:42 +0200 Subject: [PATCH 5/9] [nrf fromtree] soc: nordic: common: dmm: Add optional usage stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for getting usage statistics for DMM. Signed-off-by: Krzysztof Chruściński (cherry picked from commit d10ee98ee8294b72128fdef781fd98922c2e8250) --- soc/nordic/common/Kconfig | 3 +++ soc/nordic/common/dmm.c | 45 +++++++++++++++++++++++++++++++++++++++ soc/nordic/common/dmm.h | 27 +++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/soc/nordic/common/Kconfig b/soc/nordic/common/Kconfig index b3a8f13089d..e1fcd713c77 100644 --- a/soc/nordic/common/Kconfig +++ b/soc/nordic/common/Kconfig @@ -60,6 +60,9 @@ config DMM_HEAP_CHUNKS Number of chunks is a trade-off between performance and granularity. Must be multiply of 32. +config DMM_STATS + bool "Usage statistics" + endif # HAS_NORDIC_DMM rsource "vpr/Kconfig" diff --git a/soc/nordic/common/dmm.c b/soc/nordic/common/dmm.c index 411b8be14a7..e832a1f27b8 100644 --- a/soc/nordic/common/dmm.c +++ b/soc/nordic/common/dmm.c @@ -52,6 +52,11 @@ struct dmm_heap { size_t blk_size; const struct dmm_region *region; sys_bitarray_t bitarray; +#ifdef CONFIG_DMM_STATS + atomic_t curr_use; + uint32_t max_use; + struct k_spinlock lock; +#endif }; static const struct dmm_region dmm_regions[] = { @@ -235,6 +240,15 @@ static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length) tail_mask_set(dh->tail_mask, num_bits, off); +#ifdef CONFIG_DMM_STATS + k_spinlock_key_t key; + + key = k_spin_lock(&dh->lock); + dh->curr_use += num_bits; + dh->max_use = MAX(dh->max_use, dh->curr_use); + k_spin_unlock(&dh->lock, key); +#endif + return (void *)(dh->ptr + dh->blk_size * off); } @@ -244,6 +258,9 @@ static void dmm_buffer_free(struct dmm_heap *dh, void *buffer) size_t num_bits = num_bits_get(dh->tail_mask, offset); int rv; +#ifdef CONFIG_DMM_STATS + atomic_sub(&dh->curr_use, num_bits); +#endif rv = sys_bitarray_free(&dh->bitarray, num_bits, offset); (void)rv; __ASSERT_NO_MSG(rv == 0); @@ -431,6 +448,34 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v return 0; } +int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use) +{ +#ifdef CONFIG_DMM_STATS + struct dmm_heap *dh; + + dh = dmm_heap_find(region); + if (dh == NULL) { + return -EINVAL; + } + + if (start_addr) { + *start_addr = dh->ptr; + } + + if (curr_use) { + *curr_use = (100 * dh->curr_use) / dh->bitarray.num_bits; + } + + if (max_use) { + *max_use = (100 * dh->max_use) / dh->bitarray.num_bits; + } + + return 0; +#else + return -ENOTSUP; +#endif +} + int dmm_init(void) { struct dmm_heap *dh; diff --git a/soc/nordic/common/dmm.h b/soc/nordic/common/dmm.h index 
ca627fbd55f..09486289aa6 100644 --- a/soc/nordic/common/dmm.h +++ b/soc/nordic/common/dmm.h @@ -163,6 +163,22 @@ int dmm_buffer_in_prepare(void *region, void *user_buffer, size_t user_length, v */ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, void *buffer_in); +/** + * @brief Get statistics. + * + * Must be enabled with CONFIG_DMM_STATS. + * + * @param[in] region DMM memory region. + * @param[out] start_addr Location where starting address of the memory region is set. Can be null. + * @param[out] curr_use Location where current use in percent is written. Can be null. + * @param[out] max_use Location where maximum use in percent is written. Can be null. + * + * @retval 0 on success. + * @retval -EINVAL Invalid region. + * @retval -ENOTSUP Feature is disabled. + */ +int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use); + /** * @brief Initialize DMM. * @@ -210,6 +226,17 @@ static ALWAYS_INLINE int dmm_buffer_in_release(void *region, void *user_buffer, return 0; } +static ALWAYS_INLINE int dmm_stats_get(void *region, uintptr_t *start_addr, + uint32_t *curr_use, uint32_t *max_use) +{ + ARG_UNUSED(region); + ARG_UNUSED(start_addr); + ARG_UNUSED(curr_use); + ARG_UNUSED(max_use); + + return 0; +} + static ALWAYS_INLINE int dmm_init(void) { return 0; From 4e9bdbfc3ec5aecc78eefb0db440c13fb107320e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Tue, 9 Sep 2025 21:47:07 +0200 Subject: [PATCH 6/9] [nrf fromtree] tests: boards: nrf: dmm: Extend test coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extended test to check usage stats and longer buffer alloc. Signed-off-by: Krzysztof Chruściński (cherry picked from commit 3d3104207ca7219c2489182c9b5617b7ebe341c1) --- tests/boards/nrf/dmm/src/main.c | 46 ++++++++++++++++++++++++++++++ tests/boards/nrf/dmm/testcase.yaml | 5 ++++ 2 files changed, 51 insertions(+) diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c index 6cfeb1372ec..f4073d82e9b 100644 --- a/tests/boards/nrf/dmm/src/main.c +++ b/tests/boards/nrf/dmm/src/main.c @@ -337,6 +337,52 @@ ZTEST_USER_F(dmm, test_check_dev_nocache_out_preallocate) user_data, sizeof(user_data), true, false, true); } +ZTEST_USER_F(dmm, test_check_multiple_alloc_and_free) +{ + int retval; + uint8_t buf[256]; + uint8_t buf2[32]; + void *dmm_buf; + void *dmm_buf2; + void *mem_reg = fixture->regions[DMM_TEST_REGION_NOCACHE].mem_reg; + uintptr_t start_address; + uint32_t curr_use, max_use; + + if (IS_ENABLED(CONFIG_DMM_STATS)) { + retval = dmm_stats_get(mem_reg, &start_address, &curr_use, &max_use); + zassert_ok(retval); + } + + memset(buf, 0, sizeof(buf)); + memset(buf2, 0, sizeof(buf2)); + + retval = dmm_buffer_out_prepare(mem_reg, (void *)buf, sizeof(buf), &dmm_buf); + zassert_ok(retval); + zassert_true(dmm_buf != NULL); + + retval = dmm_buffer_out_prepare(mem_reg, (void *)buf2, sizeof(buf2), &dmm_buf2); + zassert_ok(retval); + zassert_true(dmm_buf2 != NULL); + + retval = dmm_buffer_out_release(mem_reg, dmm_buf2); + zassert_ok(retval); + zassert_true(dmm_buf != NULL); + + retval = dmm_buffer_out_release(mem_reg, dmm_buf); + zassert_ok(retval); + zassert_true(dmm_buf != NULL); + + if (IS_ENABLED(CONFIG_DMM_STATS)) { + uint32_t curr_use2; + + retval = dmm_stats_get(mem_reg, &start_address, &curr_use2, &max_use); + zassert_ok(retval); + zassert_equal(curr_use, curr_use2); + TC_PRINT("Stats start_address:%p current use:%d%% max use:%d%%\n", + 
(void *)start_address, curr_use2, max_use); + } +} + ZTEST_SUITE(dmm, NULL, test_setup, NULL, test_cleanup, NULL); int dmm_test_prepare(void) diff --git a/tests/boards/nrf/dmm/testcase.yaml b/tests/boards/nrf/dmm/testcase.yaml index b5f41f281a5..140454add34 100644 --- a/tests/boards/nrf/dmm/testcase.yaml +++ b/tests/boards/nrf/dmm/testcase.yaml @@ -16,3 +16,8 @@ tests: - CONFIG_DCACHE=n platform_allow: - nrf54h20dk/nrf54h20/cpuapp + boards.nrf.dmm.stats: + extra_configs: + - CONFIG_DMM_STATS=y + platform_allow: + - nrf54h20dk/nrf54h20/cpuapp From c8a782872d79bba2e8a4a44bcbb7ea4167197a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Mon, 22 Sep 2025 15:25:07 +0200 Subject: [PATCH 7/9] [nrf fromtree] tests: boards: nrf: dmm: Add stress test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add stress test which validates that allocator is thread safe and has no memory leaks. Signed-off-by: Krzysztof Chruściński (cherry picked from commit c02f904a574ac3f2c988d20d138c4627f48f3d24) --- tests/boards/nrf/dmm/prj.conf | 1 + tests/boards/nrf/dmm/src/main.c | 178 +++++++++++++++++++++++++++++ tests/boards/nrf/dmm/testcase.yaml | 6 + 3 files changed, 185 insertions(+) diff --git a/tests/boards/nrf/dmm/prj.conf b/tests/boards/nrf/dmm/prj.conf index 0b99d72b0c3..c05afbb6ad3 100644 --- a/tests/boards/nrf/dmm/prj.conf +++ b/tests/boards/nrf/dmm/prj.conf @@ -1,4 +1,5 @@ CONFIG_ZTEST=y +CONFIG_ZTRESS=y CONFIG_ASSERT=n CONFIG_SPIN_VALIDATE=n CONFIG_TEST_EXTRA_STACK_SIZE=512 diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c index f4073d82e9b..27d239d5cf2 100644 --- a/tests/boards/nrf/dmm/src/main.c +++ b/tests/boards/nrf/dmm/src/main.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include @@ -383,6 +385,182 @@ ZTEST_USER_F(dmm, test_check_multiple_alloc_and_free) } } +struct dmm_stress_data { + void *mem_reg; + void *alloc_ptr[32]; + uint8_t alloc_token[32]; + size_t alloc_len[32]; + atomic_t alloc_mask; + atomic_t busy_mask; + atomic_t fails; + atomic_t cnt; + bool cached; +}; + +static void stress_free_op(struct dmm_stress_data *data, int prio, int id) +{ + /* buffer is allocated. */ + uint8_t token = data->alloc_token[id]; + size_t len = data->alloc_len[id]; + uint8_t *ptr = data->alloc_ptr[id]; + int rv; + + for (int j = 0; j < len; j++) { + uint8_t exp_val = (uint8_t)(token + j); + + if (ptr[j] != exp_val) { + for (int k = 0; k < len; k++) { + printk("%02x ", ptr[k]); + } + } + zassert_equal(ptr[j], exp_val, "At %d got:%d exp:%d, len:%d id:%d, alloc_cnt:%d", + j, ptr[j], exp_val, len, id, (uint32_t)data->cnt); + } + + rv = dmm_buffer_in_release(data->mem_reg, ptr, len, ptr); + zassert_ok(rv); + /* Indicate that buffer is released. */ + atomic_and(&data->alloc_mask, ~BIT(id)); +} + +static bool stress_alloc_op(struct dmm_stress_data *data, int prio, int id) +{ + uint32_t r32 = sys_rand32_get(); + size_t len = r32 % 512; + uint8_t *ptr = data->alloc_ptr[id]; + int rv; + + /* Rarely allocate bigger buffer. 
*/ + if ((r32 & 0x7) == 0) { + len += 512; + } + + rv = dmm_buffer_in_prepare(data->mem_reg, &r32/*dummy*/, len, (void **)&ptr); + if (rv < 0) { + atomic_inc(&data->fails); + return true; + } + + uint8_t token = r32 >> 24; + + data->alloc_ptr[id] = ptr; + data->alloc_len[id] = len; + data->alloc_token[id] = token; + for (int j = 0; j < len; j++) { + ptr[j] = (uint8_t)(j + token); + } + if (data->cached) { + sys_cache_data_flush_range(ptr, len); + } + atomic_inc(&data->cnt); + return false; +} + +bool stress_func(void *user_data, uint32_t cnt, bool last, int prio) +{ + struct dmm_stress_data *data = user_data; + uint32_t r = sys_rand32_get(); + int rpt = r & 0x3; + + r >>= 2; + + for (int i = 0; i < rpt + 1; i++) { + int id = r % 32; + int key; + bool free_op; + bool clear_bit; + + key = irq_lock(); + if ((data->busy_mask & BIT(id)) == 0) { + data->busy_mask |= BIT(id); + if (data->alloc_mask & BIT(id)) { + free_op = true; + } else { + data->alloc_mask |= BIT(id); + free_op = false; + } + } else { + irq_unlock(key); + continue; + } + + irq_unlock(key); + r >>= 5; + + if (free_op) { + stress_free_op(data, prio, id); + clear_bit = true; + } else { + clear_bit = stress_alloc_op(data, prio, id); + } + + key = irq_lock(); + data->busy_mask &= ~BIT(id); + if (clear_bit) { + data->alloc_mask &= ~BIT(id); + } + irq_unlock(key); + } + + return true; +} + +static void free_all(struct dmm_stress_data *data) +{ + while (data->alloc_mask) { + int id = 31 - __builtin_clz(data->alloc_mask); + + stress_free_op(data, 0, id); + data->alloc_mask &= ~BIT(id); + } +} + +static void stress_allocator(void *mem_reg, bool cached) +{ + uint32_t timeout = 3000; + struct dmm_stress_data ctx; + int rv; + uint32_t curr_use; + + memset(&ctx, 0, sizeof(ctx)); + ctx.mem_reg = mem_reg; + ctx.cached = cached; + + if (IS_ENABLED(CONFIG_DMM_STATS)) { + rv = dmm_stats_get(ctx.mem_reg, NULL, &curr_use, NULL); + zassert_ok(rv); + } + + ztress_set_timeout(K_MSEC(timeout)); + + ZTRESS_EXECUTE(ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4)), + ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4)), + ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4))); + + free_all(&ctx); + TC_PRINT("Executed %d allocation operation. 
Failed to allocate %d times.\n", + (uint32_t)ctx.cnt, (uint32_t)ctx.fails); + + if (IS_ENABLED(CONFIG_DMM_STATS)) { + uint32_t curr_use2; + + rv = dmm_stats_get(ctx.mem_reg, NULL, &curr_use2, NULL); + zassert_ok(rv); + zassert_equal(curr_use, curr_use2, "Unexpected usage got:%d exp:%d", + curr_use2, curr_use); + } +} + +ZTEST_F(dmm, test_stress_allocator_nocache) +{ + stress_allocator(fixture->regions[DMM_TEST_REGION_NOCACHE].mem_reg, false); +} + +ZTEST_F(dmm, test_stress_allocator_cache) +{ + stress_allocator(fixture->regions[DMM_TEST_REGION_CACHE].mem_reg, true); +} + ZTEST_SUITE(dmm, NULL, test_setup, NULL, test_cleanup, NULL); int dmm_test_prepare(void) diff --git a/tests/boards/nrf/dmm/testcase.yaml b/tests/boards/nrf/dmm/testcase.yaml index 140454add34..7fc991d4824 100644 --- a/tests/boards/nrf/dmm/testcase.yaml +++ b/tests/boards/nrf/dmm/testcase.yaml @@ -21,3 +21,9 @@ tests: - CONFIG_DMM_STATS=y platform_allow: - nrf54h20dk/nrf54h20/cpuapp + boards.nrf.dmm.more_chunks: + extra_configs: + - CONFIG_DMM_STATS=y + - CONFIG_DMM_HEAP_CHUNKS=96 + platform_allow: + - nrf54h20dk/nrf54h20/cpuapp From 6bcf450ea1dae007b2df5be04007d93c1bcc5ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Wed, 1 Oct 2025 09:56:55 +0200 Subject: [PATCH 8/9] [nrf fromlist] tests: boards: nrf: dmm: Skip stress test for empty memory region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip stress test if null memory region. That's the case if DMM is disabled. Upstream PR #: 96831 Signed-off-by: Krzysztof Chruściński --- tests/boards/nrf/dmm/src/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c index 27d239d5cf2..58b7e891c79 100644 --- a/tests/boards/nrf/dmm/src/main.c +++ b/tests/boards/nrf/dmm/src/main.c @@ -522,6 +522,10 @@ static void stress_allocator(void *mem_reg, bool cached) int rv; uint32_t curr_use; + if (mem_reg == NULL) { + ztest_test_skip(); + } + memset(&ctx, 0, sizeof(ctx)); ctx.mem_reg = mem_reg; ctx.cached = cached; From b4263d1ca43f353e3a81a704d8d1088675d2cbc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Chru=C5=9Bci=C5=84ski?= Date: Wed, 1 Oct 2025 07:47:44 +0200 Subject: [PATCH 9/9] [nrf fromlist] soc: nordic: common: dmm: Fix allocation algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were some corner cases and stress test could fail. Reworking tail bits handling to make the stress test pass. Upstream PR #: 96831 Signed-off-by: Krzysztof Chruściński --- soc/nordic/common/dmm.c | 91 +++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 57 deletions(-) diff --git a/soc/nordic/common/dmm.c b/soc/nordic/common/dmm.c index e832a1f27b8..ac22f8ee430 100644 --- a/soc/nordic/common/dmm.c +++ b/soc/nordic/common/dmm.c @@ -135,32 +135,19 @@ static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off) return; } - /* If bit mask exceeds a single word then tail may spill to the adjacent word. */ size_t idx = tail_off / 32; + atomic_t *t_mask = &tail_mask[idx]; - tail_off = tail_off - 32 * idx; - if ((tail_off + tail_bits) <= 32) { - /* Tail mask fits in a single word. */ - atomic_or(&tail_mask[idx], BIT_MASK(tail_bits) << tail_off); - return; - } - - /* Tail spilled. Remainder is set in the next word. Since number of tail_masks - * match number of words in bitarray we don't need to check if we are exceeding - * the array boundary. 
- */ - atomic_or(&tail_mask[idx], BIT_MASK(32 - tail_off) << tail_off); - - - size_t rem_tail = tail_bits - (32 - tail_off); - atomic_t *mask = &tail_mask[idx + 1]; + tail_off = tail_off % 32; + while (tail_bits > 0) { + uint32_t bits = MIN(32 - tail_off, tail_bits); + uint32_t mask = (bits == 32) ? UINT32_MAX : (BIT_MASK(bits) << tail_off); - while (rem_tail >= 32) { - atomic_or(mask, UINT32_MAX); - mask++; - rem_tail -= 32; + atomic_or(t_mask, mask); + t_mask++; + tail_off = 0; + tail_bits -= bits; } - atomic_or(mask, BIT_MASK(rem_tail)); } /* Function determines how many chunks were used for the allocated buffer. It is @@ -175,47 +162,37 @@ static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off) */ static uint32_t num_bits_get(atomic_t *tail_mask, size_t off) { - uint32_t mask; - uint32_t num_bits; + uint32_t num_bits = 1; + size_t tail_off = off + 1; + size_t idx = tail_off / 32; + atomic_t *t_mask = &tail_mask[idx]; - if (HEAP_NUM_WORDS == 1) { - mask = (*tail_mask | BIT(off)) >> off; - num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask); - if (num_bits > 1) { - mask = BIT_MASK(num_bits - 1) << (off + 1); - atomic_and(tail_mask, ~mask); - } + tail_off = tail_off % 32; + do { + uint32_t mask = (uint32_t)*t_mask >> tail_off; - return num_bits; - } + if (mask == UINT32_MAX) { + num_bits += 32; + atomic_set(t_mask, 0); + } else { + uint32_t bits = __builtin_ctz(~mask); - /* In multiword bit array we need to check if tail is spilling over to the next word. */ - size_t idx = off / 32; - size_t w_off = off - 32 * idx; - atomic_t *t_mask = &tail_mask[idx]; + if (bits == 0) { + break; + } - mask = (*t_mask | BIT(w_off)) >> w_off; - num_bits = (~mask == 0) ? 32 : __builtin_ctz(~mask); - if (num_bits == 1) { - return num_bits; - } + num_bits += bits; + atomic_and(t_mask, ~(BIT_MASK(bits) << tail_off)); - mask = BIT_MASK(num_bits - 1) << (w_off + 1); - atomic_and(t_mask, ~mask); - if (((w_off + num_bits) == 32) && (idx < (HEAP_NUM_WORDS - 1))) { - size_t tmp_bits; + if (bits + tail_off < 32) { + break; + } - /* If we are at the end of the one mask we need to check the beginning of the - * next one as there might be remaining part of the tail. - */ - do { - t_mask++; - tmp_bits = (*t_mask == UINT32_MAX) ? 32 : __builtin_ctz(~(*t_mask)); - mask = (tmp_bits == 32) ? UINT32_MAX : BIT_MASK(tmp_bits); - atomic_and(t_mask, ~mask); - num_bits += tmp_bits; - } while ((tmp_bits == 32) && (t_mask != &tail_mask[HEAP_NUM_WORDS - 1])); - } + tail_off = 0; + } + + t_mask++; + } while ((HEAP_NUM_WORDS > 1) && (t_mask != &tail_mask[HEAP_NUM_WORDS])); return num_bits; }