diff --git a/soc/nordic/common/Kconfig b/soc/nordic/common/Kconfig
index 782d9452b67..e1fcd713c77 100644
--- a/soc/nordic/common/Kconfig
+++ b/soc/nordic/common/Kconfig
@@ -48,5 +48,22 @@ source "subsys/logging/Kconfig.template.log_config"
 
 endif # MRAM_LATENCY
 
+if HAS_NORDIC_DMM
+
+config DMM_HEAP_CHUNKS
+	int "Number of chunks in the DMM heap"
+	default 32
+	help
+	  DMM uses a simplified heap that hands out buffers as runs of
+	  contiguous chunks tracked in 32-bit masks. If many small buffers
+	  are in use at the same time, an allocation may fail. The number
+	  of chunks is a trade-off between performance and granularity.
+	  Must be a multiple of 32.
+
+config DMM_STATS
+	bool "Usage statistics"
+
+endif # HAS_NORDIC_DMM
+
 rsource "vpr/Kconfig"
 rsource "uicr/Kconfig"
diff --git a/soc/nordic/common/dmm.c b/soc/nordic/common/dmm.c
index 0b4e42f8c6d..ac22f8ee430 100644
--- a/soc/nordic/common/dmm.c
+++ b/soc/nordic/common/dmm.c
@@ -6,7 +6,7 @@
 #include
 #include
 #include
-#include <zephyr/sys/sys_heap.h>
+#include <zephyr/sys/bitarray.h>
 #include
 #include "dmm.h"
@@ -26,6 +26,9 @@
 	.dt_align = DMM_REG_ALIGN_SIZE(node_id), \
 	.dt_allc = &_BUILD_LINKER_END_VAR(node_id)},
 
+#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32)
+BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32));
+
 /* Generate declarations of linker variables used to determine size of preallocated variables
  * stored in memory sections spanning over memory regions.
  * These are used to determine memory left for dynamic bounce buffer allocator to work with.
@@ -42,9 +45,18 @@ struct dmm_region {
 };
 
 struct dmm_heap {
-	struct sys_heap heap;
+	uint32_t mask[HEAP_NUM_WORDS];
+	atomic_t tail_mask[HEAP_NUM_WORDS];
+	uintptr_t ptr;
+	uintptr_t ptr_end;
+	size_t blk_size;
 	const struct dmm_region *region;
+	sys_bitarray_t bitarray;
+#ifdef CONFIG_DMM_STATS
+	atomic_t curr_use;
+	uint32_t max_use;
 	struct k_spinlock lock;
+#endif
 };
 
 static const struct dmm_region dmm_regions[] = {
@@ -55,7 +67,6 @@ struct {
 	struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)];
 } dmm_heaps_data;
 
-
 static struct dmm_heap *dmm_heap_find(void *region)
 {
 	struct dmm_heap *dh;
@@ -103,37 +114,154 @@ static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_
 	return false;
 }
 
-static size_t dmm_heap_start_get(struct dmm_heap *dh)
+/* Function updates the tail bits mask after an allocation. Tail bits are all bits of
+ * the allocated run except the head. The tail bits mask, together with the known index
+ * of the first chunk (the free operation knows the buffer address), makes it possible
+ * to determine the buffer size (how many chunks it spans). Because tail_mask is updated
+ * after the allocation, the bits representing the allocated buffer can be modified
+ * safely; atomics are needed only because other bits of the same word may change.
+ */
+static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off)
 {
-	return ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
+	size_t tail_bits = num_bits - 1;
+	size_t tail_off = off + 1;
+
+	if (tail_bits == 0) {
+		return;
+	}
+
+	if (HEAP_NUM_WORDS == 1) {
+		atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off);
+		return;
+	}
+
+	size_t idx = tail_off / 32;
+	atomic_t *t_mask = &tail_mask[idx];
+
+	tail_off = tail_off % 32;
+	while (tail_bits > 0) {
+		uint32_t bits = MIN(32 - tail_off, tail_bits);
+		uint32_t mask = (bits == 32) ? UINT32_MAX : (BIT_MASK(bits) << tail_off);
+
+		atomic_or(t_mask, mask);
+		t_mask++;
+		tail_off = 0;
+		tail_bits -= bits;
+	}
 }
 
-static size_t dmm_heap_size_get(struct dmm_heap *dh)
+/* Function determines how many chunks were used for the allocated buffer. It is
+ * determined from the tail bits mask and the index of the starting chunk (@p off).
+ * Function is called before bits are freed in the bitarray so we can safely modify
+ * bits that belong to that buffer.
+ *
+ * @param tail_mask Pointer to the tail_mask array.
+ * @param off Index of the start of the buffer.
+ *
+ * @return Number of chunks that form the buffer that will be freed.
+ */
+static uint32_t num_bits_get(atomic_t *tail_mask, size_t off)
 {
-	return (dh->region->dt_size - (dmm_heap_start_get(dh) - dh->region->dt_addr));
+	uint32_t num_bits = 1;
+	size_t tail_off = off + 1;
+	size_t idx = tail_off / 32;
+	atomic_t *t_mask = &tail_mask[idx];
+
+	tail_off = tail_off % 32;
+	do {
+		uint32_t mask = (uint32_t)*t_mask >> tail_off;
+
+		if (mask == UINT32_MAX) {
+			num_bits += 32;
+			atomic_set(t_mask, 0);
+		} else {
+			uint32_t bits = __builtin_ctz(~mask);
+
+			if (bits == 0) {
+				break;
+			}
+
+			num_bits += bits;
+			atomic_and(t_mask, ~(BIT_MASK(bits) << tail_off));
+
+			if (bits + tail_off < 32) {
+				break;
+			}
+
+			tail_off = 0;
+		}
+
+		t_mask++;
+	} while ((HEAP_NUM_WORDS > 1) && (t_mask != &tail_mask[HEAP_NUM_WORDS]));
+
+	return num_bits;
 }
 
 static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length)
 {
-	void *ret;
-	k_spinlock_key_t key;
+	size_t num_bits, off;
+	int rv;
+
+	if (dh->ptr == 0) {
+		/* Not initialized. */
+		return NULL;
+	}
 
 	length = ROUND_UP(length, dh->region->dt_align);
+	num_bits = DIV_ROUND_UP(length, dh->blk_size);
+
+	rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off);
+	if (rv < 0) {
+		return NULL;
+	}
+
+	tail_mask_set(dh->tail_mask, num_bits, off);
+
+#ifdef CONFIG_DMM_STATS
+	k_spinlock_key_t key;
 
 	key = k_spin_lock(&dh->lock);
-	ret = sys_heap_aligned_alloc(&dh->heap, dh->region->dt_align, length);
+	dh->curr_use += num_bits;
+	dh->max_use = MAX(dh->max_use, dh->curr_use);
 	k_spin_unlock(&dh->lock, key);
+#endif
 
-	return ret;
+	return (void *)(dh->ptr + dh->blk_size * off);
 }
 
 static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
 {
-	k_spinlock_key_t key;
+	size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size;
+	size_t num_bits = num_bits_get(dh->tail_mask, offset);
+	int rv;
+
+#ifdef CONFIG_DMM_STATS
+	atomic_sub(&dh->curr_use, num_bits);
+#endif
+	rv = sys_bitarray_free(&dh->bitarray, num_bits, offset);
+	(void)rv;
+	__ASSERT_NO_MSG(rv == 0);
+}
 
-	key = k_spin_lock(&dh->lock);
-	sys_heap_free(&dh->heap, buffer);
-	k_spin_unlock(&dh->lock, key);
+static void dmm_memcpy(void *dst, const void *src, size_t len)
+{
+#define IS_ALIGNED32(x) IS_ALIGNED(x, sizeof(uint32_t))
+#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
+	if (IS_ALIGNED64(len) && IS_ALIGNED64(dst) && IS_ALIGNED64(src)) {
+		for (uint32_t i = 0; i < len / sizeof(uint64_t); i++) {
+			((uint64_t *)dst)[i] = ((uint64_t *)src)[i];
+		}
+		return;
+	}
+
+	if (IS_ALIGNED32(len) && IS_ALIGNED32(dst) && IS_ALIGNED32(src)) {
+		for (uint32_t i = 0; i < len / sizeof(uint32_t); i++) {
+			((uint32_t *)dst)[i] = ((uint32_t *)src)[i];
+		}
+		return;
+	}
+
+	memcpy(dst, src, len);
 }
 
 int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_length,
@@ -172,7 +300,7 @@ int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_le
 			return -ENOMEM;
 		}
 		/* - copy user buffer contents into allocated buffer */
-		memcpy(*buffer_out, user_buffer, user_length);
+		dmm_memcpy(*buffer_out, user_buffer, user_length);
 	}
 
 	/* Check if device memory region is cacheable
@@ -201,7 +329,7 @@ int dmm_buffer_out_release(void *region, void *buffer_out)
 	/* Check if output buffer is contained within memory area
 	 * managed by dynamic memory allocator
 	 */
-	if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
+	if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end)) {
 		/* If yes, free the buffer */
 		dmm_buffer_free(dh, buffer_out);
 	}
@@ -281,14 +409,14 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
 	 * If no, copy allocated buffer to the user buffer
 	 */
 	if (buffer_in != user_buffer) {
-		memcpy(user_buffer, buffer_in, user_length);
+		dmm_memcpy(user_buffer, buffer_in, user_length);
 	}
 	/* If yes, no action is needed */
 
 	/* Check if input buffer is contained within memory area
 	 * managed by dynamic memory allocator
 	 */
-	if (is_buffer_within_region(addr, 0, dmm_heap_start_get(dh), dmm_heap_size_get(dh))) {
+	if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end)) {
 		/* If yes, free the buffer */
 		dmm_buffer_free(dh, buffer_in);
 	}
@@ -297,14 +425,51 @@ int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, v
 	return 0;
 }
 
+int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use)
+{
+#ifdef CONFIG_DMM_STATS
+	struct dmm_heap *dh;
+
+	dh = dmm_heap_find(region);
+	if (dh == NULL) {
+		return -EINVAL;
+	}
+
+	if (start_addr) {
+		*start_addr = dh->ptr;
+	}
+
+	if (curr_use) {
+		*curr_use = (100 * dh->curr_use) / dh->bitarray.num_bits;
+	}
+
+	if (max_use) {
+		*max_use = (100 * dh->max_use) / dh->bitarray.num_bits;
+	}
+
+	return 0;
+#else
+	return -ENOTSUP;
+#endif
+}
+
 int dmm_init(void)
 {
 	struct dmm_heap *dh;
+	int blk_cnt;
+	int heap_space;
 
 	for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) {
 		dh = &dmm_heaps_data.dmm_heaps[idx];
 		dh->region = &dmm_regions[idx];
-		sys_heap_init(&dh->heap, (void *)dmm_heap_start_get(dh), dmm_heap_size_get(dh));
+		dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
+		heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr);
+		dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align);
+		blk_cnt = heap_space / dh->blk_size;
+		dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size;
+		dh->bitarray.num_bits = blk_cnt;
+		dh->bitarray.num_bundles = HEAP_NUM_WORDS;
+		dh->bitarray.bundles = dh->mask;
 	}
 
 	return 0;
diff --git a/soc/nordic/common/dmm.h b/soc/nordic/common/dmm.h
index 34b517c92df..09486289aa6 100644
--- a/soc/nordic/common/dmm.h
+++ b/soc/nordic/common/dmm.h
@@ -35,12 +35,12 @@ extern "C" {
  * Cache line alignment is required if region is cacheable and data cache is enabled.
  */
 #define DMM_REG_ALIGN_SIZE(node_id) \
-	(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint8_t))
+	(DMM_IS_REG_CACHEABLE(node_id) ? CONFIG_DCACHE_LINE_SIZE : sizeof(uint32_t))
 
 #else
 
 #define DMM_IS_REG_CACHEABLE(node_id) 0
-#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint8_t))
+#define DMM_REG_ALIGN_SIZE(node_id) (sizeof(uint32_t))
 
 #endif /* CONFIG_DCACHE */
@@ -163,6 +163,22 @@ int dmm_buffer_in_prepare(void *region, void *user_buffer, size_t user_length, v
  */
 int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, void *buffer_in);
 
+/**
+ * @brief Get statistics.
+ *
+ * Must be enabled with CONFIG_DMM_STATS.
+ *
+ * @param[in] region DMM memory region.
+ * @param[out] start_addr Location where the start address of the memory region is written. Can be NULL.
+ * @param[out] curr_use Location where the current use (in percent) is written. Can be NULL.
+ * @param[out] max_use Location where the maximum use (in percent) is written. Can be NULL.
+ *
+ * @retval 0 on success.
+ * @retval -EINVAL Invalid region.
+ * @retval -ENOTSUP Feature is disabled.
+ */
+int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use);
+
 /**
  * @brief Initialize DMM.
  *
@@ -210,6 +226,17 @@ static ALWAYS_INLINE int dmm_buffer_in_release(void *region, void *user_buffer,
 	return 0;
 }
 
+static ALWAYS_INLINE int dmm_stats_get(void *region, uintptr_t *start_addr,
+				       uint32_t *curr_use, uint32_t *max_use)
+{
+	ARG_UNUSED(region);
+	ARG_UNUSED(start_addr);
+	ARG_UNUSED(curr_use);
+	ARG_UNUSED(max_use);
+
+	return 0;
+}
+
 static ALWAYS_INLINE int dmm_init(void)
 {
 	return 0;
diff --git a/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay b/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay
index 3e0b1b4d535..48a4e8adc26 100644
--- a/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay
+++ b/tests/boards/nrf/dmm/boards/nrf5340dk_nrf5340_cpuapp.overlay
@@ -1,3 +1,9 @@
+/*
+ * Copyright (c) 2024 Nordic Semiconductor ASA
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 / {
 	aliases {
 		dut-cache = &spi1;
@@ -52,3 +58,7 @@
 	pinctrl-1 = <&spi3_sleep_alt>;
 	pinctrl-names = "default", "sleep";
 };
+
+cycle_timer: &timer1 {
+	status = "okay";
+};
diff --git a/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay b/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay
index e3924657b86..2507dd83dfe 100644
--- a/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay
+++ b/tests/boards/nrf/dmm/boards/nrf54h20dk_nrf54h20_cpuapp.overlay
@@ -1,3 +1,9 @@
+/*
+ * Copyright (c) 2024 Nordic Semiconductor ASA
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 / {
 	aliases {
 		dut-cache = &spi120;
@@ -58,3 +64,7 @@
 	pinctrl-names = "default", "sleep";
 	memory-regions = <&dma_fast_region>;
 };
+
+cycle_timer: &timer120 {
+	status = "okay";
+};
diff --git a/tests/boards/nrf/dmm/prj.conf b/tests/boards/nrf/dmm/prj.conf
index 9467c292689..c05afbb6ad3 100644
--- a/tests/boards/nrf/dmm/prj.conf
+++ b/tests/boards/nrf/dmm/prj.conf
@@ -1 +1,6 @@
 CONFIG_ZTEST=y
+CONFIG_ZTRESS=y
+CONFIG_ASSERT=n
+CONFIG_SPIN_VALIDATE=n
+CONFIG_TEST_EXTRA_STACK_SIZE=512
+CONFIG_COUNTER=y
diff --git a/tests/boards/nrf/dmm/src/main.c b/tests/boards/nrf/dmm/src/main.c
index 214a9069752..58b7e891c79 100644
--- a/tests/boards/nrf/dmm/src/main.c
+++ b/tests/boards/nrf/dmm/src/main.c
@@ -9,9 +9,14 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
 
+#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
+
 #define DUT_CACHE DT_ALIAS(dut_cache)
 #define DUT_NOCACHE DT_ALIAS(dut_nocache)
@@ -25,7 +30,7 @@
 
 #if CONFIG_DCACHE
 BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_CACHE) == CONFIG_DCACHE_LINE_SIZE);
-BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_NOCACHE) == 1);
+BUILD_ASSERT(DMM_ALIGN_SIZE(DUT_NOCACHE) == sizeof(uint32_t));
 #endif
 
 struct dmm_test_region {
@@ -57,13 +62,49 @@ static const struct dmm_test_region dmm_test_regions[DMM_TEST_REGION_COUNT] = {
 		.size = DMM_TEST_GET_REG_SIZE(DUT_NOCACHE)
 	},
 };
 
+static const struct device *counter = DEVICE_DT_GET(DT_NODELABEL(cycle_timer));
+static uint32_t t_delta;
+
+static uint32_t ts_get(void)
+{
+	uint32_t t;
+
+	(void)counter_get_value(counter, &t);
+	return t;
+}
+
+static uint32_t ts_from_get(uint32_t from)
+{
+	return ts_get() - from;
+}
+
+static uint32_t cyc_to_us(uint32_t cyc)
+{
+	return counter_ticks_to_us(counter, cyc);
+}
+
+static uint32_t cyc_to_rem_ns(uint32_t cyc)
+{
+	uint32_t us = counter_ticks_to_us(counter, cyc);
+	uint32_t ns;
+
+	cyc = cyc - counter_us_to_ticks(counter, (uint64_t)us);
+	ns = counter_ticks_to_us(counter, 1000 * cyc);
+
+	return ns;
+}
+
 static void *test_setup(void)
 {
 	static struct dmm_fixture fixture;
+	uint32_t t;
 
+	counter_start(counter);
+	t = ts_get();
+	t_delta = ts_get() - t;
 	memcpy(fixture.regions, dmm_test_regions, sizeof(dmm_test_regions));
 	fixture.fill_value = 0x1;
+
 	return &fixture;
 }
@@ -79,13 +120,25 @@ static bool dmm_buffer_in_region_check(struct dmm_test_region *dtr, void *buf, s
 }
 
 static void dmm_check_output_buffer(struct dmm_test_region *dtr, uint32_t *fill_value,
-				    void *data, size_t size, bool was_prealloc, bool is_cached)
+				    void *data, size_t size, bool was_prealloc,
+				    bool is_cached, bool print_report)
 {
 	void *buf;
 	int retval;
+	uint32_t t;
+	bool aligned;
 
 	memset(data, (*fill_value)++, size);
+	t = ts_get();
 	retval = dmm_buffer_out_prepare(dtr->mem_reg, data, size, &buf);
+	t = ts_from_get(t);
+	aligned = IS_ALIGNED64(data) && IS_ALIGNED64(buf) && IS_ALIGNED64(size);
+
+	if (print_report) {
+		TC_PRINT("%saligned buffer out prepare size:%d buf:%p took %d.%dus (%d cycles)\n",
+			 aligned ? "" : "not ", size, buf, cyc_to_us(t), cyc_to_rem_ns(t), t);
+	}
+
 	zassert_ok(retval);
 	if (IS_ENABLED(CONFIG_DCACHE) && is_cached) {
 		zassert_true(IS_ALIGNED(buf, CONFIG_DCACHE_LINE_SIZE));
@@ -104,21 +157,37 @@ static void dmm_check_output_buffer(struct dmm_test_region *dtr, uint32_t *fill_
 	sys_cache_data_invd_range(buf, size);
 	zassert_mem_equal(buf, data, size);
 
+	t = ts_get();
 	retval = dmm_buffer_out_release(dtr->mem_reg, buf);
+	t = ts_from_get(t);
+	if (print_report) {
+		TC_PRINT("buffer out release buf:%p size:%d took %d.%dus (%d cycles)\n",
+			 buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t);
+	}
 	zassert_ok(retval);
 }
 
 static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_value,
-				   void *data, size_t size, bool was_prealloc, bool is_cached)
+				   void *data, size_t size, bool was_prealloc,
+				   bool is_cached, bool print_report)
 {
 	void *buf;
 	int retval;
+	uint32_t t;
 	uint8_t intermediate_buf[128];
+	bool aligned;
 
-	zassert_true(size < sizeof(intermediate_buf));
+	zassert_true(size <= sizeof(intermediate_buf));
 
+	t = ts_get();
 	retval = dmm_buffer_in_prepare(dtr->mem_reg, data, size, &buf);
+	t = ts_from_get(t);
+	aligned = IS_ALIGNED64(data) && IS_ALIGNED64(buf) && IS_ALIGNED64(size);
 	zassert_ok(retval);
+	if (print_report) {
+		TC_PRINT("%saligned buffer in prepare buf:%p size:%d took %d.%dus (%d cycles)\n",
+			 aligned ? "" : "not ", buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t);
"" : "not ", buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t); + } if (IS_ENABLED(CONFIG_DCACHE) && is_cached) { zassert_true(IS_ALIGNED(buf, CONFIG_DCACHE_LINE_SIZE)); } @@ -144,7 +213,13 @@ static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_v memset(buf, (*fill_value)++, size); } + t = ts_get(); retval = dmm_buffer_in_release(dtr->mem_reg, data, size, buf); + t = ts_from_get(t); + if (print_report) { + TC_PRINT("buffer in release buf:%p size:%d took %d.%dus (%d cycles)\n", + buf, size, cyc_to_us(t), cyc_to_rem_ns(t), t); + } zassert_ok(retval); zassert_mem_equal(data, intermediate_buf, size); @@ -152,10 +227,14 @@ static void dmm_check_input_buffer(struct dmm_test_region *dtr, uint32_t *fill_v ZTEST_USER_F(dmm, test_check_dev_cache_in_allocate) { - uint8_t user_data[16]; + uint8_t user_data[128] __aligned(sizeof(uint64_t)); dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), false, true); + user_data, 16, false, true, false); + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, true); + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, sizeof(user_data), false, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_in_preallocate) @@ -163,15 +242,30 @@ ZTEST_USER_F(dmm, test_check_dev_cache_in_preallocate) static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_CACHE); dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), true, true); + user_data, sizeof(user_data), true, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_out_allocate) { - uint8_t user_data[16]; + uint8_t user_data[129] __aligned(sizeof(uint64_t)); + + /* First run to get code into ICACHE so that following runs has consistent timing. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, false); + + /* Aligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + user_data, 16, false, true, true); + /* Unaligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + &user_data[1], 16, false, true, true); + /* Aligned user buffer. */ dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), false, true); + user_data, sizeof(user_data) - 1, false, true, true); + /* Unaligned user buffer. */ + dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, + &user_data[1], sizeof(user_data) - 1, false, true, true); } ZTEST_USER_F(dmm, test_check_dev_cache_out_preallocate) @@ -179,15 +273,31 @@ ZTEST_USER_F(dmm, test_check_dev_cache_out_preallocate) static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_CACHE); dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_CACHE], &fixture->fill_value, - user_data, sizeof(user_data), true, true); + user_data, sizeof(user_data), true, true, true); } ZTEST_USER_F(dmm, test_check_dev_nocache_in_allocate) { - uint8_t user_data[16]; + uint8_t user_data[129] __aligned(sizeof(uint64_t)); + + dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value, + user_data, 16, false, false, false); + + /* Aligned user buffer. 
+	dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+			       user_data, 16, false, false, true);
+
+	/* Unaligned user buffer. */
+	dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+			       &user_data[1], 16, false, false, true);
+	/* Aligned user buffer. */
 	dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
-			       user_data, sizeof(user_data), false, false);
+			       user_data, sizeof(user_data) - 1, false, false, true);
+
+	/* Unaligned user buffer. */
+	dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+			       &user_data[1], sizeof(user_data) - 1, false, false, true);
 }
 
@@ -195,15 +305,30 @@ ZTEST_USER_F(dmm, test_check_dev_nocache_in_preallocate)
 	static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_NOCACHE);
 
 	dmm_check_input_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
-			       user_data, sizeof(user_data), true, false);
+			       user_data, sizeof(user_data), true, false, true);
 }
 
 ZTEST_USER_F(dmm, test_check_dev_nocache_out_allocate)
 {
-	uint8_t user_data[16];
+	uint8_t user_data[129] __aligned(sizeof(uint64_t));
+
+	/* First run to get code into ICACHE so that the following results are consistent. */
+	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+				user_data, 16, false, false, false);
+	/* Aligned user buffer. */
+	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+				user_data, 16, false, false, true);
+	/* Unaligned user buffer. */
+	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+				&user_data[1], 16, false, false, true);
+
+	/* Aligned user buffer. */
+	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
+				user_data, sizeof(user_data) - 1, false, false, true);
+	/* Unaligned user buffer. */
 	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
-				user_data, sizeof(user_data), false, false);
+				&user_data[1], sizeof(user_data) - 1, false, false, true);
 }
 
@@ -211,7 +336,233 @@ ZTEST_USER_F(dmm, test_check_dev_nocache_out_preallocate)
 	static uint8_t user_data[16] DMM_MEMORY_SECTION(DUT_NOCACHE);
 
 	dmm_check_output_buffer(&fixture->regions[DMM_TEST_REGION_NOCACHE], &fixture->fill_value,
-				user_data, sizeof(user_data), true, false);
+				user_data, sizeof(user_data), true, false, true);
+}
+
+ZTEST_USER_F(dmm, test_check_multiple_alloc_and_free)
+{
+	int retval;
+	uint8_t buf[256];
+	uint8_t buf2[32];
+	void *dmm_buf;
+	void *dmm_buf2;
+	void *mem_reg = fixture->regions[DMM_TEST_REGION_NOCACHE].mem_reg;
+	uintptr_t start_address;
+	uint32_t curr_use, max_use;
+
+	if (IS_ENABLED(CONFIG_DMM_STATS)) {
+		retval = dmm_stats_get(mem_reg, &start_address, &curr_use, &max_use);
+		zassert_ok(retval);
+	}
+
+	memset(buf, 0, sizeof(buf));
+	memset(buf2, 0, sizeof(buf2));
+
+	retval = dmm_buffer_out_prepare(mem_reg, (void *)buf, sizeof(buf), &dmm_buf);
+	zassert_ok(retval);
+	zassert_true(dmm_buf != NULL);
+
+	retval = dmm_buffer_out_prepare(mem_reg, (void *)buf2, sizeof(buf2), &dmm_buf2);
+	zassert_ok(retval);
+	zassert_true(dmm_buf2 != NULL);
+
+	retval = dmm_buffer_out_release(mem_reg, dmm_buf2);
+	zassert_ok(retval);
+	zassert_true(dmm_buf != NULL);
+
+	retval = dmm_buffer_out_release(mem_reg, dmm_buf);
+	zassert_ok(retval);
+	zassert_true(dmm_buf != NULL);
+
+	if (IS_ENABLED(CONFIG_DMM_STATS)) {
+		uint32_t curr_use2;
+
+		retval = dmm_stats_get(mem_reg, &start_address, &curr_use2, &max_use);
+		zassert_ok(retval);
+		zassert_equal(curr_use, curr_use2);
+		TC_PRINT("Stats start_address:%p current use:%d%% max use:%d%%\n",
+			 (void *)start_address, curr_use2, max_use);
+	}
+}
+
+struct dmm_stress_data {
+	void *mem_reg;
+	void *alloc_ptr[32];
+	uint8_t alloc_token[32];
+	size_t alloc_len[32];
+	atomic_t alloc_mask;
+	atomic_t busy_mask;
+	atomic_t fails;
+	atomic_t cnt;
+	bool cached;
+};
+
+static void stress_free_op(struct dmm_stress_data *data, int prio, int id)
+{
+	/* Buffer is allocated. */
+	uint8_t token = data->alloc_token[id];
+	size_t len = data->alloc_len[id];
+	uint8_t *ptr = data->alloc_ptr[id];
+	int rv;
+
+	for (int j = 0; j < len; j++) {
+		uint8_t exp_val = (uint8_t)(token + j);
+
+		if (ptr[j] != exp_val) {
+			for (int k = 0; k < len; k++) {
+				printk("%02x ", ptr[k]);
+			}
+		}
+		zassert_equal(ptr[j], exp_val, "At %d got:%d exp:%d, len:%d id:%d, alloc_cnt:%d",
+			      j, ptr[j], exp_val, len, id, (uint32_t)data->cnt);
+	}
+
+	rv = dmm_buffer_in_release(data->mem_reg, ptr, len, ptr);
+	zassert_ok(rv);
+	/* Indicate that buffer is released. */
+	atomic_and(&data->alloc_mask, ~BIT(id));
+}
+
+static bool stress_alloc_op(struct dmm_stress_data *data, int prio, int id)
+{
+	uint32_t r32 = sys_rand32_get();
+	size_t len = r32 % 512;
+	uint8_t *ptr = data->alloc_ptr[id];
+	int rv;
+
+	/* Rarely allocate a bigger buffer. */
+	if ((r32 & 0x7) == 0) {
+		len += 512;
+	}
+
+	rv = dmm_buffer_in_prepare(data->mem_reg, &r32/*dummy*/, len, (void **)&ptr);
+	if (rv < 0) {
+		atomic_inc(&data->fails);
+		return true;
+	}
+
+	uint8_t token = r32 >> 24;
+
+	data->alloc_ptr[id] = ptr;
+	data->alloc_len[id] = len;
+	data->alloc_token[id] = token;
+	for (int j = 0; j < len; j++) {
+		ptr[j] = (uint8_t)(j + token);
+	}
+	if (data->cached) {
+		sys_cache_data_flush_range(ptr, len);
+	}
+	atomic_inc(&data->cnt);
+	return false;
+}
+
+bool stress_func(void *user_data, uint32_t cnt, bool last, int prio)
+{
+	struct dmm_stress_data *data = user_data;
+	uint32_t r = sys_rand32_get();
+	int rpt = r & 0x3;
+
+	r >>= 2;
+
+	for (int i = 0; i < rpt + 1; i++) {
+		int id = r % 32;
+		int key;
+		bool free_op;
+		bool clear_bit;
+
+		key = irq_lock();
+		if ((data->busy_mask & BIT(id)) == 0) {
+			data->busy_mask |= BIT(id);
+			if (data->alloc_mask & BIT(id)) {
+				free_op = true;
+			} else {
+				data->alloc_mask |= BIT(id);
+				free_op = false;
+			}
+		} else {
+			irq_unlock(key);
+			continue;
+		}
+
+		irq_unlock(key);
+		r >>= 5;
+
+		if (free_op) {
+			stress_free_op(data, prio, id);
+			clear_bit = true;
+		} else {
+			clear_bit = stress_alloc_op(data, prio, id);
+		}
+
+		key = irq_lock();
+		data->busy_mask &= ~BIT(id);
+		if (clear_bit) {
+			data->alloc_mask &= ~BIT(id);
+		}
+		irq_unlock(key);
+	}
+
+	return true;
+}
+
+static void free_all(struct dmm_stress_data *data)
+{
+	while (data->alloc_mask) {
+		int id = 31 - __builtin_clz(data->alloc_mask);
+
+		stress_free_op(data, 0, id);
+		data->alloc_mask &= ~BIT(id);
+	}
+}
+
+static void stress_allocator(void *mem_reg, bool cached)
+{
+	uint32_t timeout = 3000;
+	struct dmm_stress_data ctx;
+	int rv;
+	uint32_t curr_use;
+
+	if (mem_reg == NULL) {
+		ztest_test_skip();
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.mem_reg = mem_reg;
+	ctx.cached = cached;
+
+	if (IS_ENABLED(CONFIG_DMM_STATS)) {
+		rv = dmm_stats_get(ctx.mem_reg, NULL, &curr_use, NULL);
+		zassert_ok(rv);
+	}
+
+	ztress_set_timeout(K_MSEC(timeout));
+
+	ZTRESS_EXECUTE(ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4)),
+		       ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4)),
+		       ZTRESS_THREAD(stress_func, &ctx, INT32_MAX, INT32_MAX, Z_TIMEOUT_TICKS(4)));
+
+	free_all(&ctx);
+	TC_PRINT("Executed %d allocation operations. Failed to allocate %d times.\n",
+		 (uint32_t)ctx.cnt, (uint32_t)ctx.fails);
+
+	if (IS_ENABLED(CONFIG_DMM_STATS)) {
+		uint32_t curr_use2;
+
+		rv = dmm_stats_get(ctx.mem_reg, NULL, &curr_use2, NULL);
+		zassert_ok(rv);
+		zassert_equal(curr_use, curr_use2, "Unexpected usage got:%d exp:%d",
+			      curr_use2, curr_use);
+	}
+}
+
+ZTEST_F(dmm, test_stress_allocator_nocache)
+{
+	stress_allocator(fixture->regions[DMM_TEST_REGION_NOCACHE].mem_reg, false);
+}
+
+ZTEST_F(dmm, test_stress_allocator_cache)
+{
+	stress_allocator(fixture->regions[DMM_TEST_REGION_CACHE].mem_reg, true);
 }
 
 ZTEST_SUITE(dmm, NULL, test_setup, NULL, test_cleanup, NULL);
diff --git a/tests/boards/nrf/dmm/testcase.yaml b/tests/boards/nrf/dmm/testcase.yaml
index b5f41f281a5..7fc991d4824 100644
--- a/tests/boards/nrf/dmm/testcase.yaml
+++ b/tests/boards/nrf/dmm/testcase.yaml
@@ -16,3 +16,14 @@ tests:
       - CONFIG_DCACHE=n
     platform_allow:
      - nrf54h20dk/nrf54h20/cpuapp
+  boards.nrf.dmm.stats:
+    extra_configs:
+      - CONFIG_DMM_STATS=y
+    platform_allow:
+      - nrf54h20dk/nrf54h20/cpuapp
+  boards.nrf.dmm.more_chunks:
+    extra_configs:
+      - CONFIG_DMM_STATS=y
+      - CONFIG_DMM_HEAP_CHUNKS=96
+    platform_allow:
+      - nrf54h20dk/nrf54h20/cpuapp
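
To make the CONFIG_DMM_HEAP_CHUNKS trade-off concrete, the sketch below mirrors the sizing arithmetic from dmm_init() in the patch. It is a host-buildable illustration only: the region size (4096 bytes) and 32-byte alignment are made-up example values, and ROUND_UP/DIV_ROUND_UP are redefined locally as stand-ins for the Zephyr macros.

#include <stdio.h>
#include <stddef.h>

/* Local stand-ins for Zephyr's ROUND_UP/DIV_ROUND_UP so this builds on a host. */
#define ROUND_UP(x, a)     ((((x) + (a) - 1) / (a)) * (a))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Assumed example: 4096 bytes left for the allocator, 32-byte alignment. */
	const size_t heap_space = 4096;
	const size_t align = 32;

	for (size_t chunks = 32; chunks <= 128; chunks += 32) {
		/* Same formula as dmm_init(): 32 * HEAP_NUM_WORDS == CONFIG_DMM_HEAP_CHUNKS. */
		size_t blk_size = ROUND_UP(heap_space / chunks, align);
		size_t blk_cnt = heap_space / blk_size;
		/* Any request, however small, consumes at least one chunk. */
		size_t chunks_for_5 = DIV_ROUND_UP(5, blk_size);

		printf("CHUNKS=%zu: blk_size=%zu usable_chunks=%zu 5-byte-alloc=%zu chunk(s)\n",
		       chunks, blk_size, blk_cnt, chunks_for_5);
	}

	return 0;
}

More chunks give finer granularity (less memory consumed per small buffer) at the cost of more mask words to manage, which is the performance/granularity trade-off the Kconfig help text refers to.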
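A minimal usage sketch of the dmm_stats_get() API added in this patch follows; the helper name log_dmm_usage() and the log message are illustrative only, not part of the patch. The error values match the doxygen comment above: -EINVAL for an unknown region and -ENOTSUP when CONFIG_DMM_STATS is disabled.

#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>
#include "dmm.h"

/* Illustrative helper (not part of the patch): print how full a DMM region is.
 * mem_reg is the same region pointer passed to the other dmm_buffer_* calls.
 */
static void log_dmm_usage(void *mem_reg)
{
	uintptr_t start;
	uint32_t curr, max;
	int err = dmm_stats_get(mem_reg, &start, &curr, &max);

	if (err == -ENOTSUP) {
		return; /* CONFIG_DMM_STATS is disabled. */
	} else if (err < 0) {
		return; /* e.g. -EINVAL for an unknown region. */
	}

	printk("DMM region %p: current %u%%, peak %u%%\n", (void *)start, curr, max);
}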