From 07711cb8bd40fd16b4dcae4c5d2fe52358564422 Mon Sep 17 00:00:00 2001
From: Immo Birnbaum
Date: Thu, 13 Jul 2023 22:22:53 +0200
Subject: [PATCH 1/3] arch: aarch32: place .bss, .noinit sections at the end of the binary

This is a follow-up to #53262, which still lacked the adjustment of the
.noinit section's position within the binary by the time the PR went
stale.

Adjust the linker command file so that the .bss and .noinit sections
are placed at the end of the resulting binary. Until now, those
sections have been located somewhere in the middle of the binary, so
that the inclusion of structures like statically defined heaps or
large zero-initialized arrays was reflected 1:1 in the resulting
binary's size. Even for a stripped binary, such data was included in
full, as the linker couldn't omit it due to subsequent sections within
the binary.

This fix has been tested with a 32 MB statically allocated heap and a
32 MB zero-initialized uint8 array. Both structures are clearly
identifiable in the memory consumption statistics; the final binary's
size, however, is unaffected by their inclusion.

Signed-off-by: Immo Birnbaum
---
 .../arm/aarch32/cortex_a_r/scripts/linker.ld | 62 ++++++++++---------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld b/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
index a27dd55a8cf29..15ddfba82c8e2 100644
--- a/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
+++ b/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
@@ -26,6 +26,13 @@
 #endif
 #define RAMABLE_REGION RAM
 
+/* section alignment directive, valid only if not running in XIP mode */
+#ifndef CONFIG_XIP
+    #define SECTION_ALIGN ALIGN(_region_min_align)
+#else
+    #define SECTION_ALIGN
+#endif
+
 #if !defined(CONFIG_XIP) && (CONFIG_FLASH_SIZE == 0)
 #define ROM_ADDR RAM_ADDR
 #else
@@ -73,8 +80,6 @@ _region_min_align = 4;
     . = ALIGN(_region_min_align)
 #endif
 
-#define BSS_ALIGN ALIGN(_region_min_align)
-
 MEMORY
 {
     FLASH (rx) : ORIGIN = ROM_ADDR, LENGTH = ROM_SIZE
@@ -266,35 +271,10 @@ SECTIONS
     _app_smem_rom_start = LOADADDR(_APP_SMEM_SECTION_NAME);
 #endif /* CONFIG_USERSPACE */
 
-    SECTION_DATA_PROLOGUE(_BSS_SECTION_NAME,(NOLOAD), BSS_ALIGN)
-    {
-        /*
-         * For performance, BSS section is assumed to be 4 byte aligned and
-         * a multiple of 4 bytes
-         */
-        . = ALIGN(4);
-        __bss_start = .;
-        __kernel_ram_start = .;
-
-        *(.bss)
-        *(".bss.*")
-        *(COMMON)
-        *(".kernel_bss.*")
-
-#ifdef CONFIG_CODE_DATA_RELOCATION
-#include
-#endif
-
-        /*
-         * As memory is cleared in words only, it is simpler to ensure the BSS
-         * section ends on a 4 byte boundary. This wastes a maximum of 3 bytes.
-         */
-        __bss_end = ALIGN(4);
-    } GROUP_DATA_LINK_IN(RAMABLE_REGION, RAMABLE_REGION)
-
-#include
+    . = ALIGN(_region_min_align);
+    __kernel_ram_start = .;
 
-    SECTION_DATA_PROLOGUE(_DATA_SECTION_NAME,,)
+    SECTION_DATA_PROLOGUE(_DATA_SECTION_NAME,,SECTION_ALIGN)
     {
         __data_region_start = .;
         __data_start = .;
@@ -328,7 +308,29 @@ SECTIONS
 #include
 
     __data_region_end = .;
+    . = ALIGN(_region_min_align);
+
+    SECTION_DATA_PROLOGUE(_BSS_SECTION_NAME,(NOLOAD), SECTION_ALIGN)
+    {
+        __bss_start = .;
+
+        *(.bss)
+        *(".bss.*")
+        *(COMMON)
+        *(".kernel_bss.*")
+#ifdef CONFIG_CODE_DATA_RELOCATION
+#include
+#endif
+
+        /*
+         * As memory is cleared in words only, it is simpler to ensure the BSS
+         * section ends on a 4 byte boundary. This wastes a maximum of 3 bytes.
+         */
+        __bss_end = ALIGN(4);
+    } GROUP_DATA_LINK_IN(RAMABLE_REGION, RAMABLE_REGION)
+
+#include
 
     /* Define linker symbols */
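The practical effect of this reordering is easiest to see with a test
application like the one the commit message describes. The sketch below is
illustrative only and not part of the patch; big_heap, big_array and
scratch_buffer are hypothetical names, and the exact output section each
object lands in may vary with the Zephyr version. With .bss and .noinit at
the end of the image, all three objects show up in the RAM usage statistics
but no longer add their combined size as padding to the binary on disk:

#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>

#define BIG_SIZE (32 * 1024 * 1024)

/* Statically defined 32 MB heap; its backing storage lives in a
 * zero-/uninitialized data section, not in the initialized image.
 */
K_HEAP_DEFINE(big_heap, BIG_SIZE);

/* 32 MB zero-initialized array: placed in .bss. */
static uint8_t big_array[BIG_SIZE];

/* 32 MB uninitialized buffer: placed in .noinit. */
static __noinit uint8_t scratch_buffer[BIG_SIZE];

int main(void)
{
	void *p = k_heap_alloc(&big_heap, 1024, K_NO_WAIT);

	/* Touch the buffers so they are actually used. */
	big_array[0] = 1U;
	scratch_buffer[0] = 2U;
	printk("heap block at %p\n", p);

	return 0;
}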
From 9b4fede29c507004282df032e3282bd0a1668d0f Mon Sep 17 00:00:00 2001
From: Immo Birnbaum
Date: Mon, 7 Aug 2023 10:20:48 +0200
Subject: [PATCH 2/3] arch: aarch32: fix z_mapped_start location for non-XIP operation

Move the z_mapped_start marker for non-XIP builds to the base RAM
address, which is already where the marker is located when building
for XIP.

Prior to this change, z_mapped_start was located at the start of the
text section for non-XIP builds. However, at least for the Cortex-A
family of CPUs, the first section located at the RAM base address isn't
the text section, but the single 4k page containing the exception
vectors, which are copied to either address 0 or the HIVECS address
upon early boot. This caused the first 4k page at the bottom of RAM to
be considered available for memory mappings via the MMU at run-time,
followed by all the permanently mapped data, with available mappable
memory only continuing beyond z_mapped_end.

A call at run-time requesting memory to be mapped via the MMU therefore
always assigned the single 4k page containing the vectors data first,
before continuing with physical memory beyond z_mapped_end. For any map
call requesting more than 4k, this resulted in a contiguous virtual
memory address range mapped to non-contiguous physical memory. This
behaviour has already been documented in #51024.

This behaviour would also cause further problems if support for the
Cortex-A's VBAR register were added, which has been proposed before,
but eventually wasn't merged. Letting VBAR point to the RAM base, where
the 4k vectors page is located within the Zephyr image, instead of
copying the vectors to either 0 or the HIVECS address means that this
page may under no circumstances be re-assigned for use as regular RAM
mapped via the MMU.

Signed-off-by: Immo Birnbaum
---
 include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld b/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
index 15ddfba82c8e2..94d3ee2bd7100 100644
--- a/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
+++ b/include/zephyr/arch/arm/aarch32/cortex_a_r/scripts/linker.ld
@@ -138,9 +138,6 @@ SECTIONS
     {
         . = ALIGN(_region_min_align);
         __text_region_start = .;
-#ifndef CONFIG_XIP
-        z_mapped_start = .;
-#endif
 
 #include
 
@@ -252,9 +249,7 @@ SECTIONS
      */
     . = ALIGN(_region_min_align);
     _image_ram_start = .;
-#ifdef CONFIG_XIP
     z_mapped_start = .;
-#endif
 
     /* Located in generated directory. This file is populated by the
      * zephyr_linker_sources() Cmake function.
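The mapping problem this patch fixes can be sketched with a plain
k_mem_map() call, assuming a Zephyr 3.x target with CONFIG_MMU enabled
(the header name matches that era and may differ in other versions;
map_example is a hypothetical function). Before the fix, the physical 4k
vectors page at the RAM base was considered free, so a multi-page request
was backed by that page plus pages beyond z_mapped_end, i.e. non-contiguous
physical memory. With z_mapped_start at the RAM base, the vectors page is
never handed out:

#include <zephyr/kernel.h>
#include <zephyr/sys/mem_manage.h>

void map_example(void)
{
	/* Request 16 kB (four 4k pages) of mapped, zeroed RAM. */
	uint8_t *buf = k_mem_map(16 * 1024, K_MEM_PERM_RW);

	if (buf == NULL) {
		return; /* out of virtual address space or page frames */
	}

	/* The virtual range is contiguous either way; after the fix, the
	 * backing physical pages no longer mix the vectors page with
	 * memory located beyond z_mapped_end.
	 */
	buf[0] = 0xAA;
	buf[(16 * 1024) - 1] = 0x55;
}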
From d8545f42e2bfceb51dd89b990c632401374be70f Mon Sep 17 00:00:00 2001
From: Immo Birnbaum
Date: Tue, 8 Aug 2023 09:46:26 +0200
Subject: [PATCH 3/3] doc: release-notes: document aarch32 cortex_a_r linker command file changes

Document the changes implemented in #60368:

* Placement of the .bss and .noinit sections at the end of the binary,
  so that large zero-/uninitialized data structures such as heaps,
  arrays etc. don't have to be padded in the resulting binary.
* Location of the z_mapped_start marker: prevents the single 4k-page
  wide .vectors section right at the RAM base address from being
  assigned as dynamic memory by the MMU at run-time. Instead of
  pointing to the start of the subsequent .text section, the
  z_mapped_start marker now covers all the data contained within the
  binary that ends up in RAM.

Signed-off-by: Immo Birnbaum
---
 doc/releases/release-notes-3.5.rst | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/doc/releases/release-notes-3.5.rst b/doc/releases/release-notes-3.5.rst
index 117171f992939..a567e18112814 100644
--- a/doc/releases/release-notes-3.5.rst
+++ b/doc/releases/release-notes-3.5.rst
@@ -44,10 +44,33 @@ Kernel
 Architectures
 *************
 
-* ARM
+* ARC
 
 * ARM
 
+  * Fixed the Cortex-A/-R linker command file:
+
+    * The sections for zero-initialized (.bss) and uninitialized (.noinit) data
+      are now the last sections within the binary. This allows the linker to
+      account for the required memory without having to include large empty
+      spaces within the binary. With the .bss and .noinit sections placed
+      somewhere in the middle of the resulting binary, as was the case in
+      previous releases, the linker had to pad the space for zero-/uninitialized
+      data due to subsequent sections containing initialized data. The inclusion
+      of large zero-initialized arrays or statically defined heaps was reflected
+      directly in the size of the resulting binary, resulting in unnecessarily
+      large binaries, even when stripped.
+    * Fixed the location of the z_mapped_start address marker to point to the
+      base of RAM instead of the start of the .text section. The single 4k page
+      .vectors section, which is located right at the base of RAM before the
+      .text section and which was previously not included in the mapped memory
+      range, is now considered mapped and unavailable for dynamic memory mapping
+      via the MMU at run-time. This prevents the 4k page containing the
+      exception vectors data from being mapped as regular memory at run-time,
+      with any subsequently mapped pages located beyond the permanently mapped
+      memory regions (beyond z_mapped_end), which resulted in non-contiguous
+      memory allocation for any first memory request greater than 4k.
+
 * ARM64
 
 * RISC-V
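As a quick sanity check for the z_mapped_start change described above, a
debug helper along the following lines can print the permanently mapped
range; after the fix, the printed start address should equal the RAM base,
i.e. it includes the .vectors page. This is a hypothetical sketch: the
extern array declarations are the usual C idiom for linker-provided
symbols, and visibility of these kernel-internal symbols to application
code is an assumption here.

#include <zephyr/kernel.h>
#include <zephyr/sys/printk.h>

/* Linker-provided markers for the permanently mapped kernel image. */
extern char z_mapped_start[];
extern char z_mapped_end[];

void dump_mapped_range(void)
{
	printk("permanently mapped image: %p..%p (%zu bytes)\n",
	       (void *)z_mapped_start, (void *)z_mapped_end,
	       (size_t)(z_mapped_end - z_mapped_start));
}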