Skip to content

Commit 5932477

Browse files
authored
[libc] Add support for MVE to Arm startup code (#167338)
In order to have MVE support, the same bits of the CPACR register that enable the floating-point extension must be set.
1 parent 834a3cc commit 5932477

File tree

1 file changed

+18
-9
lines changed

1 file changed

+18
-9
lines changed

libc/startup/baremetal/arm/start.cpp

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -131,30 +131,39 @@ namespace LIBC_NAMESPACE_DECL {
131131
__arm_wsr("CPSR_c", 0x13); // SVC
132132
#endif
133133

134-
#ifdef __ARM_FP
135-
// Enable FPU
136-
#if __ARM_ARCH_PROFILE == 'M'
134+
#if __ARM_ARCH_PROFILE == 'M' && \
135+
(defined(__ARM_FP) || defined(__ARM_FEATURE_MVE))
136+
// Enable FPU and MVE. They can't be enabled independently: the two are
137+
// governed by the same bits in CPACR.
137138
// Based on
138139
// https://developer.arm.com/documentation/dui0646/c/Cortex-M7-Peripherals/Floating-Point-Unit/Enabling-the-FPU
139-
// Set CPACR cp10 and cp11
140-
auto cpacr = (volatile uint32_t *const)0xE000ED88;
140+
// Set CPACR cp10 and cp11.
141+
auto cpacr = reinterpret_cast<volatile uint32_t *const>(0xE000ED88);
141142
*cpacr |= (0xF << 20);
142143
__dsb(0xF);
143144
__isb(0xF);
144-
#elif __ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R'
145+
#if defined(__ARM_FEATURE_MVE)
146+
// Initialize low-overhead-loop tail predication to its neutral state
147+
uint32_t fpscr;
148+
__asm__ __volatile__("vmrs %0, FPSCR" : "=r"(fpscr) : :);
149+
fpscr |= (0x4 << 16);
150+
__asm__ __volatile__("vmsr FPSCR, %0" : : "r"(fpscr) :);
151+
#endif
152+
#elif (__ARM_ARCH_PROFILE == 'A' || __ARM_ARCH_PROFILE == 'R') && \
153+
defined(__ARM_FP)
154+
// Enable FPU.
145155
// Based on
146156
// https://developer.arm.com/documentation/dui0472/m/Compiler-Coding-Practices/Enabling-NEON-and-FPU-for-bare-metal
147-
// Set CPACR cp10 and cp11
157+
// Set CPACR cp10 and cp11.
148158
uint32_t cpacr = __arm_rsr("p15:0:c1:c0:2");
149159
cpacr |= (0xF << 20);
150160
__arm_wsr("p15:0:c1:c0:2", cpacr);
151161
__isb(0xF);
152162
// Set FPEXC.EN
153163
uint32_t fpexc;
154164
__asm__ __volatile__("vmrs %0, FPEXC" : "=r"(fpexc) : :);
155-
fpexc |= (1 << 30);
165+
fpexc |= (0x1 << 30);
156166
__asm__ __volatile__("vmsr FPEXC, %0" : : "r"(fpexc) :);
157-
#endif
158167
#endif
159168

160169
// Perform the equivalent of scatterloading

0 commit comments

Comments
 (0)