Skip to content

Commit cafe045

Browse files
ioannisgnashif
authored and committed
arm: cortex_m: make lazy FP stacking enabling dynamic
Under FPU sharing mode, any thread is allowed to generate a Floating Point context (use FP registers in FP instructions), regardless of whether it was pre-tagged with the K_FP_REGS option when it was created. When building with the MPU stack guard feature enabled, a large MPU stack guard is required to catch stack overflows if lazy FP stacking is enabled. When lazy FP stacking is not enabled, the default 32-byte guard is sufficient. If lazy stacking is enabled by default, all threads may potentially generate an FP context, so they would all need to program a large MPU guard, carved out of their reserved stack memory. To avoid this memory waste, we modify the behavior and make lazy stacking a dynamically enabled feature, implemented as follows: - threads that are not pre-tagged with K_FP_REGS and have not generated an FP context use the default MPU guard and disable lazy stacking. As long as these threads do not have an active FP context, they won't stack FP registers on ISRs and exceptions anyway, while they benefit from reserving only a small MPU guard. - as soon as a thread starts using FP registers, ISRs might temporarily experience increased latency, because lazy stacking is still disabled. This will be the case until the next context switch, at which point threads that have an active FP context are tagged with K_FP_REGS, enable lazy stacking, and program a wide MPU guard. The implementation is a tradeoff between performance (ISR latency) and memory consumption. Note that when the MPU stack guard feature is not enabled, lazy FP stacking is always activated. Signed-off-by: Ioannis Glaropoulos <[email protected]>
1 parent e063ba1 commit cafe045

File tree

2 files changed

+115
-7
lines changed

2 files changed

+115
-7
lines changed

arch/arm/core/aarch32/cortex_m/mpu/arm_core_mpu.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ LOG_MODULE_REGISTER(mpu);
5050
extern K_THREAD_STACK_DEFINE(z_main_stack, CONFIG_MAIN_STACK_SIZE);
5151
#endif
5252

53+
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING) \
54+
&& defined(CONFIG_MPU_STACK_GUARD)
55+
/*
 * Forward declaration of the helper defined in
 * arch/arm/core/aarch32/thread.c. It returns the MPU stack guard size
 * to use for the given thread and, as side effects, toggles lazy FP
 * stacking and adjusts the thread's stack bookkeeping.
 * z_arm_configure_dynamic_mpu_regions() calls it to obtain guard_size.
 */
uint32_t z_arm_mpu_stack_guard_and_fpu_adjust(struct k_thread *thread);
56+
#endif
57+
5358
static const struct z_arm_mpu_partition static_regions[] = {
5459
#if defined(CONFIG_COVERAGE_GCOV) && defined(CONFIG_USERSPACE)
5560
{
@@ -247,9 +252,7 @@ void z_arm_configure_dynamic_mpu_regions(struct k_thread *thread)
247252
size_t guard_size = MPU_GUARD_ALIGN_AND_SIZE;
248253

249254
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
250-
if ((thread->base.user_options & K_FP_REGS) != 0) {
251-
guard_size = MPU_GUARD_ALIGN_AND_SIZE_FLOAT;
252-
}
255+
guard_size = z_arm_mpu_stack_guard_and_fpu_adjust(thread);
253256
#endif
254257

255258
#if defined(CONFIG_USERSPACE)

arch/arm/core/aarch32/thread.c

Lines changed: 109 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
103103

104104
#if defined(CONFIG_USERSPACE) || defined(CONFIG_FPU_SHARING)
105105
thread->arch.mode = 0;
106+
#if FP_GUARD_EXTRA_SIZE > 0
107+
if ((thread->base.user_options & K_FP_REGS) != 0) {
108+
thread->arch.mode |= Z_ARM_MODE_MPU_GUARD_FLOAT_Msk;
109+
}
110+
#endif
106111
#if defined(CONFIG_USERSPACE)
107112
thread->arch.priv_stack_start = 0;
108113
#endif
@@ -113,6 +118,100 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *stack,
113118
*/
114119
}
115120

121+
#if defined(CONFIG_MPU_STACK_GUARD) && defined(CONFIG_FPU) \
122+
&& defined(CONFIG_FPU_SHARING)
123+
124+
/*
 * Keep a thread's stack bookkeeping consistent with the MPU guard size
 * (default or large/FP) that is about to be programmed for it.
 *
 * The Z_ARM_MODE_MPU_GUARD_FLOAT_Msk bit in thread->arch.mode records
 * which guard size the bookkeeping currently accounts for. If that bit
 * already matches use_large_guard, nothing is modified, so repeated
 * calls with the same argument make no further changes. Otherwise the
 * bit is flipped and FP_GUARD_EXTRA_SIZE bytes are moved between the
 * guard and the usable stack:
 *  - with CONFIG_USERSPACE, for a thread whose arch.priv_stack_start is
 *    non-zero (treated here as a user thread), only
 *    arch.priv_stack_start is shifted;
 *  - otherwise stack_info.start and stack_info.size are shifted
 *    together, so that start + size stays unchanged.
 *
 * @param thread          Thread whose bookkeeping is updated in place.
 * @param use_large_guard true to account for the large (FP) guard,
 *                        false to account for the default guard.
 */
static inline void z_arm_thread_stack_info_adjust(struct k_thread *thread,
125+
bool use_large_guard)
126+
{
127+
if (use_large_guard) {
128+
/* Switch to use a large MPU guard if not already. */
129+
if ((thread->arch.mode &
130+
Z_ARM_MODE_MPU_GUARD_FLOAT_Msk) == 0) {
131+
/* Default guard size is used. Update required. */
132+
thread->arch.mode |= Z_ARM_MODE_MPU_GUARD_FLOAT_Msk;
133+
#if defined(CONFIG_USERSPACE)
134+
if (thread->arch.priv_stack_start) {
135+
/* User thread: grow the guard by moving the start of
 * the privileged stack upwards.
 */
136+
thread->arch.priv_stack_start +=
137+
FP_GUARD_EXTRA_SIZE;
138+
} else
139+
#endif /* CONFIG_USERSPACE */
140+
{
141+
/* Privileged thread: the extra guard bytes are taken
 * from the bottom of the usable stack area.
 */
142+
thread->stack_info.start +=
143+
FP_GUARD_EXTRA_SIZE;
144+
thread->stack_info.size -=
145+
FP_GUARD_EXTRA_SIZE;
146+
}
147+
}
148+
} else {
149+
/* Switch to use the default MPU guard size if not already. */
150+
if ((thread->arch.mode &
151+
Z_ARM_MODE_MPU_GUARD_FLOAT_Msk) != 0) {
152+
/* Large guard size is used. Update required. */
153+
thread->arch.mode &= ~Z_ARM_MODE_MPU_GUARD_FLOAT_Msk;
154+
#if defined(CONFIG_USERSPACE)
155+
if (thread->arch.priv_stack_start) {
156+
/* User thread: shrink the guard by moving the start of
 * the privileged stack back down.
 */
157+
thread->arch.priv_stack_start -=
158+
FP_GUARD_EXTRA_SIZE;
159+
} else
160+
#endif /* CONFIG_USERSPACE */
161+
{
162+
/* Privileged thread: hand the extra guard bytes back to
 * the usable stack area.
 */
163+
thread->stack_info.start -=
164+
FP_GUARD_EXTRA_SIZE;
165+
thread->stack_info.size +=
166+
FP_GUARD_EXTRA_SIZE;
167+
}
168+
}
169+
}
170+
}
171+
172+
/*
173+
* Adjust the MPU stack guard size together with the FPU
174+
* policy and the stack_info values for the thread that is
175+
* being switched in.
*
* If the thread is tagged with K_FP_REGS, or the CONTROL_FPCA_Msk bit
* is set in thread->arch.mode, lazy FP stacking is enabled
* (FPCCR.LSPEN set), the stack bookkeeping is switched to the large
* guard, and the thread is tagged with K_FP_REGS. Nothing in this
* function clears that tag again. Otherwise lazy FP stacking is
* disabled (FPCCR.LSPEN cleared) and the bookkeeping is switched to
* the default guard.
*
* NOTE(review): thread->arch.mode presumably mirrors the thread's
* saved CONTROL register, so that FPCA indicates an active FP
* context - confirm against the context-switch code.
*
* @param thread Thread being switched in; modified in place.
*
* @return MPU_GUARD_ALIGN_AND_SIZE_FLOAT when the large guard is
*         selected, MPU_GUARD_ALIGN_AND_SIZE otherwise.
176+
*/
177+
uint32_t z_arm_mpu_stack_guard_and_fpu_adjust(struct k_thread *thread)
178+
{
179+
if (((thread->base.user_options & K_FP_REGS) != 0) ||
180+
((thread->arch.mode & CONTROL_FPCA_Msk) != 0)) {
181+
/* The thread has been pre-tagged (at creation or later) with
182+
* K_FP_REGS, i.e. it is expected to be using the FPU registers
183+
* (if not already). Activate lazy stacking and program a large
184+
* MPU guard to safely detect privileged thread stack overflows.
185+
*
186+
* OR
187+
* The thread is not pre-tagged with K_FP_REGS, but it has
188+
* generated an FP context. Activate lazy stacking and
189+
* program a large MPU guard to detect privileged thread
190+
* stack overflows.
191+
*/
192+
FPU->FPCCR |= FPU_FPCCR_LSPEN_Msk;
193+
194+
z_arm_thread_stack_info_adjust(thread, true);
195+
196+
/* Tag the thread with K_FP_REGS, so that the first condition
 * above also holds the next time this function runs for it.
 */
197+
thread->base.user_options |= K_FP_REGS;
198+
199+
return MPU_GUARD_ALIGN_AND_SIZE_FLOAT;
200+
}
201+
202+
/* Thread is not pre-tagged with K_FP_REGS, and it has
203+
* not been using the FPU. Since there is no active FPU
204+
* context, de-activate lazy stacking and program the
205+
* default MPU guard size.
206+
*/
207+
FPU->FPCCR &= (~FPU_FPCCR_LSPEN_Msk);
208+
209+
z_arm_thread_stack_info_adjust(thread, false);
210+
211+
return MPU_GUARD_ALIGN_AND_SIZE;
212+
}
213+
#endif
214+
116215
#ifdef CONFIG_USERSPACE
117216
FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
118217
void *p1, void *p2, void *p3)
@@ -129,7 +228,7 @@ FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
129228
* which accounted for memory borrowed from the thread stack.
130229
*/
131230
#if FP_GUARD_EXTRA_SIZE > 0
132-
if ((_current->base.user_options & K_FP_REGS) != 0) {
231+
if ((_current->arch.mode & Z_ARM_MODE_MPU_GUARD_FLOAT_Msk) != 0) {
133232
_current->stack_info.start -= FP_GUARD_EXTRA_SIZE;
134233
_current->stack_info.size += FP_GUARD_EXTRA_SIZE;
135234
}
@@ -144,7 +243,7 @@ FUNC_NORETURN void arch_user_mode_enter(k_thread_entry_t user_entry,
144243
*/
145244
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
146245
_current->arch.priv_stack_start +=
147-
(_current->base.user_options & K_FP_REGS) ?
246+
((_current->arch.mode & Z_ARM_MODE_MPU_GUARD_FLOAT_Msk) != 0) ?
148247
MPU_GUARD_ALIGN_AND_SIZE_FLOAT : MPU_GUARD_ALIGN_AND_SIZE;
149248
#else
150249
_current->arch.priv_stack_start += MPU_GUARD_ALIGN_AND_SIZE;
@@ -264,10 +363,16 @@ uint32_t z_check_thread_stack_fail(const uint32_t fault_addr, const uint32_t psp
264363
}
265364
#endif
266365

267-
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
268-
uint32_t guard_len = (thread->base.user_options & K_FP_REGS) ?
366+
#if (defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)) && \
367+
defined(CONFIG_MPU_STACK_GUARD)
368+
uint32_t guard_len =
369+
((_current->arch.mode & Z_ARM_MODE_MPU_GUARD_FLOAT_Msk) != 0) ?
269370
MPU_GUARD_ALIGN_AND_SIZE_FLOAT : MPU_GUARD_ALIGN_AND_SIZE;
270371
#else
372+
/* If MPU_STACK_GUARD is not enabled, the guard length is
373+
* effectively zero. Stack overflows may be detected only
374+
* for user threads in nPRIV mode.
375+
*/
271376
uint32_t guard_len = MPU_GUARD_ALIGN_AND_SIZE;
272377
#endif /* CONFIG_FPU && CONFIG_FPU_SHARING */
273378

0 commit comments

Comments
 (0)