Skip to content

Commit e50851d

Browse files
peter-mitsis authored and cfriedt committed
arch: xtensa: Add support for lazy HiFi ctx switching
When lazy HiFi context switching is enabled, the system starts with the HiFi coprocessor disabled. Should the thread use that coprocessor, it will generate an exception which in turn will enable the coprocessor and save/restore the HiFi registers as appropriate. When switching to a new thread, the HiFi coprocessor is again disabled. For simplicity, there are no restrictions as to which thread is allowed to use the coprocessor. Signed-off-by: Peter Mitsis <[email protected]>
1 parent d397a91 commit e50851d

File tree

7 files changed

+208
-1
lines changed

7 files changed

+208
-1
lines changed

arch/xtensa/core/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ zephyr_library_sources_ifdef(CONFIG_XTENSA_MPU mpu.c)
2929
zephyr_library_sources_ifdef(CONFIG_USERSPACE userspace.S syscall_helper.c)
3030
zephyr_library_sources_ifdef(CONFIG_LLEXT elf.c)
3131
zephyr_library_sources_ifdef(CONFIG_SMP smp.c)
32-
zephyr_library_sources_ifdef(CONFIG_XTENSA_EAGER_HIFI_SHARING xtensa_hifi.S)
32+
zephyr_library_sources_ifdef(CONFIG_XTENSA_HIFI_SHARING xtensa_hifi.S)
3333

3434
zephyr_library_sources_ifdef(
3535
CONFIG_KERNEL_VM_USE_CUSTOM_MEM_RANGE_CHECK

arch/xtensa/core/startup/reset_vector.S

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,17 @@ unpackdone:
575575
* all CPENABLE bits must be set, even though they may not always
576576
* correspond to a coprocessor.
577577
*/
578+
#ifdef CONFIG_XTENSA_LAZY_HIFI_SHARING
579+
/*
580+
* Disable HiFi coprocessor by default. Should a thread try using
581+
* the HiFi coprocessor, it will trigger an exception to both enable
582+
* it AND save/restore the HiFi state.
583+
*/
584+
585+
movi a2, 0xFF & ~(1 << XCHAL_CP_ID_AUDIOENGINELX)
586+
#else
578587
movi a2, 0xFF /* enable *all* bits, to allow dynamic TIE */
588+
#endif
579589
wsr a2, CPENABLE
580590
# endif
581591

arch/xtensa/core/vector_handlers.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
3131
extern char xtensa_arch_except_epc[];
3232
extern char xtensa_arch_kernel_oops_epc[];
3333

34+
extern void xtensa_lazy_hifi_save(uint8_t *regs);
35+
extern void xtensa_lazy_hifi_load(uint8_t *regs);
36+
37+
#if defined(CONFIG_XTENSA_LAZY_HIFI_SHARING) && (CONFIG_MP_MAX_NUM_CPUS > 1)
38+
#define LAZY_COPROCESSOR_LOCK
39+
40+
static struct k_spinlock coprocessor_lock;
41+
#endif
42+
43+
3444
bool xtensa_is_outside_stack_bounds(uintptr_t addr, size_t sz, uint32_t ps)
3545
{
3646
uintptr_t start, end;
@@ -279,6 +289,88 @@ static inline void *return_to(void *interrupted)
279289
#endif /* CONFIG_MULTITHREADING */
280290
}
281291

292+
#if defined(LAZY_COPROCESSOR_LOCK)
293+
/**
294+
* Spin until @a thread is no longer the HiFi owner on the specified CPU.
295+
* Note: Interrupts are locked on entry. Unlock before spinning to allow
296+
* an IPI to be caught and processed; restore them afterwards.
*
* @param cpu    CPU whose arch.hifi_owner field is polled
* @param thread Thread to wait on; the routine returns once it is no longer
*               recorded as that CPU's HiFi owner
297+
*/
298+
static void spin_while_hifi_owner(struct _cpu *cpu, struct k_thread *thread)
299+
{
300+
unsigned int key;
301+
unsigned int original;
302+
unsigned int unlocked;
303+
304+
/* Remember the PS value seen on entry so it can be restored verbatim */
__asm__ volatile("rsr.ps %0" : "=r"(original));
305+
/* Clear the interrupt-level field of PS so interrupts can be taken */
unlocked = original & ~PS_INTLEVEL_MASK;
306+
__asm__ volatile("wsr.ps %0; rsync" :: "r"(unlocked) : "memory");
307+
308+
/* Spin until thread is no longer the HiFi owner on the other CPU */
309+
310+
while ((struct k_thread *)
311+
atomic_ptr_get(&cpu->arch.hifi_owner) == thread) {
/*
 * The relax itself runs inside an IRQ lock/unlock pair; the owner
 * field is re-read by the loop condition after the unlock.
 */
312+
key = arch_irq_lock();
313+
arch_spin_relax();
314+
arch_irq_unlock(key);
315+
}
316+
317+
/* Put PS back to the value read on entry */
__asm__ volatile("wsr.ps %0; rsync" :: "r"(original) : "memory");
318+
}
319+
320+
/**
321+
* Determine if the thread is the owner of a HiFi on another CPU. This is
322+
* called with the coprocessor lock held.
*
* @param thread Thread to look for in each CPU's arch.hifi_owner field
*
* @return Pointer to the first CPU (in _kernel.cpus[] order) other than the
*         current one whose HiFi owner is @a thread, or NULL if there is none
323+
*/
324+
static struct _cpu *thread_hifi_owner_elsewhere(struct k_thread *thread)
325+
{
326+
struct _cpu *this_cpu = arch_curr_cpu();
327+
struct k_thread *owner;
328+
329+
for (unsigned int i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) {
330+
owner = (struct k_thread *)
331+
atomic_ptr_get(&_kernel.cpus[i].arch.hifi_owner);
332+
/* Ownership on the current CPU does not count; only other CPUs match */
if ((this_cpu != &_kernel.cpus[i]) && (owner == thread)) {
333+
return &_kernel.cpus[i];
334+
}
335+
}
336+
return NULL;
337+
}
338+
#endif
339+
340+
/**
341+
* This routine is only needed for SMP systems with lazy HiFi sharing. It
342+
* handles the IPI sent to save the HiFi registers so the owner can load them
343+
* onto another CPU. When LAZY_COPROCESSOR_LOCK is not defined the body is
* compiled out and the routine does nothing.
344+
*/
345+
void arch_ipi_lazy_coprocessors_save(void)
346+
{
347+
#if defined(LAZY_COPROCESSOR_LOCK)
348+
k_spinlock_key_t key = k_spin_lock(&coprocessor_lock);
349+
struct _cpu *cpu = arch_curr_cpu();
350+
struct k_thread *save_hifi = (struct k_thread *)
351+
atomic_ptr_get(&cpu->arch.save_hifi);
352+
struct k_thread *hifi_owner = (struct k_thread *)
353+
atomic_ptr_get(&cpu->arch.hifi_owner);
354+
355+
/*
 * Only act when a save has been requested (save_hifi is set) and the
 * requested thread is still this CPU's HiFi owner.
 */
if ((save_hifi == hifi_owner) && (save_hifi != NULL)) {
356+
unsigned int cp;
357+
358+
/* Set the HiFi bit in CPENABLE for the duration of the save */
__asm__ volatile("rsr.cpenable %0" : "=r"(cp));
359+
cp |= BIT(XCHAL_CP_ID_AUDIOENGINELX);
360+
__asm__ volatile("wsr.cpenable %0" :: "r"(cp));
361+
362+
/* Store the HiFi registers into the owning thread's buffer */
xtensa_lazy_hifi_save(save_hifi->arch.hifi_regs);
363+
364+
/* Clear the HiFi bit in CPENABLE again */
cp &= ~BIT(XCHAL_CP_ID_AUDIOENGINELX);
365+
__asm__ volatile("wsr.cpenable %0" :: "r"(cp));
366+
367+
/* This CPU no longer has a HiFi owner; this is the field that
 * spin_while_hifi_owner() polls on the requesting CPU.
 */
atomic_ptr_set(&cpu->arch.hifi_owner, NULL);
368+
}
369+
/* The request is cleared whether or not a save was performed */
atomic_ptr_set(&cpu->arch.save_hifi, NULL);
370+
k_spin_unlock(&coprocessor_lock, key);
371+
#endif
372+
}
373+
282374
/* The wrapper code lives here instead of in the python script that
283375
* generates _xtensa_handle_one_int*(). Seems cleaner, still kind of
284376
* ugly.
@@ -484,6 +576,59 @@ void *xtensa_excint1_c(void *esf)
484576
bsa->pc += 3;
485577
break;
486578
#endif /* !CONFIG_USERSPACE */
579+
#ifdef CONFIG_XTENSA_LAZY_HIFI_SHARING
580+
case EXCCAUSE_CP_DISABLED(XCHAL_CP_ID_AUDIOENGINELX):
581+
/* Identify the interrupted thread and the old HiFi owner */
582+
struct k_thread *thread = _current;
583+
struct k_thread *owner;
584+
unsigned int cp;
585+
586+
#if defined(LAZY_COPROCESSOR_LOCK)
587+
/*
588+
* If the interrupted thread is a HiFi owner on another CPU,
589+
* then send an IPI to that CPU to have it save its HiFi state
590+
* and then return. This CPU will continue to raise the current
591+
* exception (and send IPIs) until the other CPU has both saved
592+
* the HiFi registers and cleared its HiFi owner.
593+
*/
594+
595+
k_spinlock_key_t key = k_spin_lock(&coprocessor_lock);
596+
struct _cpu *cpu = thread_hifi_owner_elsewhere(thread);
597+
598+
if (cpu != NULL) {
599+
cpu->arch.save_hifi = thread;
600+
arch_sched_directed_ipi(BIT(cpu->id));
601+
k_spin_unlock(&coprocessor_lock, key);
602+
spin_while_hifi_owner(cpu, thread);
603+
key = k_spin_lock(&coprocessor_lock);
604+
}
605+
#endif
606+
owner = (struct k_thread *)
607+
atomic_ptr_get(&arch_curr_cpu()->arch.hifi_owner);
608+
609+
/* Enable the HiFi coprocessor */
610+
__asm__ volatile("rsr.cpenable %0" : "=r"(cp));
611+
cp |= BIT(XCHAL_CP_ID_AUDIOENGINELX);
612+
__asm__ volatile("wsr.cpenable %0" :: "r"(cp));
613+
614+
if (owner == thread) {
615+
#if defined(LAZY_COPROCESSOR_LOCK)
616+
k_spin_unlock(&coprocessor_lock, key);
617+
#endif
618+
break;
619+
}
620+
621+
if (owner != NULL) {
622+
xtensa_lazy_hifi_save(owner->arch.hifi_regs);
623+
}
624+
625+
atomic_ptr_set(&arch_curr_cpu()->arch.hifi_owner, thread);
626+
#if defined(LAZY_COPROCESSOR_LOCK)
627+
k_spin_unlock(&coprocessor_lock, key);
628+
#endif
629+
xtensa_lazy_hifi_load(thread->arch.hifi_regs);
630+
break;
631+
#endif /* CONFIG_XTENSA_LAZY_HIFI_SHARING */
487632
default:
488633
reason = K_ERR_CPU_EXCEPTION;
489634

@@ -549,6 +694,9 @@ void *xtensa_excint1_c(void *esf)
549694
#ifndef CONFIG_USERSPACE
550695
case EXCCAUSE_SYSCALL:
551696
#endif /* !CONFIG_USERSPACE */
697+
#ifdef CONFIG_XTENSA_LAZY_HIFI_SHARING
698+
case EXCCAUSE_CP_DISABLED(XCHAL_CP_ID_AUDIOENGINELX):
699+
#endif /* CONFIG_XTENSA_LAZY_HIFI_SHARING */
552700
is_fatal_error = false;
553701
break;
554702
default:

arch/xtensa/core/xtensa_asm2_util.S

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,12 @@ xtensa_switch:
307307

308308
#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
309309
call0 _xtensa_hifi_save
310+
#elif defined(CONFIG_XTENSA_LAZY_HIFI_SHARING)
311+
/* Disable the HiFi coprocessor (clear its CPENABLE bit) */
312+
rsr a6, CPENABLE
313+
movi a7, ~(1 << XCHAL_CP_ID_AUDIOENGINELX)
314+
and a6, a6, a7
315+
wsr a6, CPENABLE
310316
#endif
311317

312318
/* Now the high registers */

arch/xtensa/core/xtensa_hifi.S

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <xtensa/config/tie.h>
1010
#include <xtensa/config/tie-asm.h>
1111

12+
#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
1213
/*
1314
* Load the HiFi registers from the hifi buffer in the BSA. Round the address
1415
* of this buffer up to XCHAL_CP1_SA_ALIGN bytes to guarantee the necessary
@@ -51,3 +52,32 @@ _xtensa_hifi_save:
5152
xchal_cp1_store a2 a3 a3 a3 a3 /* Only A2 and A3 are used by macro */
5253

5354
ret
55+
#elif defined(CONFIG_XTENSA_LAZY_HIFI_SHARING)
56+
/*
57+
* Load the HiFi registers from the HiFi buffer in the k_thread structure.
*
* Declared to C callers as: void xtensa_lazy_hifi_load(uint8_t *regs);
* The buffer address arrives in A2. Uses the windowed entry/retw
* calling sequence.
58+
*/
59+
.global xtensa_lazy_hifi_load
60+
.align 4
61+
xtensa_lazy_hifi_load:
62+
entry a1, 32
63+
/* Spill registers onto stack */
64+
call8 xthal_window_spill
65+
66+
/* A2 should be address of hifi storage; A3 is scratch */
67+
xchal_cp1_load a2 a3 a3 a3 a3
68+
retw
69+
70+
/*
71+
* Save the HiFi registers to the HiFi buffer in the k_thread structure.
*
* Declared to C callers as: void xtensa_lazy_hifi_save(uint8_t *regs);
* The buffer address arrives in A2. Uses the windowed entry/retw
* calling sequence.
72+
*/
73+
.global xtensa_lazy_hifi_save
74+
.align 4
75+
xtensa_lazy_hifi_save:
76+
entry a1, 32
77+
/* Spill registers onto stack */
78+
call8 xthal_window_spill
79+
80+
/* A2 should be address of hifi storage; A3 is scratch */
81+
xchal_cp1_store a2 a3 a3 a3 a3 /* Only A2 and A3 are used by macro */
82+
retw
83+
#endif

kernel/include/kernel_arch_interface.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,15 @@ void arch_switch_to_main_thread(struct k_thread *main_thread, char *stack_ptr,
175175
k_thread_entry_t _main);
176176
#endif /* CONFIG_ARCH_HAS_CUSTOM_SWAP_TO_MAIN */
177177

178+
/**
179+
* @brief Save coprocessor states on an IPI
180+
*
181+
* The function, invoked by the IPI handler, is used by cross-CPU lazy context
182+
* switches. It saves the relevant coprocessor context(s) before signalling the
183+
* waiting CPU that it has finished.
184+
*/
185+
void arch_ipi_lazy_coprocessors_save(void);
186+
178187
#if defined(CONFIG_FPU) && defined(CONFIG_FPU_SHARING)
179188
/**
180189
* @brief Disable floating point context preservation

kernel/ipi.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,8 @@ void z_sched_ipi(void)
105105
z_time_slice();
106106
}
107107
#endif /* CONFIG_TIMESLICING */
108+
109+
#ifdef CONFIG_ARCH_IPI_LAZY_COPROCESSORS_SAVE
110+
arch_ipi_lazy_coprocessors_save();
111+
#endif
108112
}

0 commit comments

Comments
 (0)