34 changes: 34 additions & 0 deletions arch/riscv/Kconfig
@@ -37,6 +37,40 @@ config RISCV_ALWAYS_SWITCH_THROUGH_ECALL
and most people should say n here to minimize context switching
overhead.

choice RISCV_SMP_IPI_IMPL
prompt "RISC-V SMP IPI implementation"
depends on SMP
default RISCV_SMP_IPI_CLINT if DT_HAS_SIFIVE_CLINT0_ENABLED
default RISCV_SMP_IPI_PLIC if PLIC_SUPPORTS_SOFT_INTERRUPT && PLIC_IRQ_AFFINITY
default RISCV_SMP_IPI_CUSTOM

config RISCV_SMP_IPI_CLINT
bool "CLINT-based IPI"
depends on DT_HAS_SIFIVE_CLINT0_ENABLED
help
Use CLINT-based IPI implementation.

config RISCV_SMP_IPI_PLIC
bool "PLIC-based IPI"
depends on PLIC_SUPPORTS_SOFT_INTERRUPT
depends on PLIC_IRQ_AFFINITY
depends on !FPU_SHARING # not supported for now
help
Use PLIC-based IPI implementation.

config RISCV_SMP_IPI_CUSTOM
bool "Custom IPI implementation"
help
Allow a custom IPI implementation (see the sketch following this file's changes).

When this is selected, the following functions must be provided:
- arch_sched_directed_ipi()
- arch_flush_fpu_ipi() if CONFIG_FPU_SHARING
- arch_spin_relax() if CONFIG_FPU_SHARING
- arch_smp_init()

endchoice # RISCV_SMP_IPI_IMPL

menu "RISCV Processor Options"

config INCLUDE_RESET_VECTOR
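For orientation, here is a minimal sketch of the hooks a RISCV_SMP_IPI_CUSTOM backend must provide. The my_soc_ipi_trigger() helper is a hypothetical stand-in for a SoC-specific doorbell or mailbox access; the loop structure mirrors the in-tree CLINT and PLIC backends below.

```c
#include <ipi.h>
#include <ksched.h>
#include <zephyr/kernel.h>

/* Hypothetical SoC-specific doorbell write; a real backend would
 * poke a mailbox/doorbell register for the given hart here.
 */
static inline void my_soc_ipi_trigger(unsigned long hartid)
{
	ARG_UNUSED(hartid);
}

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
	unsigned int key = arch_irq_lock();
	unsigned int id = _current_cpu->id;
	unsigned int num_cpus = arch_num_cpus();

	/* Signal every online CPU selected in the bitmap, except ourself. */
	for (unsigned int i = 0; i < num_cpus; i++) {
		if ((i != id) && _kernel.cpus[i].arch.online &&
		    ((cpu_bitmap & BIT(i)) != 0)) {
			my_soc_ipi_trigger(_kernel.cpus[i].arch.hartid);
		}
	}

	arch_irq_unlock(key);
}

int arch_smp_init(void)
{
	/* Connect and enable the SoC's IPI interrupt source(s) here,
	 * with a handler that ultimately calls z_sched_ipi().
	 */
	return 0;
}

/* With CONFIG_FPU_SHARING, arch_flush_fpu_ipi() and arch_spin_relax()
 * must also be provided; see the CLINT and PLIC backends below.
 */
```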
7 changes: 7 additions & 0 deletions arch/riscv/core/CMakeLists.txt
@@ -17,6 +17,13 @@ if ((CONFIG_MP_MAX_NUM_CPUS GREATER 1) OR (CONFIG_SMP))
zephyr_library_sources(smp.c)
endif ()

if (CONFIG_SMP)
zephyr_library_sources(ipi.c)

zephyr_library_sources_ifdef(CONFIG_RISCV_SMP_IPI_CLINT ipi_clint.c)
zephyr_library_sources_ifdef(CONFIG_RISCV_SMP_IPI_PLIC ipi_plic.c)
endif()

zephyr_library_sources_ifdef(CONFIG_FPU_SHARING fpu.c fpu.S)
zephyr_library_sources_ifdef(CONFIG_DEBUG_COREDUMP coredump.c)
zephyr_library_sources_ifdef(CONFIG_IRQ_OFFLOAD irq_offload.c)
14 changes: 14 additions & 0 deletions arch/riscv/core/ipi.c
@@ -0,0 +1,14 @@
/*
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <ipi.h>

#include <zephyr/kernel.h>

void arch_sched_broadcast_ipi(void)
{
arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
}
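IPI_ALL_CPUS_MASK is provided by the kernel-internal ipi.h header included above; presumably it sets one bit per possible CPU. A hedged usage sketch follows (example_poke() is invented for illustration, and the scheduler normally issues these IPIs itself):

```c
#include <zephyr/kernel.h>
#include <zephyr/sys/util.h>

/* Illustrative only; assumes the arch IPI prototypes are in scope.
 * IPI_ALL_CPUS_MASK is presumably equivalent to
 * BIT_MASK(CONFIG_MP_MAX_NUM_CPUS) (assumption).
 */
void example_poke(void)
{
	arch_sched_directed_ipi(BIT(2)); /* reschedule check on CPU 2 only */
	arch_sched_broadcast_ipi();      /* reschedule check on all other CPUs */
}
```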
97 changes: 97 additions & 0 deletions arch/riscv/core/ipi_clint.c
@@ -0,0 +1,97 @@
/*
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <ipi.h>
#include <ksched.h>

#include <zephyr/kernel.h>

/* CLINT machine software-interrupt (MSIP) registers: one 32-bit word
 * per hart, at the conventional CLINT base address.
 */
#define MSIP_BASE 0x2000000UL
#define MSIP(hartid) ((volatile uint32_t *)MSIP_BASE)[hartid]

static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
#define IPI_SCHED 0
#define IPI_FPU_FLUSH 1

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
unsigned int key = arch_irq_lock();
unsigned int id = _current_cpu->id;
unsigned int num_cpus = arch_num_cpus();

for (unsigned int i = 0; i < num_cpus; i++) {
if ((i != id) && _kernel.cpus[i].arch.online && ((cpu_bitmap & BIT(i)) != 0)) {
atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
MSIP(_kernel.cpus[i].arch.hartid) = 1;
}
}

arch_irq_unlock(key);
}

#ifdef CONFIG_FPU_SHARING
void arch_flush_fpu_ipi(unsigned int cpu)
{
atomic_set_bit(&cpu_pending_ipi[cpu], IPI_FPU_FLUSH);
MSIP(_kernel.cpus[cpu].arch.hartid) = 1;
}
#endif /* CONFIG_FPU_SHARING */

static void sched_ipi_handler(const void *unused)
{
ARG_UNUSED(unused);

MSIP(csr_read(mhartid)) = 0;

atomic_val_t pending_ipi = atomic_clear(&cpu_pending_ipi[_current_cpu->id]);

if (pending_ipi & ATOMIC_MASK(IPI_SCHED)) {
z_sched_ipi();
}
#ifdef CONFIG_FPU_SHARING
if (pending_ipi & ATOMIC_MASK(IPI_FPU_FLUSH)) {
/* disable IRQs */
csr_clear(mstatus, MSTATUS_IEN);
/* perform the flush */
arch_flush_local_fpu();
/*
* No need to re-enable IRQs here as long as
* this remains the last case.
*/
}
#endif /* CONFIG_FPU_SHARING */
}

#ifdef CONFIG_FPU_SHARING
/*
* Make sure there is no pending FPU flush request for this CPU while
* waiting for a contended spinlock to become available. This prevents
* a deadlock when the lock we need is already taken by another CPU
* that also wants its FPU content to be reinstated while such content
* is still live in this CPU's FPU.
*/
void arch_spin_relax(void)
{
atomic_val_t *pending_ipi = &cpu_pending_ipi[_current_cpu->id];

if (atomic_test_and_clear_bit(pending_ipi, IPI_FPU_FLUSH)) {
/*
* We may not be in IRQ context here hence cannot use
* arch_flush_local_fpu() directly.
*/
arch_float_disable(_current_cpu->arch.fpu_owner);
}
}
#endif /* CONFIG_FPU_SHARING */

int arch_smp_init(void)
{
IRQ_CONNECT(RISCV_IRQ_MSOFT, 0, sched_ipi_handler, NULL, 0);
irq_enable(RISCV_IRQ_MSOFT);

return 0;
}
137 changes: 137 additions & 0 deletions arch/riscv/core/ipi_plic.c
@@ -0,0 +1,137 @@
/*
* Copyright (c) 2024 Meta Platforms
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <ipi.h>

#include <zephyr/devicetree.h>
#include <zephyr/drivers/interrupt_controller/riscv_plic.h>
#include <zephyr/kernel.h>
#include <zephyr/sys/util.h>

#ifdef CONFIG_FPU_SHARING
/* Per-CPU FPU-flush IPI lines come from the devicetree node labeled
 * fpu_ipi, one interrupt specifier per CPU.
 */
#define FPU_IPI_NODE DT_NODELABEL(fpu_ipi)
#define FPU_IPI_NUM_IRQS DT_NUM_IRQS(FPU_IPI_NODE)
#define FPU_IPI_IRQ(n) DT_IRQN_BY_IDX(FPU_IPI_NODE, n)
#define FPU_IPI_IRQS_FN(n, _) DT_IRQN_BY_IDX(FPU_IPI_NODE, n)

static const uint32_t fpu_ipi_irqs[FPU_IPI_NUM_IRQS] = {
LISTIFY(FPU_IPI_NUM_IRQS, FPU_IPI_IRQS_FN, (,)),
};

static ALWAYS_INLINE void send_fpu_ipi(int cpu)
{
riscv_plic_irq_set_pending(fpu_ipi_irqs[cpu]);
}

static ALWAYS_INLINE bool fpu_ipi_irq_is_pending(int cpu)
{
return riscv_plic_irq_is_pending(fpu_ipi_irqs[cpu]);
}

static ALWAYS_INLINE void fpu_ipi_irq_clear_pending(int cpu)
{
riscv_plic_irq_clear_pending(fpu_ipi_irqs[cpu]);
}

static void fpu_ipi_handler(const void *arg)
{
ARG_UNUSED(arg);

/* disable IRQs */
csr_clear(mstatus, MSTATUS_IEN);
/* perform the flush */
arch_flush_local_fpu();
/*
* No need to re-enable IRQs here as long as
* this remains the last case.
*/
}

void arch_flush_fpu_ipi(unsigned int cpu)
{
send_fpu_ipi(cpu);
}

/*
* Make sure there is no pending FPU flush request for this CPU while
* waiting for a contended spinlock to become available. This prevents
* a deadlock when the lock we need is already taken by another CPU
* that also wants its FPU content to be reinstated while such content
* is still live in this CPU's FPU.
*/
void arch_spin_relax(void)
{
int cpu = _current_cpu->id;

if (fpu_ipi_irq_is_pending(cpu)) {
fpu_ipi_irq_clear_pending(cpu);
/*
* We may not be in IRQ context here hence cannot use
* arch_flush_local_fpu() directly.
*/
arch_float_disable(_current_cpu->arch.fpu_owner);
}
}

#define FPU_IPI_IRQ_CONNECT(n, _) \
IRQ_CONNECT(FPU_IPI_IRQ(n), 1, fpu_ipi_handler, UINT_TO_POINTER(n), 0); \
irq_enable(FPU_IPI_IRQ(n)); \
riscv_plic_irq_set_affinity(FPU_IPI_IRQ(n), BIT(n))

#define fpu_ipi_irqs_setup() LISTIFY(CONFIG_MP_MAX_NUM_CPUS, FPU_IPI_IRQ_CONNECT, (;))
#else
#define fpu_ipi_irqs_setup()
#endif /* CONFIG_FPU_SHARING */

/* Per-CPU scheduling IPI lines come from the devicetree node labeled
 * sched_ipi, one interrupt specifier per CPU.
 */
#define SCHED_IPI_NODE DT_NODELABEL(sched_ipi)
#define SCHED_IPI_NUM_IRQS DT_NUM_IRQS(SCHED_IPI_NODE)
#define SCHED_IPI_IRQ(n) DT_IRQN_BY_IDX(SCHED_IPI_NODE, n)
#define SCHED_IPI_IRQS_FN(n, _) DT_IRQN_BY_IDX(SCHED_IPI_NODE, n)

static const uint32_t sched_ipi_irqs[SCHED_IPI_NUM_IRQS] = {
LISTIFY(SCHED_IPI_NUM_IRQS, SCHED_IPI_IRQS_FN, (,)),
};

static ALWAYS_INLINE void send_sched_ipi(int cpu)
{
riscv_plic_irq_set_pending(sched_ipi_irqs[cpu]);
}

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
unsigned int key = arch_irq_lock();
unsigned int id = _current_cpu->id;
unsigned int num_cpus = arch_num_cpus();

for (unsigned int i = 0; i < num_cpus; i++) {
if ((i != id) && _kernel.cpus[i].arch.online && ((cpu_bitmap & BIT(i)) != 0)) {
send_sched_ipi(i);
}
}

arch_irq_unlock(key);
}

static void sched_ipi_handler(const void *arg)
{
ARG_UNUSED(arg);

z_sched_ipi();
}

#define SCHED_IPI_IRQ_CONNECT(n, _) \
IRQ_CONNECT(SCHED_IPI_IRQ(n), 1, sched_ipi_handler, UINT_TO_POINTER(n), 0); \
irq_enable(SCHED_IPI_IRQ(n)); \
riscv_plic_irq_set_affinity(SCHED_IPI_IRQ(n), BIT(n))

#define sched_ipi_irqs_setup() LISTIFY(CONFIG_MP_MAX_NUM_CPUS, SCHED_IPI_IRQ_CONNECT, (;))

int arch_smp_init(void)
{
sched_ipi_irqs_setup();
fpu_ipi_irqs_setup();

return 0;
}
