diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6c42061ca20e58..9e895e9ca65577 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4393,7 +4393,7 @@
 			as generic guest with no PV drivers. Currently support
 			XEN HVM, KVM, HYPER_V and VMWARE guest.
 
-	nopvspin	[X86,XEN,KVM,EARLY]
+	nopvspin	[X86,RISCV,XEN,KVM,EARLY]
 			Disables the qspinlock slow path using PV optimizations
 			which allow the hypervisor to 'idle' the guest on lock
 			contention.
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fadec20b87a8e3..7d29370e631890 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -1111,6 +1111,18 @@ config PARAVIRT_TIME_ACCOUNTING
 
 	  If in doubt, say N here.
 
+config PARAVIRT_SPINLOCKS
+	bool "Paravirtualization layer for spinlocks"
+	depends on QUEUED_SPINLOCKS
+	default y
+	help
+	  Paravirtualized spinlocks allow an unfair qspinlock to replace the
+	  test-and-set kvm-guest virt spinlock implementation with something
+	  virtualization-friendly, for example, halting the virtual CPU rather
+	  than spinning.
+
+	  If you are unsure how to answer this question, answer Y.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on !XIP_KERNEL
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index bd5fc940329534..1258bd239b4925 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -13,6 +13,5 @@ generic-y += spinlock_types.h
 generic-y += ticket_spinlock.h
 generic-y += qrwlock.h
 generic-y += qrwlock_types.h
-generic-y += qspinlock.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 3497489e04db48..d0df83ecd9fddb 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -107,6 +107,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_sta;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_fwft;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pvlock;
 
 #ifdef CONFIG_RISCV_PMU_SBI
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
diff --git a/arch/riscv/include/asm/qspinlock.h b/arch/riscv/include/asm/qspinlock.h
new file mode 100644
index 00000000000000..70ad7679fce0a4
--- /dev/null
+++ b/arch/riscv/include/asm/qspinlock.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c), 2025 Alibaba Damo Academy
+ * Authors:
+ *	Guo Ren
+ */
+
+#ifndef _ASM_RISCV_QSPINLOCK_H
+#define _ASM_RISCV_QSPINLOCK_H
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#include <asm/qspinlock_paravirt.h>
+
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD		(1 << 15)
+
+extern bool nopvspin;
+
+void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+void __pv_init_lock_hash(void);
+void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+
+static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	static_call(pv_queued_spin_lock_slowpath)(lock, val);
+}
+
+#define queued_spin_unlock	queued_spin_unlock
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	static_call(pv_queued_spin_unlock)(lock);
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#include <linux/jump_label.h>
+#include <asm-generic/qspinlock.h>
+
+/*
+ * KVM guests fall back to a test-and-set spinlock, because fair locks
+ * have horrible lock 'holder' preemption issues. The virt_spin_lock_key
+ * static key shortcuts the queued_spin_lock_slowpath() function so that
+ * virt_spin_lock() can hijack it.
+ */
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+#define virt_spin_lock rv_virt_spin_lock
+static inline bool rv_virt_spin_lock(struct qspinlock *lock)
+{
+	if (!static_branch_likely(&virt_spin_lock_key))
+		return false;
+
+	do {
+		smp_cond_load_relaxed((s32 *)&lock->val, VAL == 0);
+	} while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+
+	return true;
+}
+
+#endif /* _ASM_RISCV_QSPINLOCK_H */
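Note on the fallback above: rv_virt_spin_lock() is a classic test-and-set lock. It is unfair, which is the point — it avoids the convoy that a fair, queued lock suffers when the vCPU next in line is preempted by the host. The loop spins read-only until the lock looks free, then attempts a single compare-and-swap. A minimal stand-alone C11 model of the same shape (illustrative only: tas_lock()/tas_unlock() are invented names, and plain C11 atomics stand in for the kernel's smp_cond_load_relaxed() and atomic_cmpxchg()):

#include <stdatomic.h>

static void tas_lock(atomic_int *l)
{
	int expected;

	do {
		/* Read-only spin while the lock is held, so waiters do not
		 * bounce the cacheline with failed atomic operations. */
		while (atomic_load_explicit(l, memory_order_relaxed))
			;
		expected = 0;	/* the CAS rewrites 'expected' on failure */
	} while (!atomic_compare_exchange_strong_explicit(l, &expected, 1,
							  memory_order_acquire,
							  memory_order_relaxed));
}

static void tas_unlock(atomic_int *l)
{
	atomic_store_explicit(l, 0, memory_order_release);
}

The acquire/release pairing mirrors the ordering that the kernel's fully ordered atomic_cmpxchg() and smp_store_release() provide in the real code.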
diff --git a/arch/riscv/include/asm/qspinlock_paravirt.h b/arch/riscv/include/asm/qspinlock_paravirt.h
new file mode 100644
index 00000000000000..ded8c5a399bb6c
--- /dev/null
+++ b/arch/riscv/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c), 2025 Alibaba Damo Academy
+ * Authors:
+ *	Guo Ren
+ */
+
+#ifndef _ASM_RISCV_QSPINLOCK_PARAVIRT_H
+#define _ASM_RISCV_QSPINLOCK_PARAVIRT_H
+
+void pv_wait(u8 *ptr, u8 val);
+void pv_kick(int cpu);
+
+void dummy_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+void dummy_queued_spin_unlock(struct qspinlock *lock);
+
+DECLARE_STATIC_CALL(pv_queued_spin_lock_slowpath, dummy_queued_spin_lock_slowpath);
+DECLARE_STATIC_CALL(pv_queued_spin_unlock, dummy_queued_spin_unlock);
+
+bool __init pv_qspinlock_init(void);
+
+void __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
+bool pv_is_native_spin_unlock(void);
+
+void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+#endif /* _ASM_RISCV_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index ccc77a89b1e221..dd0734e1ebb646 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -37,6 +37,7 @@ enum sbi_ext_id {
 	SBI_EXT_NACL = 0x4E41434C,
 	SBI_EXT_FWFT = 0x46574654,
 	SBI_EXT_MPXY = 0x4D505859,
+	SBI_EXT_PVLOCK = 0x50564C4B,
 
 	/* Experimentals extensions must lie within this range */
 	SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -505,6 +506,10 @@ enum sbi_mpxy_rpmi_attribute_id {
 #define SBI_MPXY_CHAN_CAP_SEND_WITHOUT_RESP	BIT(4)
 #define SBI_MPXY_CHAN_CAP_GET_NOTIFICATIONS	BIT(5)
 
+enum sbi_ext_pvlock_fid {
+	SBI_EXT_PVLOCK_KICK_CPU = 0,
+};
+
 /* SBI spec version fields */
 #define SBI_SPEC_VERSION_DEFAULT	0x1
 #define SBI_SPEC_VERSION_MAJOR_SHIFT	24
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 759a4852c09a56..9d447995de84b7 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -211,6 +211,7 @@ enum KVM_RISCV_SBI_EXT_ID {
 	KVM_RISCV_SBI_EXT_STA,
 	KVM_RISCV_SBI_EXT_SUSP,
 	KVM_RISCV_SBI_EXT_FWFT,
+	KVM_RISCV_SBI_EXT_PVLOCK,
 	KVM_RISCV_SBI_EXT_MAX,
 };
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f60fce69b7259f..6ea874bcd447e0 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -125,3 +125,5 @@ obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)		+= acpi_numa.o
 
 obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES)	+= bugs.o
+
+obj-$(CONFIG_PARAVIRT_SPINLOCKS) += qspinlock_paravirt.o
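For context on how the hooks declared in qspinlock_paravirt.h get used: pv_wait() and pv_kick() are consumed by the generic PV qspinlock slowpath in kernel/locking/qspinlock_paravirt.h, which the static calls patch in. A heavily simplified sketch of that handshake follows (the *_model names and types are invented for illustration; the real pv_wait_node()/pv_kick_node() carry extra state for unlock hashing):

#include <stdatomic.h>

#define SPIN_THRESHOLD	(1 << 15)

enum vcpu_state_model { vcpu_running = 0, vcpu_halted = 1 };

/* Stand-ins for the arch hooks implemented in qspinlock_paravirt.c below. */
extern void pv_wait(atomic_uchar *ptr, unsigned char val);
extern void pv_kick(int cpu);

struct pv_node_model {
	atomic_uchar state;	/* vcpu_running or vcpu_halted */
	atomic_int locked;	/* set by the predecessor on lock handoff */
	int cpu;
};

/* Waiter: spin a bounded number of times, then halt in the hypervisor. */
static void pv_wait_node_model(struct pv_node_model *pn)
{
	for (;;) {
		for (int loop = SPIN_THRESHOLD; loop; loop--)
			if (atomic_load(&pn->locked))
				return;

		atomic_store(&pn->state, vcpu_halted);	/* advertise the halt */
		if (!atomic_load(&pn->locked))
			pv_wait(&pn->state, vcpu_halted);
		atomic_store(&pn->state, vcpu_running);
	}
}

/* Unlocker: hand the lock over, then wake the waiter iff it already halted. */
static void pv_kick_node_model(struct pv_node_model *pn)
{
	atomic_store(&pn->locked, 1);
	if (atomic_exchange(&pn->state, vcpu_running) == vcpu_halted)
		pv_kick(pn->cpu);
}

pv_wait(ptr, val) only promises to sleep while *ptr == val, and it may return spuriously, so the waiter loops and re-checks; a kick that races with the halt is either absorbed by pv_wait()'s own re-check or wakes the vCPU out of WFI.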
diff --git a/arch/riscv/kernel/qspinlock_paravirt.c b/arch/riscv/kernel/qspinlock_paravirt.c
new file mode 100644
index 00000000000000..b89f13d0d6e82a
--- /dev/null
+++ b/arch/riscv/kernel/qspinlock_paravirt.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c), 2025 Alibaba Damo Academy
+ * Authors:
+ *	Guo Ren
+ */
+
+#include <linux/static_call.h>
+#include <asm/qspinlock_paravirt.h>
+#include <asm/sbi.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace_events_filter_paravirt.h"
+
+void pv_kick(int cpu)
+{
+	trace_pv_kick(smp_processor_id(), cpu);
+
+	sbi_ecall(SBI_EXT_PVLOCK, SBI_EXT_PVLOCK_KICK_CPU,
+		  cpuid_to_hartid_map(cpu), 0, 0, 0, 0, 0);
+	return;
+}
+
+void pv_wait(u8 *ptr, u8 val)
+{
+	unsigned long flags;
+
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+	if (READ_ONCE(*ptr) != val)
+		goto out;
+
+	wait_for_interrupt();
+
+	trace_pv_wait(smp_processor_id());
+out:
+	local_irq_restore(flags);
+}
+
+static void native_queued_spin_unlock(struct qspinlock *lock)
+{
+	smp_store_release(&lock->locked, 0);
+}
+
+DEFINE_STATIC_CALL(pv_queued_spin_lock_slowpath, native_queued_spin_lock_slowpath);
+EXPORT_STATIC_CALL(pv_queued_spin_lock_slowpath);
+
+DEFINE_STATIC_CALL(pv_queued_spin_unlock, native_queued_spin_unlock);
+EXPORT_STATIC_CALL(pv_queued_spin_unlock);
+
+DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+bool __init pv_qspinlock_init(void)
+{
+	if (num_possible_cpus() == 1)
+		return false;
+
+	if (!sbi_probe_extension(SBI_EXT_PVLOCK))
+		return false;
+
+	if (nopvspin) {
+		static_branch_enable(&virt_spin_lock_key);
+		pr_info("virt_spin_lock enabled by nopvspin\n");
+		return true;
+	}
+
+	pr_info("PV qspinlocks enabled\n");
+	__pv_init_lock_hash();
+
+	static_call_update(pv_queued_spin_lock_slowpath, __pv_queued_spin_lock_slowpath);
+	static_call_update(pv_queued_spin_unlock, __pv_queued_spin_unlock);
+
+	return true;
+}
+
+bool pv_is_native_spin_unlock(void)
+{
+	if (static_call_query(pv_queued_spin_unlock) == native_queued_spin_unlock)
+		return true;
+	else
+		return false;
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b5bc5fc65cea65..0df27501e28ddf 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -288,6 +288,11 @@ static void __init riscv_spinlock_init(void)
 		return;
 	}
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+	if (pv_qspinlock_init())
+		return;
+#endif
+
 	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
 	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
 	    IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&
diff --git a/arch/riscv/kernel/trace_events_filter_paravirt.h b/arch/riscv/kernel/trace_events_filter_paravirt.h
new file mode 100644
index 00000000000000..db5e702a1f12cd
--- /dev/null
+++ b/arch/riscv/kernel/trace_events_filter_paravirt.h
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c), 2025 Alibaba Damo Academy
+ * Authors:
+ *	Guo Ren
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM paravirt
+
+#if !defined(_TRACE_PARAVIRT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PARAVIRT_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(pv_kick,
+	TP_PROTO(int cpu, int target),
+	TP_ARGS(cpu, target),
+
+	TP_STRUCT__entry(
+		__field(int, cpu)
+		__field(int, target)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->target = target;
+	),
+
+	TP_printk("cpu %d pv_kick target cpu %d",
+		__entry->cpu,
+		__entry->target
+	)
+);
+
+TRACE_EVENT(pv_wait,
+	TP_PROTO(int cpu),
+	TP_ARGS(cpu),
+
+	TP_STRUCT__entry(
+		__field(int, cpu)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+	),
+
+	TP_printk("cpu %d out of wfi",
+		__entry->cpu
+	)
+);
+
+#endif /* _TRACE_PARAVIRT_H || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../../arch/riscv/kernel/
+#define TRACE_INCLUDE_FILE trace_events_filter_paravirt
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
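A subtlety in pv_wait() above: wait_for_interrupt() (WFI) runs with interrupts disabled. That is deliberate and safe — RISC-V WFI resumes once an interrupt becomes pending even while interrupts are globally disabled — and it is what closes the lost-wakeup window, because *ptr is re-checked only after local_irq_save(). A hypothetical interleaving, sketched as a comment:

/*
 * The lost-wakeup race pv_wait() avoids (illustrative timeline):
 *
 *   waiter (pv_wait)                   kicker (pv_kick)
 *   ----------------                   ----------------
 *   local_irq_save(flags);
 *   if (READ_ONCE(*ptr) != val)        makes *ptr != val, then
 *           goto out;                  sbi_ecall(..KICK_CPU..) -> IPI pending
 *   wait_for_interrupt();              (WFI falls through: the interrupt is
 *                                       already pending, even though IRQs
 *                                       are globally disabled)
 *   local_irq_restore(flags);          (the IPI is taken here)
 */

The pv_kick/pv_wait tracepoints defined above land under events/paravirt/ in tracefs, which makes it easy to observe how often contended locks actually halt the vCPU.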
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 07197395750e38..40ddb7c06ffef3 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -35,6 +35,7 @@ kvm-y += vcpu_sbi_sta.o
 kvm-y += vcpu_sbi_system.o
 kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
 kvm-y += vcpu_switch.o
+kvm-y += vcpu_sbi_pvlock.o
 kvm-y += vcpu_timer.o
 kvm-y += vcpu_vector.o
 kvm-y += vm.o
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 1b13623380e158..dd74f789f44cdc 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -90,6 +90,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
 		.ext_idx = KVM_RISCV_SBI_EXT_VENDOR,
 		.ext_ptr = &vcpu_sbi_ext_vendor,
 	},
+	{
+		.ext_idx = KVM_RISCV_SBI_EXT_PVLOCK,
+		.ext_ptr = &vcpu_sbi_ext_pvlock,
+	},
 };
 
 static const struct kvm_riscv_sbi_extension_entry *
diff --git a/arch/riscv/kvm/vcpu_sbi_pvlock.c b/arch/riscv/kvm/vcpu_sbi_pvlock.c
new file mode 100644
index 00000000000000..aeb48c3fca50a7
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_pvlock.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c), 2025 Alibaba Damo Academy
+ *
+ * Authors:
+ *	Guo Ren
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_pvlock_kick_cpu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *target;
+
+	target = kvm_get_vcpu_by_id(kvm, cp->a0);
+	if (!target)
+		return SBI_ERR_INVALID_PARAM;
+
+	kvm_vcpu_kick(target);
+
+	if (READ_ONCE(target->ready))
+		kvm_vcpu_yield_to(target);
+
+	return SBI_SUCCESS;
+}
+
+static int kvm_sbi_ext_pvlock_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				      struct kvm_vcpu_sbi_return *retdata)
+{
+	int ret = 0;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long funcid = cp->a6;
+
+	switch (funcid) {
+	case SBI_EXT_PVLOCK_KICK_CPU:
+		ret = kvm_sbi_ext_pvlock_kick_cpu(vcpu);
+		break;
+	default:
+		ret = SBI_ERR_NOT_SUPPORTED;
+	}
+
+	retdata->err_val = ret;
+
+	return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pvlock = {
+	.extid_start = SBI_EXT_PVLOCK,
+	.extid_end = SBI_EXT_PVLOCK,
+	.handler = kvm_sbi_ext_pvlock_handler,
+};
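End to end: a contended guest lock parks its vCPU in WFI via pv_wait(); when the holder releases the lock, the guest's unlock slowpath reaches pv_kick(), whose ecall traps to the host and lands in kvm_sbi_ext_pvlock_kick_cpu() above. Summarizing the call as pv_kick() makes it under the standard SBI calling convention (a descriptive recap of the code in this patch, not a new ABI):

/*
 * SBI PVLOCK kick, as issued by pv_kick():
 *
 *   a7 (extension ID) = SBI_EXT_PVLOCK = 0x50564C4B ("PVLK" in ASCII)
 *   a6 (function ID)  = SBI_EXT_PVLOCK_KICK_CPU = 0
 *   a0                = hartid of the vCPU to wake
 *                       (cpuid_to_hartid_map(cpu) on the guest side)
 *
 * The host handler resolves a0 to the target vCPU via
 * kvm_get_vcpu_by_id(), kicks it out of WFI with kvm_vcpu_kick(), and,
 * when the target is already runnable, donates the caller's timeslice
 * with kvm_vcpu_yield_to() so the woken waiter can run sooner.
 */

A bad hartid is reported back to the guest through retdata->err_val as SBI_ERR_INVALID_PARAM; the ecall itself still completes with a return to the guest, which is why the handler returns 0 in all cases.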