Commit dd3e401
Merge tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov:
 "A bunch of things accumulated for x86 in the last two weeks:

  - Fix guest vtime accounting so that ticks happening while the guest is
    running can also be accounted to it. Along with a consolidation to the
    guest-specific context tracking helpers.

  - Provide for the host NMI handler running after a VMX VMEXIT to be able
    to run on the kernel stack correctly.

  - Initialize MSR_TSC_AUX when RDPID is supported and not RDTSCP (virt
    relevant - real hw supports both)

  - A code generation improvement to TASK_SIZE_MAX through the use of
    alternatives

  - The usual misc and related cleanups and improvements"

* tag 'x86_urgent_for_v5.13_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: x86: Consolidate guest enter/exit logic to common helpers
  context_tracking: KVM: Move guest enter/exit wrappers to KVM's domain
  context_tracking: Consolidate guest enter/exit wrappers
  sched/vtime: Move guest enter/exit vtime accounting to vtime.h
  sched/vtime: Move vtime accounting external declarations above inlines
  KVM: x86: Defer vtime accounting 'til after IRQ handling
  context_tracking: Move guest exit vtime accounting to separate helpers
  context_tracking: Move guest exit context tracking to separate helpers
  KVM/VMX: Invoke NMI non-IST entry instead of IST entry
  x86/cpu: Remove write_tsc() and write_rdtscp_aux() wrappers
  x86/cpu: Initialize MSR_TSC_AUX if RDTSCP *or* RDPID is supported
  x86/resctrl: Fix init const confusion
  x86: Delete UD0, UD1 traces
  x86/smpboot: Remove duplicate includes
  x86/cpu: Use alternative to generate the TASK_SIZE_MAX constant
2 parents b741596 + bc908e0

16 files changed: +263 −233 lines changed

arch/x86/include/asm/bug.h

Lines changed: 0 additions & 9 deletions
@@ -7,18 +7,9 @@
 
 /*
  * Despite that some emulators terminate on UD2, we use it for WARN().
- *
- * Since various instruction decoders/specs disagree on the encoding of
- * UD0/UD1.
  */
-
-#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
-#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
 #define ASM_UD2 ".byte 0x0f, 0x0b"
-
-#define INSN_UD0 0xff0f
 #define INSN_UD2 0x0b0f
-
 #define LEN_UD2 2
 
 #ifdef CONFIG_GENERIC_BUG
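
An aside on the constants that survive: WARN() plants a UD2, and the #UD handler compares the two opcode bytes at the faulting IP against INSN_UD2. Because x86 is little-endian, the byte pair 0x0f, 0x0b reads back as the 16-bit value 0x0b0f. A minimal user-space sketch (not part of this commit) showing that byte order:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const uint8_t ud2[2] = { 0x0f, 0x0b }; /* the ASM_UD2 byte sequence */
        uint16_t insn;

        /* Read the opcode roughly the way a decoder at the faulting
         * instruction pointer would read a 16-bit value. */
        memcpy(&insn, ud2, sizeof(insn));
        printf("insn = 0x%04x, INSN_UD2 = 0x0b0f\n", insn); /* 0x0b0f on x86 */
        return 0;
}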

arch/x86/include/asm/idtentry.h

Lines changed: 15 additions & 0 deletions
@@ -588,6 +588,21 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);
 #endif
 
 /* NMI */
+
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+/*
+ * Special NOIST entry point for VMX which invokes this on the kernel
+ * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
+ * 'executing' marker.
+ *
+ * On 32bit this just uses the regular NMI entry point because 32-bit does
+ * not have ISTs.
+ */
+DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist);
+#else
+#define asm_exc_nmi_noist asm_exc_nmi
+#endif
+
 DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
 #ifdef CONFIG_XEN_PV
 DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
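
For orientation, the consumer of this entry lives on the KVM/VMX side (the vmx.c hunk is not part of this excerpt). A sketch of the call pattern, reconstructed from the "KVM/VMX: Invoke NMI non-IST entry instead of IST entry" shortlog entry; the helper name below is assumed, not taken from this diff:

        /* Sketch only: after a VMEXIT caused by an NMI, KVM calls the
         * NOIST entry so the handler runs on the current kernel stack
         * rather than the NMI IST, whose state asm_exc_nmi() expects to
         * have been set up by a real IDT-delivered NMI.
         * handle_interrupt_nmi_irqoff() is a hypothetical name here. */
        else if (is_nmi(intr_info))
                handle_interrupt_nmi_irqoff(&vmx->vcpu,
                                            (unsigned long)asm_exc_nmi_noist);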

arch/x86/include/asm/msr.h

Lines changed: 0 additions & 4 deletions
@@ -324,10 +324,6 @@ static inline int wrmsrl_safe(u32 msr, u64 val)
 	return wrmsr_safe(msr, (u32)val, (u32)(val >> 32));
 }
 
-#define write_tsc(low, high) wrmsr(MSR_IA32_TSC, (low), (high))
-
-#define write_rdtscp_aux(val) wrmsr(MSR_TSC_AUX, (val), 0)
-
 struct msr *msrs_alloc(void);
 void msrs_free(struct msr *msrs);
 int msr_set_bit(u32 msr, u8 bit);
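
With the wrappers gone, the only caller touched in this series, setup_getcpu(), writes the MSR through wrmsr(MSR_TSC_AUX, ...) directly; see the arch/x86/kernel/cpu/common.c hunk below.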

arch/x86/include/asm/page_64.h

Lines changed: 33 additions & 0 deletions
@@ -56,6 +56,39 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+#ifdef CONFIG_X86_5LEVEL
+/*
+ * User space process size. This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen. This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+static inline unsigned long task_size_max(void)
+{
+	unsigned long ret;
+
+	alternative_io("movq %[small],%0","movq %[large],%0",
+			X86_FEATURE_LA57,
+			"=r" (ret),
+			[small] "i" ((1ul << 47)-PAGE_SIZE),
+			[large] "i" ((1ul << 56)-PAGE_SIZE));
+
+	return ret;
+}
+#endif /* CONFIG_X86_5LEVEL */
+
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
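
The point of routing this through alternative_io() is that the 4-level vs. 5-level limit becomes an immediate patched in at boot instead of a runtime pgtable_l5_enabled() test on every use. A user-space sketch (not kernel code; LP64 and 4 KiB pages assumed) that just evaluates the two immediates the alternative chooses between:

#include <stdio.h>

int main(void)
{
        unsigned long page_size = 1ul << 12;           /* assume 4 KiB pages */
        unsigned long small = (1ul << 47) - page_size; /* 4-level paging */
        unsigned long large = (1ul << 56) - page_size; /* 5-level (LA57) */

        printf("TASK_SIZE_MAX, 4-level: %#lx\n", small); /* 0x7ffffffff000 */
        printf("TASK_SIZE_MAX, 5-level: %#lx\n", large); /* 0xfffffffffff000 */
        return 0;
}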

arch/x86/include/asm/page_64_types.h

Lines changed: 3 additions & 20 deletions
@@ -55,30 +55,13 @@
 
 #ifdef CONFIG_X86_5LEVEL
 #define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
+/* See task_size_max() in <asm/page_64.h> */
 #else
 #define __VIRTUAL_MASK_SHIFT 47
+#define task_size_max() ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
 #endif
 
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
+#define TASK_SIZE_MAX task_size_max()
 #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
 
 /* This decides where the kernel will search for a free chunk of vm

arch/x86/kernel/cpu/common.c

Lines changed: 2 additions & 2 deletions
@@ -1851,8 +1851,8 @@ static inline void setup_getcpu(int cpu)
 	unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
 	struct desc_struct d = { };
 
-	if (boot_cpu_has(X86_FEATURE_RDTSCP))
-		write_rdtscp_aux(cpudata);
+	if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID))
+		wrmsr(MSR_TSC_AUX, cpudata, 0);
 
 	/* Store CPU and node number in limit. */
 	d.limit0 = cpudata;
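
MSR_TSC_AUX is what both RDTSCP and RDPID return, so it must be initialized if either feature is present; the kernel encodes (node << 12) | cpu into it via vdso_encode_cpunode(). A user-space sketch (illustrative, not from this commit; requires a CPU with RDPID, and the 12-bit cpu field assumes the kernel's encoding above):

#include <stdio.h>

int main(void)
{
        unsigned long aux;

        /* "rdpid %rax" spelled as raw bytes for older assemblers;
         * raises SIGILL if the CPU lacks RDPID. */
        __asm__ volatile(".byte 0xf3, 0x0f, 0xc7, 0xf8" : "=a" (aux));
        printf("cpu %lu, node %lu\n", aux & 0xfff, aux >> 12);
        return 0;
}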

arch/x86/kernel/cpu/resctrl/monitor.c

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ unsigned int resctrl_cqm_threshold;
 static const struct mbm_correction_factor_table {
 	u32 rmidthreshold;
 	u64 cf;
-} mbm_cf_table[] __initdata = {
+} mbm_cf_table[] __initconst = {
 	{7, CF(1.000000)},
 	{15, CF(1.000000)},
 	{15, CF(0.969650)},
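
The "init const confusion" is exactly what the annotations say: mbm_cf_table is const, so it belongs in .init.rodata via __initconst; marking const data __initdata puts it in the writable .init.data section, which toolchains can reject as a section type conflict.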

arch/x86/kernel/nmi.c

Lines changed: 10 additions & 0 deletions
@@ -524,6 +524,16 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 		mds_user_clear_cpu_buffers();
 }
 
+#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
+DEFINE_IDTENTRY_RAW(exc_nmi_noist)
+{
+	exc_nmi(regs);
+}
+#endif
+#if IS_MODULE(CONFIG_KVM_INTEL)
+EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
+#endif
+
 void stop_nmi(void)
 {
 	ignore_nmis++;
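
The NOIST wrapper simply forwards to exc_nmi() on whatever stack it was invoked from, which is the whole point of the variant; the separate IS_MODULE(CONFIG_KVM_INTEL) block exports the asm stub so that kvm-intel built as a module can reach it.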

arch/x86/kernel/smpboot.c

Lines changed: 0 additions & 3 deletions
@@ -1865,9 +1865,6 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
 	return true;
 }
 
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-
 #define X86_MATCH(model) \
 	X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
 		INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)

arch/x86/kvm/svm/svm.c

Lines changed: 2 additions & 37 deletions
@@ -3710,25 +3710,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned long vmcb_pa = svm->current_vmcb->pa;
 
-	/*
-	 * VMENTER enables interrupts (host state), but the kernel state is
-	 * interrupts disabled when this is invoked. Also tell RCU about
-	 * it. This is the same logic as for exit_to_user_mode().
-	 *
-	 * This ensures that e.g. latency analysis on the host observes
-	 * guest mode as interrupt enabled.
-	 *
-	 * guest_enter_irqoff() informs context tracking about the
-	 * transition to guest mode and if enabled adjusts RCU state
-	 * accordingly.
-	 */
-	instrumentation_begin();
-	trace_hardirqs_on_prepare();
-	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
-	instrumentation_end();
-
-	guest_enter_irqoff();
-	lockdep_hardirqs_on(CALLER_ADDR0);
+	kvm_guest_enter_irqoff();
 
 	if (sev_es_guest(vcpu->kvm)) {
 		__svm_sev_es_vcpu_run(vmcb_pa);
@@ -3748,24 +3730,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
 		vmload(__sme_page_pa(sd->save_area));
 	}
 
-	/*
-	 * VMEXIT disables interrupts (host state), but tracing and lockdep
-	 * have them in state 'on' as recorded before entering guest mode.
-	 * Same as enter_from_user_mode().
-	 *
-	 * guest_exit_irqoff() restores host context and reinstates RCU if
-	 * enabled and required.
-	 *
-	 * This needs to be done before the below as native_read_msr()
-	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
-	 * into world and some more.
-	 */
-	lockdep_hardirqs_off(CALLER_ADDR0);
-	guest_exit_irqoff();
-
-	instrumentation_begin();
-	trace_hardirqs_off_finish();
-	instrumentation_end();
+	kvm_guest_exit_irqoff();
 }
 
 static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
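
The two deleted blocks reappear as the common helpers named in the shortlog ("KVM: x86: Consolidate guest enter/exit logic to common helpers"). A sketch of what they plausibly look like, reconstructed purely from the lines removed above; the helper location (a KVM-internal header) is assumed, and per the shortlog the merged helpers also defer vtime accounting until after IRQ handling, which this sketch does not show:

static __always_inline void kvm_guest_enter_irqoff(void)
{
        /* Tell tracing/lockdep that VMENTER will enable interrupts,
         * same logic as exit_to_user_mode(). */
        instrumentation_begin();
        trace_hardirqs_on_prepare();
        lockdep_hardirqs_on_prepare(CALLER_ADDR0);
        instrumentation_end();

        guest_enter_irqoff();           /* context tracking + RCU */
        lockdep_hardirqs_on(CALLER_ADDR0);
}

static __always_inline void kvm_guest_exit_irqoff(void)
{
        /* VMEXIT disabled interrupts; bring lockdep/tracing back in
         * sync before anything containing tracepoints can run. */
        lockdep_hardirqs_off(CALLER_ADDR0);
        guest_exit_irqoff();            /* restore host context */

        instrumentation_begin();
        trace_hardirqs_off_finish();
        instrumentation_end();
}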
