Skip to content

Commit 465a7e2

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "Mostly tooling fixes, but also a couple of updates for new Intel models (which are technically hw-enablement, but to users it's a fix to perf behavior on those new CPUs - hope this is fine), an AUX inheritance fix, event time-sharing fix, and a fix for lost non-perf NMI events on AMD systems" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) perf/x86/cstate: Add Tiger Lake CPU support perf/x86/msr: Add Tiger Lake CPU support perf/x86/intel: Add Tiger Lake CPU support perf/x86/cstate: Update C-state counters for Ice Lake perf/x86/msr: Add new CPU model numbers for Ice Lake perf/x86/cstate: Add Comet Lake CPU support perf/x86/msr: Add Comet Lake CPU support perf/x86/intel: Add Comet Lake CPU support perf/x86/amd: Change/fix NMI latency mitigation to use a timestamp perf/core: Fix corner case in perf_rotate_context() perf/core: Rework memory accounting in perf_mmap() perf/core: Fix inheritance of aux_output groups perf annotate: Don't return -1 for error when doing BPF disassembly perf annotate: Return appropriate error code for allocation failures perf annotate: Fix arch specific ->init() failure errors perf annotate: Propagate the symbol__annotate() error return perf annotate: Fix the signedness of failure returns perf annotate: Propagate perf_env__arch() error perf evsel: Fall back to global 'perf_env' in perf_evsel__env() perf tools: Propagate get_cpuid() error ...
2 parents 9b4e40c + 52e92f4 commit 465a7e2

File tree

43 files changed

+411
-131
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+411
-131
lines changed

arch/x86/events/amd/core.c

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
#include <linux/init.h>
66
#include <linux/slab.h>
77
#include <linux/delay.h>
8+
#include <linux/jiffies.h>
89
#include <asm/apicdef.h>
910
#include <asm/nmi.h>
1011

1112
#include "../perf_event.h"
1213

13-
static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
14+
static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
15+
static unsigned long perf_nmi_window;
1416

1517
static __initconst const u64 amd_hw_cache_event_ids
1618
[PERF_COUNT_HW_CACHE_MAX]
@@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event)
641643
* handler when multiple PMCs are active or PMC overflow while handling some
642644
* other source of an NMI.
643645
*
644-
* Attempt to mitigate this by using the number of active PMCs to determine
645-
* whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
646-
* any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
647-
* number of active PMCs or 2. The value of 2 is used in case an NMI does not
648-
* arrive at the LAPIC in time to be collapsed into an already pending NMI.
646+
* Attempt to mitigate this by creating an NMI window in which un-handled NMIs
647+
* received during this window will be claimed. This prevents extending the
648+
* window past when it is possible that latent NMIs should be received. The
649+
* per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
650+
* handled a counter. When an un-handled NMI is received, it will be claimed
651+
* only if arriving within that window.
649652
*/
650653
static int amd_pmu_handle_irq(struct pt_regs *regs)
651654
{
@@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
663666
handled = x86_pmu_handle_irq(regs);
664667

665668
/*
666-
* If a counter was handled, record the number of possible remaining
667-
* NMIs that can occur.
669+
* If a counter was handled, record a timestamp such that un-handled
670+
* NMIs will be claimed if arriving within that window.
668671
*/
669672
if (handled) {
670-
this_cpu_write(perf_nmi_counter,
671-
min_t(unsigned int, 2, active));
673+
this_cpu_write(perf_nmi_tstamp,
674+
jiffies + perf_nmi_window);
672675

673676
return handled;
674677
}
675678

676-
if (!this_cpu_read(perf_nmi_counter))
679+
if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
677680
return NMI_DONE;
678681

679-
this_cpu_dec(perf_nmi_counter);
680-
681682
return NMI_HANDLED;
682683
}
683684

@@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void)
909910
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
910911
return 0;
911912

913+
/* Avoid calculating the value each time in the NMI handler */
914+
perf_nmi_window = msecs_to_jiffies(100);
915+
912916
switch (boot_cpu_data.x86) {
913917
case 0x15:
914918
pr_cont("Fam15h ");

arch/x86/events/intel/core.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4983,6 +4983,8 @@ __init int intel_pmu_init(void)
49834983
case INTEL_FAM6_SKYLAKE:
49844984
case INTEL_FAM6_KABYLAKE_L:
49854985
case INTEL_FAM6_KABYLAKE:
4986+
case INTEL_FAM6_COMETLAKE_L:
4987+
case INTEL_FAM6_COMETLAKE:
49864988
x86_add_quirk(intel_pebs_isolation_quirk);
49874989
x86_pmu.late_ack = true;
49884990
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -5031,6 +5033,8 @@ __init int intel_pmu_init(void)
50315033
/* fall through */
50325034
case INTEL_FAM6_ICELAKE_L:
50335035
case INTEL_FAM6_ICELAKE:
5036+
case INTEL_FAM6_TIGERLAKE_L:
5037+
case INTEL_FAM6_TIGERLAKE:
50345038
x86_pmu.late_ack = true;
50355039
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
50365040
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

arch/x86/events/intel/cstate.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,46 +45,49 @@
4545
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
4646
* perf code: 0x01
4747
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
48-
CNL
48+
* CNL,KBL,CML
4949
* Scope: Core
5050
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
5151
* perf code: 0x02
5252
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
53-
* SKL,KNL,GLM,CNL
53+
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
5454
* Scope: Core
5555
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
5656
* perf code: 0x03
57-
* Available model: SNB,IVB,HSW,BDW,SKL,CNL
57+
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
58+
* ICL,TGL
5859
* Scope: Core
5960
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
6061
* perf code: 0x00
61-
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
62+
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
63+
* KBL,CML,ICL,TGL
6264
* Scope: Package (physical package)
6365
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
6466
* perf code: 0x01
6567
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
66-
* GLM,CNL
68+
* GLM,CNL,KBL,CML,ICL,TGL
6769
* Scope: Package (physical package)
6870
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
6971
* perf code: 0x02
7072
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
71-
* SKL,KNL,GLM,CNL
73+
* SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
7274
* Scope: Package (physical package)
7375
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
7476
* perf code: 0x03
75-
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
77+
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
78+
* KBL,CML,ICL,TGL
7679
* Scope: Package (physical package)
7780
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
7881
* perf code: 0x04
79-
* Available model: HSW ULT,KBL,CNL
82+
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
8083
* Scope: Package (physical package)
8184
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
8285
* perf code: 0x05
83-
* Available model: HSW ULT,KBL,CNL
86+
* Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
8487
* Scope: Package (physical package)
8588
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
8689
* perf code: 0x06
87-
* Available model: HSW ULT,KBL,GLM,CNL
90+
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
8891
* Scope: Package (physical package)
8992
*
9093
*/
@@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = {
544547
BIT(PERF_CSTATE_PKG_C10_RES),
545548
};
546549

550+
static const struct cstate_model icl_cstates __initconst = {
551+
.core_events = BIT(PERF_CSTATE_CORE_C6_RES) |
552+
BIT(PERF_CSTATE_CORE_C7_RES),
553+
554+
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
555+
BIT(PERF_CSTATE_PKG_C3_RES) |
556+
BIT(PERF_CSTATE_PKG_C6_RES) |
557+
BIT(PERF_CSTATE_PKG_C7_RES) |
558+
BIT(PERF_CSTATE_PKG_C8_RES) |
559+
BIT(PERF_CSTATE_PKG_C9_RES) |
560+
BIT(PERF_CSTATE_PKG_C10_RES),
561+
};
562+
547563
static const struct cstate_model slm_cstates __initconst = {
548564
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
549565
BIT(PERF_CSTATE_CORE_C6_RES),
@@ -614,6 +630,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
614630

615631
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
616632
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
633+
X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
634+
X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
617635

618636
X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
619637

@@ -625,8 +643,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
625643

626644
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
627645

628-
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates),
629-
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, snb_cstates),
646+
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
647+
X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
648+
X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
649+
X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
630650
{ },
631651
};
632652
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

arch/x86/events/msr.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,14 @@ static bool test_intel(int idx, void *data)
8989
case INTEL_FAM6_SKYLAKE_X:
9090
case INTEL_FAM6_KABYLAKE_L:
9191
case INTEL_FAM6_KABYLAKE:
92+
case INTEL_FAM6_COMETLAKE_L:
93+
case INTEL_FAM6_COMETLAKE:
9294
case INTEL_FAM6_ICELAKE_L:
95+
case INTEL_FAM6_ICELAKE:
96+
case INTEL_FAM6_ICELAKE_X:
97+
case INTEL_FAM6_ICELAKE_D:
98+
case INTEL_FAM6_TIGERLAKE_L:
99+
case INTEL_FAM6_TIGERLAKE:
93100
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
94101
return true;
95102
break;

kernel/events/core.c

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3779,11 +3779,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
37793779
perf_event_groups_insert(&ctx->flexible_groups, event);
37803780
}
37813781

3782+
/* pick an event from the flexible_groups to rotate */
37823783
static inline struct perf_event *
3783-
ctx_first_active(struct perf_event_context *ctx)
3784+
ctx_event_to_rotate(struct perf_event_context *ctx)
37843785
{
3785-
return list_first_entry_or_null(&ctx->flexible_active,
3786-
struct perf_event, active_list);
3786+
struct perf_event *event;
3787+
3788+
/* pick the first active flexible event */
3789+
event = list_first_entry_or_null(&ctx->flexible_active,
3790+
struct perf_event, active_list);
3791+
3792+
/* if no active flexible event, pick the first event */
3793+
if (!event) {
3794+
event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
3795+
typeof(*event), group_node);
3796+
}
3797+
3798+
return event;
37873799
}
37883800

37893801
static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
@@ -3808,9 +3820,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
38083820
perf_pmu_disable(cpuctx->ctx.pmu);
38093821

38103822
if (task_rotate)
3811-
task_event = ctx_first_active(task_ctx);
3823+
task_event = ctx_event_to_rotate(task_ctx);
38123824
if (cpu_rotate)
3813-
cpu_event = ctx_first_active(&cpuctx->ctx);
3825+
cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
38143826

38153827
/*
38163828
* As per the order given at ctx_resched() first 'pop' task flexible
@@ -5668,7 +5680,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
56685680
* undo the VM accounting.
56695681
*/
56705682

5671-
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
5683+
atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
5684+
&mmap_user->locked_vm);
56725685
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
56735686
free_uid(mmap_user);
56745687

@@ -5812,8 +5825,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
58125825

58135826
user_locked = atomic_long_read(&user->locked_vm) + user_extra;
58145827

5815-
if (user_locked > user_lock_limit)
5828+
if (user_locked <= user_lock_limit) {
5829+
/* charge all to locked_vm */
5830+
} else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
5831+
/* charge all to pinned_vm */
5832+
extra = user_extra;
5833+
user_extra = 0;
5834+
} else {
5835+
/*
5836+
* charge locked_vm until it hits user_lock_limit;
5837+
* charge the rest from pinned_vm
5838+
*/
58165839
extra = user_locked - user_lock_limit;
5840+
user_extra -= extra;
5841+
}
58175842

58185843
lock_limit = rlimit(RLIMIT_MEMLOCK);
58195844
lock_limit >>= PAGE_SHIFT;
@@ -11862,6 +11887,10 @@ static int inherit_group(struct perf_event *parent_event,
1186211887
child, leader, child_ctx);
1186311888
if (IS_ERR(child_ctr))
1186411889
return PTR_ERR(child_ctr);
11890+
11891+
if (sub->aux_event == parent_event &&
11892+
!perf_get_aux_event(child_ctr, leader))
11893+
return -EINVAL;
1186511894
}
1186611895
return 0;
1186711896
}

tools/arch/arm/include/uapi/asm/kvm.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,10 @@ struct kvm_vcpu_events {
266266
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
267267

268268
/* KVM_IRQ_LINE irq field index values */
269+
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
270+
#define KVM_ARM_IRQ_VCPU2_MASK 0xf
269271
#define KVM_ARM_IRQ_TYPE_SHIFT 24
270-
#define KVM_ARM_IRQ_TYPE_MASK 0xff
272+
#define KVM_ARM_IRQ_TYPE_MASK 0xf
271273
#define KVM_ARM_IRQ_VCPU_SHIFT 16
272274
#define KVM_ARM_IRQ_VCPU_MASK 0xff
273275
#define KVM_ARM_IRQ_NUM_SHIFT 0

tools/arch/arm64/include/uapi/asm/kvm.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,10 @@ struct kvm_vcpu_events {
325325
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
326326

327327
/* KVM_IRQ_LINE irq field index values */
328+
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
329+
#define KVM_ARM_IRQ_VCPU2_MASK 0xf
328330
#define KVM_ARM_IRQ_TYPE_SHIFT 24
329-
#define KVM_ARM_IRQ_TYPE_MASK 0xff
331+
#define KVM_ARM_IRQ_TYPE_MASK 0xf
330332
#define KVM_ARM_IRQ_VCPU_SHIFT 16
331333
#define KVM_ARM_IRQ_VCPU_MASK 0xff
332334
#define KVM_ARM_IRQ_NUM_SHIFT 0

tools/arch/s390/include/uapi/asm/kvm.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,12 @@ struct kvm_guest_debug_arch {
231231
#define KVM_SYNC_GSCB (1UL << 9)
232232
#define KVM_SYNC_BPBC (1UL << 10)
233233
#define KVM_SYNC_ETOKEN (1UL << 11)
234+
235+
#define KVM_SYNC_S390_VALID_FIELDS \
236+
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
237+
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
238+
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
239+
234240
/* length and alignment of the sdnx as a power of two */
235241
#define SDNXC 8
236242
#define SDNXL (1UL << SDNXC)

tools/arch/x86/include/uapi/asm/vmx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#define EXIT_REASON_EXCEPTION_NMI 0
3232
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
3333
#define EXIT_REASON_TRIPLE_FAULT 2
34+
#define EXIT_REASON_INIT_SIGNAL 3
3435

3536
#define EXIT_REASON_PENDING_INTERRUPT 7
3637
#define EXIT_REASON_NMI_WINDOW 8
@@ -90,6 +91,7 @@
9091
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
9192
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
9293
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
94+
{ EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
9395
{ EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
9496
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
9597
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \

tools/include/uapi/asm-generic/mman-common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@
6767
#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
6868
#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
6969

70+
#define MADV_COLD 20 /* deactivate these pages */
71+
#define MADV_PAGEOUT 21 /* reclaim these pages */
72+
7073
/* compatibility flags */
7174
#define MAP_FILE 0
7275

0 commit comments

Comments
 (0)