
Commit c0e809e

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Ftrace is one of the last W^X violators (after this only KLP is
     left). These patches move it over to the generic text_poke()
     interface and thereby get rid of this oddity. This requires a
     surprising amount of surgery, by Peter Zijlstra.

   - x86/AMD PMUs: add support for 'Large Increment per Cycle Events'
     to count certain types of events that have a special, quirky hw
     ABI (by Kim Phillips)

   - kprobes fixes by Masami Hiramatsu

  Lots of tooling updates as well, the following subcommands were
  updated: annotate/report/top, c2c, clang, record, report/top TUI,
  sched timehist, tests; plus updates were done to the gtk ui, libperf,
  headers and the parser"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (57 commits)
  perf/x86/amd: Add support for Large Increment per Cycle Events
  perf/x86/amd: Constrain Large Increment per Cycle events
  perf/x86/intel/rapl: Add Comet Lake support
  tracing: Initialize ret in syscall_enter_define_fields()
  perf header: Use last modification time for timestamp
  perf c2c: Fix return type for histogram sorting comparision functions
  perf beauty sockaddr: Fix augmented syscall format warning
  perf/ui/gtk: Fix gtk2 build
  perf ui gtk: Add missing zalloc object
  perf tools: Use %define api.pure full instead of %pure-parser
  libperf: Setup initial evlist::all_cpus value
  perf report: Fix no libunwind compiled warning break s390 issue
  perf tools: Support --prefix/--prefix-strip
  perf report: Clarify in help that --children is default
  tools build: Fix test-clang.cpp with Clang 8+
  perf clang: Fix build with Clang 9
  kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic
  tools lib: Fix builds when glibc contains strlcpy()
  perf report/top: Make 'e' visible in the help and make it toggle showing callchains
  perf report/top: Do not offer annotation for symbols without samples
  ...
2 parents 2180f21 + 0cc4bd8 commit c0e809e

128 files changed, +2515 -1591 lines


arch/arm/kernel/Makefile

Lines changed: 2 additions & 2 deletions
@@ -53,8 +53,8 @@ obj-$(CONFIG_HAVE_ARM_SCU)	+= smp_scu.o
 obj-$(CONFIG_HAVE_ARM_TWD)		+= smp_twd.o
 obj-$(CONFIG_ARM_ARCH_TIMER)		+= arch_timer.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= entry-ftrace.o
-obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o insn.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o
+obj-$(CONFIG_DYNAMIC_FTRACE)		+= ftrace.o insn.o patch.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC)			+= machine_kexec.o relocate_kernel.o
 # Main staffs in KPROBES are in arch/arm/probes/ .

arch/arm/kernel/ftrace.c

Lines changed: 2 additions & 8 deletions
@@ -22,6 +22,7 @@
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 #include <asm/set_memory.h>
+#include <asm/patch.h>
 
 #ifdef CONFIG_THUMB2_KERNEL
 #define	NOP		0xf85deb04	/* pop.w {lr} */
@@ -35,9 +36,7 @@ static int __ftrace_modify_code(void *data)
 {
 	int *command = data;
 
-	set_kernel_text_rw();
 	ftrace_modify_all_code(*command);
-	set_kernel_text_ro();
 
 	return 0;
 }
@@ -59,13 +58,11 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 
 int ftrace_arch_code_modify_prepare(void)
 {
-	set_all_modules_text_rw();
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
-	set_all_modules_text_ro();
 	/* Make sure any TLB misses during machine stop are cleared. */
 	flush_tlb_all();
 	return 0;
@@ -97,10 +94,7 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old,
 		return -EINVAL;
 	}
 
-	if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE))
-		return -EPERM;
-
-	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+	__patch_text((void *)pc, new);
 
 	return 0;
 }
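The hunks above drop the set_kernel_text_rw()/set_all_modules_text_rw() round trip and route the instruction write through __patch_text(), which writes the new opcode via a temporary alias mapping instead of loosening permissions on the live text. Below is a minimal userspace analogue of that model, not kernel code and not part of this commit: it assumes only standard Linux interfaces (memfd_create(), mmap()) and an invented one-page buffer standing in for kernel text, and error handling is omitted for brevity.

/*
 * Userspace analogue of the patching model: the "text" view stays
 * read-only; the write goes through a short-lived writable alias of
 * the same pages, then the alias is torn down again.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("patch-demo", 0);

	ftruncate(fd, page);

	/* The "kernel text": mapped read-only, never flipped to writable. */
	char *ro = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);

	/* Temporary writable alias of the same backing pages. */
	char *rw = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	memcpy(rw, "patched", sizeof("patched"));	/* the "poke" */
	munmap(rw, page);				/* drop the alias */

	printf("read-only view now contains: %s\n", ro);
	return 0;
}

In the kernel the alias is a fixmap slot on arm (and a temporary mm for x86 text_poke()), but the permission model is the same: the executable mapping itself is never made writable.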

arch/nds32/kernel/ftrace.c

Lines changed: 0 additions & 12 deletions
@@ -89,18 +89,6 @@ int __init ftrace_dyn_arch_init(void)
 	return 0;
 }
 
-int ftrace_arch_code_modify_prepare(void)
-{
-	set_all_modules_text_rw();
-	return 0;
-}
-
-int ftrace_arch_code_modify_post_process(void)
-{
-	set_all_modules_text_ro();
-	return 0;
-}
-
 static unsigned long gen_sethi_insn(unsigned long addr)
 {
 	unsigned long opcode = 0x46000000;

arch/x86/events/amd/core.c

Lines changed: 79 additions & 30 deletions
@@ -14,6 +14,10 @@
 static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
 static unsigned long perf_nmi_window;
 
+/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */
+#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL)
+#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE)
+
 static __initconst const u64 amd_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -301,6 +305,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel)
 	return offset;
 }
 
+/*
+ * AMD64 events are detected based on their event codes.
+ */
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
+static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
+{
+	if (!(x86_pmu.flags & PMU_FL_PAIR))
+		return false;
+
+	switch (amd_get_event_code(hwc)) {
+	case 0x003:	return true;	/* Retired SSE/AVX FLOPs */
+	default:	return false;
+	}
+}
+
 static int amd_core_hw_config(struct perf_event *event)
 {
 	if (event->attr.exclude_host && event->attr.exclude_guest)
@@ -316,15 +339,10 @@ static int amd_core_hw_config(struct perf_event *event)
 	else if (event->attr.exclude_guest)
 		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
 
-	return 0;
-}
+	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
+		event->hw.flags |= PERF_X86_EVENT_PAIR;
 
-/*
- * AMD64 events are detected based on their event codes.
- */
-static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
-{
-	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+	return 0;
 }
 
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -855,6 +873,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
 	}
 }
 
+static struct event_constraint pair_constraint;
+
+static struct event_constraint *
+amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx,
+			       struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (amd_is_pair_event_code(hwc))
+		return &pair_constraint;
+
+	return &unconstrained;
+}
+
+static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc,
+					   struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (is_counter_pair(hwc))
+		--cpuc->n_pair;
+}
+
 static ssize_t amd_event_sysfs_show(char *page, u64 config)
 {
 	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
@@ -898,33 +939,15 @@ static __initconst const struct x86_pmu amd_pmu = {
 
 static int __init amd_core_pmu_init(void)
 {
+	u64 even_ctr_mask = 0ULL;
+	int i;
+
 	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
 		return 0;
 
-	/* Avoid calulating the value each time in the NMI handler */
+	/* Avoid calculating the value each time in the NMI handler */
 	perf_nmi_window = msecs_to_jiffies(100);
 
-	switch (boot_cpu_data.x86) {
-	case 0x15:
-		pr_cont("Fam15h ");
-		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-		break;
-	case 0x17:
-		pr_cont("Fam17h ");
-		/*
-		 * In family 17h, there are no event constraints in the PMC hardware.
-		 * We fallback to using default amd_get_event_constraints.
-		 */
-		break;
-	case 0x18:
-		pr_cont("Fam18h ");
-		/* Using default amd_get_event_constraints. */
-		break;
-	default:
-		pr_err("core perfctr but no constraints; unknown hardware!\n");
-		return -ENODEV;
-	}
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
@@ -939,6 +962,32 @@ static int __init amd_core_pmu_init(void)
 	 */
 	x86_pmu.amd_nb_constraints = 0;
 
+	if (boot_cpu_data.x86 == 0x15) {
+		pr_cont("Fam15h ");
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+	}
+	if (boot_cpu_data.x86 >= 0x17) {
+		pr_cont("Fam17h+ ");
+		/*
+		 * Family 17h and compatibles have constraints for Large
+		 * Increment per Cycle events: they may only be assigned an
+		 * even numbered counter that has a consecutive adjacent odd
+		 * numbered counter following it.
+		 */
+		for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+			even_ctr_mask |= 1 << i;
+
+		pair_constraint = (struct event_constraint)
+				    __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
+				    x86_pmu.num_counters / 2, 0,
+				    PERF_X86_EVENT_PAIR);
+
+		x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
+		x86_pmu.put_event_constraints = amd_put_event_constraints_f17h;
+		x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE;
+		x86_pmu.flags |= PMU_FL_PAIR;
+	}
+
 	pr_cont("core perfctr, ");
 	return 0;
 }
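As a sanity check on the arithmetic the hunks above introduce, the following standalone sketch (plain userspace C, not kernel code and not part of this commit) mirrors amd_get_event_code() and the even_ctr_mask loop from amd_core_pmu_init(). The six-counter value and the printed configs are illustrative assumptions; the encodings themselves come straight from the diff.

/*
 * Standalone illustration of the bit arithmetic in the hunk above.
 */
#include <stdio.h>
#include <stdint.h>

/* Mirror of amd_get_event_code(): the AMD event select is split across
 * bits [7:0] and [35:32] of the event-select register image. */
static unsigned int event_code(uint64_t config)
{
	return ((config >> 24) & 0x0f00) | (config & 0x00ff);
}

int main(void)
{
	/* AMD_MERGE_EVENT from the diff decodes to event 0xFFF (Merge). */
	uint64_t merge = (0xFULL << 32) | 0xFFULL;
	printf("merge event code:  0x%03x\n", event_code(merge));

	/* Retired SSE/AVX FLOPs, the one paired event code handled above. */
	printf("paired event code: 0x%03x\n", event_code(0x003));

	/* even_ctr_mask as built in amd_core_pmu_init(): with six counters
	 * (an assumed, illustrative count), counters 0, 2 and 4 are
	 * eligible, each paired with the odd counter right after it. */
	int num_counters = 6;
	uint64_t even_ctr_mask = 0;
	for (int i = 0; i < num_counters - 1; i += 2)
		even_ctr_mask |= 1ULL << i;
	printf("even_ctr_mask:     0x%llx\n",	/* prints 0x15 */
	       (unsigned long long)even_ctr_mask);
	return 0;
}

The resulting mask feeds __EVENT_CONSTRAINT() so that a Large Increment per Cycle event can only land on an even-numbered counter whose adjacent odd counter is free to act as its merge partner.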
