Commit 7d137e6
fgraph: Remove unnecessary disabling of interrupts and recursion
The function graph tracer disables interrupts as well as prevents
recursion via NMIs when recording the graph tracer code. There's no
reason to do this today. That disabling goes back to 2008 when the
function graph tracer was first introduced and recursion protection
wasn't part of the code.

Today, there's no reason to disable interrupts or prevent the code from
recursing as the infrastructure can easily handle it.

Before this change:

 ~# echo function_graph > /sys/kernel/tracing/current_tracer
 ~# perf stat -r 10 ./hackbench 10
 Time: 4.240
 Time: 4.236
 Time: 4.106
 Time: 4.014
 Time: 4.314
 Time: 3.830
 Time: 4.063
 Time: 4.323
 Time: 3.763
 Time: 3.727

 Performance counter stats for '/work/c/hackbench 10' (10 runs):

          33,937.20 msec task-clock        #    7.008 CPUs utilized     ( +- 1.85% )
             18,220      context-switches  #  536.874 /sec              ( +- 6.41% )
                624      cpu-migrations    #   18.387 /sec              ( +- 9.07% )
             11,319      page-faults       #  333.528 /sec              ( +- 1.97% )
     76,657,643,617      cycles            #    2.259 GHz               ( +- 0.40% )
    141,403,302,768      instructions      #    1.84  insn per cycle    ( +- 0.37% )
     25,518,463,888      branches          #  751.932 M/sec             ( +- 0.35% )
        156,151,050      branch-misses     #    0.61% of all branches   ( +- 0.63% )

             4.8423 +- 0.0892 seconds time elapsed  ( +- 1.84% )

After this change:

 ~# echo function_graph > /sys/kernel/tracing/current_tracer
 ~# perf stat -r 10 ./hackbench 10
 Time: 3.340
 Time: 3.192
 Time: 3.129
 Time: 2.579
 Time: 2.589
 Time: 2.798
 Time: 2.791
 Time: 2.955
 Time: 3.044
 Time: 3.065

 Performance counter stats for './hackbench 10' (10 runs):

          24,416.30 msec task-clock        #    6.996 CPUs utilized     ( +- 2.74% )
             16,764      context-switches  #  686.590 /sec              ( +- 5.85% )
                469      cpu-migrations    #   19.208 /sec              ( +- 6.14% )
             11,519      page-faults       #  471.775 /sec              ( +- 1.92% )
     53,895,628,450      cycles            #    2.207 GHz               ( +- 0.52% )
    105,552,664,638      instructions      #    1.96  insn per cycle    ( +- 0.47% )
     17,808,672,667      branches          #  729.376 M/sec             ( +- 0.48% )
        133,075,435      branch-misses     #    0.75% of all branches   ( +- 0.59% )

              3.490 +- 0.112 seconds time elapsed  ( +- 3.22% )

Also removed unneeded "unlikely()" around the retaddr code.

Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Link: https://lore.kernel.org/[email protected]
Fixes: 9cd2992 ("fgraph: Have set_graph_notrace only affect function_graph tracer") # Performance only
Signed-off-by: Steven Rostedt (Google) <[email protected]>
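In short, the recording hot path moves from an irq-off critical section
guarded by a recursion counter to a preempt-off section that only reads a
flag. A condensed sketch of the shape of the change in trace_graph_entry()
(simplified from the diff below; the retaddr branch and surrounding code are
elided, so this is illustrative rather than the verbatim kernel code):

	/* Before: interrupts off, ->disabled used as a recursion counter */
	local_irq_save(flags);
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->array_buffer.data, cpu);
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1))
		ret = __trace_graph_entry(tr, trace, tracing_gen_ctx_flags(flags));
	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	/*
	 * After: disabling preemption keeps the per-CPU buffer stable, and
	 * ->disabled is only read as an on/off switch; the ring buffer and
	 * fgraph infrastructure already handle interrupt and NMI recursion.
	 */
	preempt_disable_notrace();
	cpu = raw_smp_processor_id();
	data = per_cpu_ptr(tr->array_buffer.data, cpu);
	if (likely(!atomic_read(&data->disabled)))
		ret = __trace_graph_entry(tr, trace, tracing_gen_ctx());
	preempt_enable_notrace();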
1 parent 4bbf902 commit 7d137e6

1 file changed
kernel/trace/trace_functions_graph.c

Lines changed: 15 additions & 22 deletions
@@ -181,10 +181,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 	struct trace_array *tr = gops->private;
 	struct trace_array_cpu *data;
 	struct fgraph_times *ftimes;
-	unsigned long flags;
 	unsigned int trace_ctx;
 	long disabled;
-	int ret;
+	int ret = 0;
 	int cpu;
 
 	if (*task_var & TRACE_GRAPH_NOTRACE)
@@ -235,25 +234,21 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 	if (tracing_thresh)
 		return 1;
 
-	local_irq_save(flags);
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
-	disabled = atomic_inc_return(&data->disabled);
-	if (likely(disabled == 1)) {
-		trace_ctx = tracing_gen_ctx_flags(flags);
-		if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
-			     tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+	disabled = atomic_read(&data->disabled);
+	if (likely(!disabled)) {
+		trace_ctx = tracing_gen_ctx();
+		if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+		    tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
 			unsigned long retaddr = ftrace_graph_top_ret_addr(current);
-
 			ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
-		} else
+		} else {
 			ret = __trace_graph_entry(tr, trace, trace_ctx);
-	} else {
-		ret = 0;
+		}
 	}
-
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 
 	return ret;
 }
@@ -320,7 +315,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 	struct trace_array *tr = gops->private;
 	struct trace_array_cpu *data;
 	struct fgraph_times *ftimes;
-	unsigned long flags;
 	unsigned int trace_ctx;
 	long disabled;
 	int size;
@@ -341,16 +335,15 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 
 	trace->calltime = ftimes->calltime;
 
-	local_irq_save(flags);
+	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
-	disabled = atomic_inc_return(&data->disabled);
-	if (likely(disabled == 1)) {
-		trace_ctx = tracing_gen_ctx_flags(flags);
+	disabled = atomic_read(&data->disabled);
+	if (likely(!disabled)) {
+		trace_ctx = tracing_gen_ctx();
 		__trace_graph_return(tr, trace, trace_ctx);
 	}
-	atomic_dec(&data->disabled);
-	local_irq_restore(flags);
+	preempt_enable_notrace();
 }
 
 static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
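A side effect of dropping local_irq_save() is that there are no saved flags
left to pass to tracing_gen_ctx_flags(), which is why both callers switch to
tracing_gen_ctx(): it samples the live irq state itself. A sketch of the
relationship between the two helpers, along the lines of the
include/linux/trace_events.h definition (simplified; the exact form depends
on CONFIG_TRACE_IRQFLAGS_SUPPORT):

	static inline unsigned int tracing_gen_ctx(void)
	{
		unsigned long irqflags;

		/* Sample the current irq flags without disabling interrupts */
		local_save_flags(irqflags);
		return tracing_gen_ctx_flags(irqflags);
	}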
