Skip to content

Commit 21fbefc

Browse files
committed
Merge tag 'trace-v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing updates from Steven Rostedt: - Use READ_ONCE() and WRITE_ONCE() instead of RCU for syscall tracepoints Individual system call trace events are pseudo events attached to the raw_syscall trace events that just trace the entry and exit of all system calls. When any of these individual system call trace events get enabled, an element in an array indexed by the system call number is assigned to the trace file that defines how to trace it. When the trace event triggers, it reads this array and if the array has an element, it uses that trace file to know what to write it (the trace file defines the output format of the corresponding system call). The issue is that it uses rcu_dereference_ptr() and marks the elements of the array as using RCU. This is incorrect. There is no RCU synchronization here. The event file that is pointed to has a completely different way to make sure its freed properly. The reading of the array during the system call trace event is only to know if there is a value or not. If not, it does nothing (it means this system call isn't being traced). If it does, it uses the information to store the system call data. The RCU usage here can simply be replaced by READ_ONCE() and WRITE_ONCE() macros. - Have the system call trace events use "0x" for hex values Some system call trace events display hex values but do not have "0x" in front of it. Seeing "count: 44" can be assumed that it is 44 decimal when in actuality it is 44 hex (68 decimal). Display "0x44" instead. - Use vmalloc_array() in tracing_map_sort_entries() The function tracing_map_sort_entries() used array_size() and vmalloc() when it could have simply used vmalloc_array(). - Use for_each_online_cpu() in trace_osnoise.c() Instead of open coding for_each_cpu(cpu, cpu_online_mask), use for_each_online_cpu(). - Move the buffer field in struct trace_seq to the end The buffer field in struct trace_seq is architecture dependent in size, and caused padding for the fields after it. By moving the buffer to the end of the structure, it compacts the trace_seq structure better. - Remove redundant zeroing of cmdline_idx field in saved_cmdlines_buffer() The structure that contains cmdline_idx is zeroed by memset(), no need to explicitly zero any of its fields after that. - Use system_percpu_wq instead of system_wq in user_event_mm_remove() As system_wq is being deprecated, use the new wq. - Add cond_resched() is ftrace_module_enable() Some modules have a lot of functions (thousands of them), and the enabling of those functions can take some time. On non preemtable kernels, it was triggering a watchdog timeout. Add a cond_resched() to prevent that. - Add a BUILD_BUG_ON() to make sure PID_MAX_DEFAULT is always a power of 2 There's code that depends on PID_MAX_DEFAULT being a power of 2 or it will break. If in the future that changes, make sure the build fails to ensure that the code is fixed that depends on this. - Grab mutex_lock() before ever exiting s_start() The s_start() function is a seq_file start routine. As s_stop() is always called even if s_start() fails, and s_stop() expects the event_mutex to be held as it will always release it. That mutex must always be taken in s_start() even if that function fails. * tag 'trace-v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: tracing: Fix lock imbalance in s_start() memory allocation failure path tracing: Ensure optimized hashing works ftrace: Fix softlockup in ftrace_module_enable tracing: replace use of system_wq with system_percpu_wq tracing: Remove redundant 0 value initialization tracing: Move buffer in trace_seq to end of struct tracing/osnoise: Use for_each_online_cpu() instead of for_each_cpu() tracing: Use vmalloc_array() to improve code tracing: Have syscall trace events show "0x" for values greater than 10 tracing: Replace syscall RCU pointer assignment with READ/WRITE_ONCE()
2 parents d9f24f8 + 61e19cd commit 21fbefc

File tree

9 files changed

+27
-21
lines changed

9 files changed

+27
-21
lines changed

include/linux/trace_seq.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
(sizeof(struct seq_buf) + sizeof(size_t) + sizeof(int)))
2222

2323
struct trace_seq {
24-
char buffer[TRACE_SEQ_BUFFER_SIZE];
2524
struct seq_buf seq;
2625
size_t readpos;
2726
int full;
27+
char buffer[TRACE_SEQ_BUFFER_SIZE];
2828
};
2929

3030
static inline void

kernel/trace/ftrace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7535,6 +7535,8 @@ void ftrace_module_enable(struct module *mod)
75357535
if (!within_module(rec->ip, mod))
75367536
break;
75377537

7538+
cond_resched();
7539+
75387540
/* Weak functions should still be ignored */
75397541
if (!test_for_valid_rec(rec)) {
75407542
/* Clear all other flags. Should not be enabled anyway */

kernel/trace/trace.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -380,8 +380,8 @@ struct trace_array {
380380
#ifdef CONFIG_FTRACE_SYSCALLS
381381
int sys_refcount_enter;
382382
int sys_refcount_exit;
383-
struct trace_event_file __rcu *enter_syscall_files[NR_syscalls];
384-
struct trace_event_file __rcu *exit_syscall_files[NR_syscalls];
383+
struct trace_event_file *enter_syscall_files[NR_syscalls];
384+
struct trace_event_file *exit_syscall_files[NR_syscalls];
385385
#endif
386386
int stop_count;
387387
int clock_id;

kernel/trace/trace_events.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,11 +1629,10 @@ static void *s_start(struct seq_file *m, loff_t *pos)
16291629
loff_t l;
16301630

16311631
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1632+
mutex_lock(&event_mutex);
16321633
if (!iter)
16331634
return NULL;
16341635

1635-
mutex_lock(&event_mutex);
1636-
16371636
iter->type = SET_EVENT_FILE;
16381637
iter->file = list_entry(&tr->events, struct trace_event_file, list);
16391638

kernel/trace/trace_events_user.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,7 @@ void user_event_mm_remove(struct task_struct *t)
835835
* so we use a work queue after call_rcu() to run within.
836836
*/
837837
INIT_RCU_WORK(&mm->put_rwork, delayed_user_event_mm_put);
838-
queue_rcu_work(system_wq, &mm->put_rwork);
838+
queue_rcu_work(system_percpu_wq, &mm->put_rwork);
839839
}
840840

841841
void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm)

kernel/trace/trace_osnoise.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ static inline void tlat_var_reset(void)
271271
* So far, all the values are initialized as 0, so
272272
* zeroing the structure is perfect.
273273
*/
274-
for_each_cpu(cpu, cpu_online_mask) {
274+
for_each_online_cpu(cpu) {
275275
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
276276
if (tlat_var->kthread)
277277
hrtimer_cancel(&tlat_var->timer);
@@ -295,7 +295,7 @@ static inline void osn_var_reset(void)
295295
* So far, all the values are initialized as 0, so
296296
* zeroing the structure is perfect.
297297
*/
298-
for_each_cpu(cpu, cpu_online_mask) {
298+
for_each_online_cpu(cpu) {
299299
osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
300300
memset(osn_var, 0, sizeof(*osn_var));
301301
}

kernel/trace/trace_sched_switch.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,6 @@ static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
224224
/* Place map_cmdline_to_pid array right after saved_cmdlines */
225225
s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
226226

227-
s->cmdline_idx = 0;
228227
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
229228
sizeof(s->map_pid_to_cmdline));
230229
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
@@ -248,6 +247,8 @@ int trace_save_cmdline(struct task_struct *tsk)
248247
if (!tsk->pid)
249248
return 1;
250249

250+
BUILD_BUG_ON(!is_power_of_2(PID_MAX_DEFAULT));
251+
251252
tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
252253

253254
/*

kernel/trace/trace_syscalls.c

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,20 @@ print_syscall_enter(struct trace_iterator *iter, int flags,
153153
if (trace_seq_has_overflowed(s))
154154
goto end;
155155

156+
if (i)
157+
trace_seq_puts(s, ", ");
158+
156159
/* parameter types */
157160
if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
158161
trace_seq_printf(s, "%s ", entry->types[i]);
159162

160163
/* parameter values */
161-
trace_seq_printf(s, "%s: %lx%s", entry->args[i],
162-
trace->args[i],
163-
i == entry->nb_args - 1 ? "" : ", ");
164+
if (trace->args[i] < 10)
165+
trace_seq_printf(s, "%s: %lu", entry->args[i],
166+
trace->args[i]);
167+
else
168+
trace_seq_printf(s, "%s: 0x%lx", entry->args[i],
169+
trace->args[i]);
164170
}
165171

166172
trace_seq_putc(s, ')');
@@ -310,8 +316,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
310316
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
311317
return;
312318

313-
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
314-
trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
319+
trace_file = READ_ONCE(tr->enter_syscall_files[syscall_nr]);
315320
if (!trace_file)
316321
return;
317322

@@ -356,8 +361,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
356361
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
357362
return;
358363

359-
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
360-
trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
364+
trace_file = READ_ONCE(tr->exit_syscall_files[syscall_nr]);
361365
if (!trace_file)
362366
return;
363367

@@ -393,7 +397,7 @@ static int reg_event_syscall_enter(struct trace_event_file *file,
393397
if (!tr->sys_refcount_enter)
394398
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
395399
if (!ret) {
396-
rcu_assign_pointer(tr->enter_syscall_files[num], file);
400+
WRITE_ONCE(tr->enter_syscall_files[num], file);
397401
tr->sys_refcount_enter++;
398402
}
399403
mutex_unlock(&syscall_trace_lock);
@@ -411,7 +415,7 @@ static void unreg_event_syscall_enter(struct trace_event_file *file,
411415
return;
412416
mutex_lock(&syscall_trace_lock);
413417
tr->sys_refcount_enter--;
414-
RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
418+
WRITE_ONCE(tr->enter_syscall_files[num], NULL);
415419
if (!tr->sys_refcount_enter)
416420
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
417421
mutex_unlock(&syscall_trace_lock);
@@ -431,7 +435,7 @@ static int reg_event_syscall_exit(struct trace_event_file *file,
431435
if (!tr->sys_refcount_exit)
432436
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
433437
if (!ret) {
434-
rcu_assign_pointer(tr->exit_syscall_files[num], file);
438+
WRITE_ONCE(tr->exit_syscall_files[num], file);
435439
tr->sys_refcount_exit++;
436440
}
437441
mutex_unlock(&syscall_trace_lock);
@@ -449,7 +453,7 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
449453
return;
450454
mutex_lock(&syscall_trace_lock);
451455
tr->sys_refcount_exit--;
452-
RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
456+
WRITE_ONCE(tr->exit_syscall_files[num], NULL);
453457
if (!tr->sys_refcount_exit)
454458
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
455459
mutex_unlock(&syscall_trace_lock);

kernel/trace/tracing_map.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1076,7 +1076,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
10761076
struct tracing_map_sort_entry *sort_entry, **entries;
10771077
int i, n_entries, ret;
10781078

1079-
entries = vmalloc(array_size(sizeof(sort_entry), map->max_elts));
1079+
entries = vmalloc_array(map->max_elts, sizeof(sort_entry));
10801080
if (!entries)
10811081
return -ENOMEM;
10821082

0 commit comments

Comments
 (0)