Skip to content

Commit 7b382ef

Browse files
committed
tracing: Allow the top level trace_marker to write into other instances
There are applications that have it hard coded to write into the top level trace_marker instance (/sys/kernel/tracing/trace_marker). This can be annoying if a profiler is using that instance for other work, or if it needs all writes to go into a new instance. A new option is created called "copy_trace_marker". By default, the top level has this set, as that is the default buffer that writing into the top level trace_marker file will go to. But now if an instance is created and sets this option, all writes into the top level trace_marker will also be written into that instance buffer just as if an application were to write into the instance's trace_marker file. If the top level instance disables this option, then writes to its own trace_marker and trace_marker_raw files will not go into its buffer. If no instance has this option set, then the write will return an error and errno will contain ENODEV. Cc: Masami Hiramatsu <[email protected]> Cc: Mathieu Desnoyers <[email protected]> Link: https://lore.kernel.org/[email protected] Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 6956ea9 commit 7b382ef

File tree

3 files changed

+128
-31
lines changed

3 files changed

+128
-31
lines changed

Documentation/trace/ftrace.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,6 +1205,19 @@ Here are the available options:
12051205
default instance. The only way the top level instance has this flag
12061206
cleared, is by it being set in another instance.
12071207

1208+
copy_trace_marker
1209+
If there are applications that hard code writing into the top level
1210+
trace_marker file (/sys/kernel/tracing/trace_marker or trace_marker_raw),
1211+
and the tooling would like it to go into an instance, this option can
1212+
be used. Create an instance and set this option, and then all writes
1213+
into the top level trace_marker file will also be written into this
1214+
instance.
1215+
1216+
Note, by default this option is set for the top level instance. If it
1217+
is disabled, then writes to the trace_marker or trace_marker_raw files
1218+
will not be written into the top level buffer. If no instance has this
1219+
option set, then a write will error with the errno of ENODEV.
1220+
12081221
annotate
12091222
It is sometimes confusing when the CPU buffers are full
12101223
and one CPU buffer had a lot of events recently, thus

kernel/trace/trace.c

Lines changed: 113 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -493,15 +493,17 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
493493
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
494494
TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
495495
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
496-
TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
496+
TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
497+
TRACE_ITER_COPY_MARKER)
497498

498499
/* trace_options that are only supported by global_trace */
499500
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
500501
TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
501502

502503
/* trace_flags that are default zero for instances */
503504
#define ZEROED_TRACE_FLAGS \
504-
(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
505+
(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
506+
TRACE_ITER_COPY_MARKER)
505507

506508
/*
507509
* The global_trace is the descriptor that holds the top-level tracing
@@ -513,6 +515,9 @@ static struct trace_array global_trace = {
513515

514516
static struct trace_array *printk_trace = &global_trace;
515517

518+
/* List of trace_arrays interested in the top level trace_marker */
519+
static LIST_HEAD(marker_copies);
520+
516521
static __always_inline bool printk_binsafe(struct trace_array *tr)
517522
{
518523
/*
@@ -534,6 +539,28 @@ static void update_printk_trace(struct trace_array *tr)
534539
tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
535540
}
536541

542+
/*
 * Add @tr to, or remove it from, the marker_copies list and keep
 * TRACE_ITER_COPY_MARKER in tr->trace_flags in step with that membership.
 *
 * @tr:      the trace_array whose copy-marker state is being updated
 * @enabled: non-zero to add @tr to marker_copies, zero to remove it
 *
 * The caller must hold event_mutex. Membership is tracked by whether
 * tr->marker_list is empty, so a request that matches the current state
 * is a no-op.
 *
 * Returns true if the status of tr changed, false if it was already in
 * the requested state.
 */
543+
static bool update_marker_trace(struct trace_array *tr, int enabled)
544+
{
545+
lockdep_assert_held(&event_mutex);
546+
547+
if (enabled) {
/* A non-empty marker_list means tr is already on marker_copies */
548+
if (!list_empty(&tr->marker_list))
549+
return false;
550+
551+
list_add_rcu(&tr->marker_list, &marker_copies);
552+
tr->trace_flags |= TRACE_ITER_COPY_MARKER;
553+
return true;
554+
}
555+
/* An empty marker_list means tr is not on marker_copies: nothing to remove */
556+
if (list_empty(&tr->marker_list))
557+
return false;
558+
/*
 * NOTE(review): marker_copies is walked with list_for_each_entry_rcu()
 * by the trace_marker write paths, yet the entry is unlinked here with
 * list_del_init() rather than list_del_rcu(). Presumably a reader that
 * is positioned on this entry could follow its re-initialized next
 * pointer -- confirm whether list_del_rcu() (with a membership test that
 * does not rely on list_empty()) is needed. Only the __remove_instance()
 * caller waits with synchronize_rcu() after a removal.
 */
559+
list_del_init(&tr->marker_list);
560+
tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
561+
return true;
562+
}
563+
537564
void trace_set_ring_buffer_expanded(struct trace_array *tr)
538565
{
539566
if (!tr)
@@ -5220,7 +5247,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
52205247
{
52215248
if ((mask == TRACE_ITER_RECORD_TGID) ||
52225249
(mask == TRACE_ITER_RECORD_CMD) ||
5223-
(mask == TRACE_ITER_TRACE_PRINTK))
5250+
(mask == TRACE_ITER_TRACE_PRINTK) ||
5251+
(mask == TRACE_ITER_COPY_MARKER))
52245252
lockdep_assert_held(&event_mutex);
52255253

52265254
/* do nothing if flag is already set */
@@ -5251,6 +5279,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
52515279
}
52525280
}
52535281

5282+
if (mask == TRACE_ITER_COPY_MARKER)
5283+
update_marker_trace(tr, enabled);
5284+
52545285
if (enabled)
52555286
tr->trace_flags |= mask;
52565287
else
@@ -7134,11 +7165,9 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
71347165

71357166
#define TRACE_MARKER_MAX_SIZE 4096
71367167

7137-
static ssize_t
7138-
tracing_mark_write(struct file *filp, const char __user *ubuf,
7139-
size_t cnt, loff_t *fpos)
7168+
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7169+
size_t cnt, unsigned long ip)
71407170
{
7141-
struct trace_array *tr = filp->private_data;
71427171
struct ring_buffer_event *event;
71437172
enum event_trigger_type tt = ETT_NONE;
71447173
struct trace_buffer *buffer;
@@ -7152,18 +7181,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
71527181
#define FAULTED_STR "<faulted>"
71537182
#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
71547183

7155-
if (tracing_disabled)
7156-
return -EINVAL;
7157-
7158-
if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7159-
return -EINVAL;
7160-
7161-
if ((ssize_t)cnt < 0)
7162-
return -EINVAL;
7163-
7164-
if (cnt > TRACE_MARKER_MAX_SIZE)
7165-
cnt = TRACE_MARKER_MAX_SIZE;
7166-
71677184
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
71687185
again:
71697186
size = cnt + meta_size;
@@ -7196,7 +7213,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
71967213
}
71977214

71987215
entry = ring_buffer_event_data(event);
7199-
entry->ip = _THIS_IP_;
7216+
entry->ip = ip;
72007217

72017218
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
72027219
if (len) {
@@ -7229,29 +7246,55 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
72297246
}
72307247

72317248
static ssize_t
7232-
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7249+
tracing_mark_write(struct file *filp, const char __user *ubuf,
72337250
size_t cnt, loff_t *fpos)
72347251
{
72357252
struct trace_array *tr = filp->private_data;
7236-
struct ring_buffer_event *event;
7237-
struct trace_buffer *buffer;
7238-
struct raw_data_entry *entry;
7239-
ssize_t written;
7240-
int size;
7241-
int len;
7242-
7243-
#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7253+
ssize_t written = -ENODEV;
7254+
unsigned long ip;
72447255

72457256
if (tracing_disabled)
72467257
return -EINVAL;
72477258

72487259
if (!(tr->trace_flags & TRACE_ITER_MARKERS))
72497260
return -EINVAL;
72507261

7251-
/* The marker must at least have a tag id */
7252-
if (cnt < sizeof(unsigned int))
7262+
if ((ssize_t)cnt < 0)
72537263
return -EINVAL;
72547264

7265+
if (cnt > TRACE_MARKER_MAX_SIZE)
7266+
cnt = TRACE_MARKER_MAX_SIZE;
7267+
7268+
/* The selftests expect this function to be the IP address */
7269+
ip = _THIS_IP_;
7270+
7271+
/* The global trace_marker can go to multiple instances */
7272+
if (tr == &global_trace) {
7273+
guard(rcu)();
7274+
list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7275+
written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7276+
if (written < 0)
7277+
break;
7278+
}
7279+
} else {
7280+
written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7281+
}
7282+
7283+
return written;
7284+
}
7285+
7286+
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7287+
const char __user *ubuf, size_t cnt)
7288+
{
7289+
struct ring_buffer_event *event;
7290+
struct trace_buffer *buffer;
7291+
struct raw_data_entry *entry;
7292+
ssize_t written;
7293+
int size;
7294+
int len;
7295+
7296+
#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7297+
72557298
size = sizeof(*entry) + cnt;
72567299
if (cnt < FAULT_SIZE_ID)
72577300
size += FAULT_SIZE_ID - cnt;
@@ -7282,6 +7325,40 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
72827325
return written;
72837326
}
72847327

7328+
/*
 * Write handler for the trace_marker_raw file.
 *
 * @filp: open file; filp->private_data is the trace_array it belongs to
 * @ubuf: user buffer holding the raw marker data
 * @cnt:  number of bytes in @ubuf; must be at least sizeof(unsigned int)
 * @fpos: file position (not used here)
 *
 * After validating the request, the data is handed to
 * write_raw_marker_to_buffer(). When the file belongs to global_trace the
 * write is repeated for every trace_array on the marker_copies list;
 * otherwise it goes only to the file's own trace_array.
 *
 * Returns the value of the last write_raw_marker_to_buffer() call,
 * -EINVAL if validation fails, or -ENODEV if the file belongs to
 * global_trace and marker_copies is empty.
 */
static ssize_t
7329+
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7330+
size_t cnt, loff_t *fpos)
7331+
{
7332+
struct trace_array *tr = filp->private_data;
7333+
/* Stays -ENODEV if the loop below finds no trace_array to write to */
ssize_t written = -ENODEV;
7334+
/*
 * NOTE(review): FAULT_SIZE_ID is not referenced in this function, and
 * the same definition also appears in write_raw_marker_to_buffer();
 * this copy looks like a leftover that could be dropped.
 */
7335+
#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7336+
7337+
if (tracing_disabled)
7338+
return -EINVAL;
7339+
7340+
if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7341+
return -EINVAL;
7342+
7343+
/* The marker must at least have a tag id */
7344+
if (cnt < sizeof(unsigned int))
7345+
return -EINVAL;
7346+
7347+
/* The global trace_marker_raw can go to multiple instances */
7348+
if (tr == &global_trace) {
7349+
guard(rcu)();
/*
 * tr is reused as the list cursor. The loop stops at the first
 * failing write and returns that error; writes already made to
 * earlier trace_arrays on the list are not undone.
 */
7350+
list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7351+
written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7352+
if (written < 0)
7353+
break;
7354+
}
7355+
} else {
7356+
written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7357+
}
7358+
7359+
return written;
7360+
}
7361+
72857362
static int tracing_clock_show(struct seq_file *m, void *v)
72867363
{
72877364
struct trace_array *tr = m->private;
@@ -9775,6 +9852,7 @@ trace_array_create_systems(const char *name, const char *systems,
97759852
INIT_LIST_HEAD(&tr->events);
97769853
INIT_LIST_HEAD(&tr->hist_vars);
97779854
INIT_LIST_HEAD(&tr->err_log);
9855+
INIT_LIST_HEAD(&tr->marker_list);
97789856

97799857
#ifdef CONFIG_MODULES
97809858
INIT_LIST_HEAD(&tr->mod_events);
@@ -9934,6 +10012,9 @@ static int __remove_instance(struct trace_array *tr)
993410012
if (printk_trace == tr)
993510013
update_printk_trace(&global_trace);
993610014

10015+
if (update_marker_trace(tr, 0))
10016+
synchronize_rcu();
10017+
993710018
tracing_set_nop(tr);
993810019
clear_ftrace_function_probes(tr);
993910020
event_trace_del_tracer(tr);
@@ -10999,6 +11080,7 @@ __init static int tracer_alloc_buffers(void)
1099911080
INIT_LIST_HEAD(&global_trace.events);
1100011081
INIT_LIST_HEAD(&global_trace.hist_vars);
1100111082
INIT_LIST_HEAD(&global_trace.err_log);
11083+
list_add(&global_trace.marker_list, &marker_copies);
1100211084
list_add(&global_trace.list, &ftrace_trace_arrays);
1100311085

1100411086
apply_trace_boot_options();

kernel/trace/trace.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ struct trace_array {
403403
struct trace_options *topts;
404404
struct list_head systems;
405405
struct list_head events;
406+
struct list_head marker_list;
406407
struct trace_event_file *trace_marker_file;
407408
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
408409
/* one per_cpu trace_pipe can be opened by only one user */
@@ -1384,6 +1385,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
13841385
C(MARKERS, "markers"), \
13851386
C(EVENT_FORK, "event-fork"), \
13861387
C(TRACE_PRINTK, "trace_printk_dest"), \
1388+
C(COPY_MARKER, "copy_trace_marker"),\
13871389
C(PAUSE_ON_TRACE, "pause-on-trace"), \
13881390
C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \
13891391
FUNCTION_FLAGS \

0 commit comments

Comments
 (0)