Skip to content

Commit 2aa043a

Browse files
committed
tracing/ring-buffer: Fix wait_on_pipe() race
When the trace_pipe_raw file is closed, there should be no new readers on the file descriptor. This is mostly handled with the waking and wait_index fields of the iterator. But there's still a slight race.

     CPU 0                     CPU 1
     -----                     -----
                               wait_index++;
     index = wait_index;
                               ring_buffer_wake_waiters();
     wait_on_pipe()
          ring_buffer_wait();

The ring_buffer_wait() will miss the wakeup from CPU 1. The problem is that the ring_buffer_wait() needs the logic of:

     prepare_to_wait();
     if (!condition)
          schedule();

Where the missing condition check is the iter->wait_index update.

Have the ring_buffer_wait() take a conditional callback function and a data parameter that can be used within the wait_event_interruptible() of the ring_buffer_wait() function. In wait_on_pipe(), pass a condition function that will check if the wait_index has been updated, if it has, it will return true to break out of the wait_event_interruptible() loop.

Create a new field "closed" in the trace_iterator and set it in the .flush() callback before calling ring_buffer_wake_waiters(). This will keep any new readers from waiting on a closed file descriptor. Have the wait_on_pipe() condition callback also check the closed field.

Change the wait_index field of the trace_iterator to atomic_t. There's no reason it needs to be 'long' and making it atomic and using atomic_read_acquire() and atomic_fetch_inc_release() will provide the necessary memory barriers.

Add a "woken" flag to tracing_buffers_splice_read() to exit the loop after one more try to fetch data. That is, if it waited for data and something woke it up, it should try to collect any new data and then exit back to user space.
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=wgsNgewHFxZAJiAQznwPMqEtQmi1waeS2O1v6L4c_Um5A@mail.gmail.com/
Link: https://lore.kernel.org/linux-trace-kernel/[email protected]
Cc: [email protected]
Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: linke li <[email protected]>
Cc: Rabin Vincent <[email protected]>
Fixes: f3ddb74 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 7af9ded commit 2aa043a

File tree

4 files changed

+45
-19
lines changed

4 files changed

+45
-19
lines changed

include/linux/ring_buffer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
9999
})
100100

101101
typedef bool (*ring_buffer_cond_fn)(void *data);
102-
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
102+
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
103+
ring_buffer_cond_fn cond, void *data);
103104
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
104105
struct file *filp, poll_table *poll_table, int full);
105106
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);

include/linux/trace_events.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,16 @@ struct trace_iterator {
103103
unsigned int temp_size;
104104
char *fmt; /* modified format holder */
105105
unsigned int fmt_size;
106-
long wait_index;
106+
atomic_t wait_index;
107107

108108
/* trace_seq for __print_flags() and __print_symbolic() etc. */
109109
struct trace_seq tmp_seq;
110110

111111
cpumask_var_t started;
112112

113+
/* Set when the file is closed to prevent new waiters */
114+
bool closed;
115+
113116
/* it's true when current open file is snapshot */
114117
bool snapshot;
115118

kernel/trace/ring_buffer.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -902,23 +902,26 @@ static bool rb_wait_once(void *data)
902902
* @buffer: buffer to wait on
903903
* @cpu: the cpu buffer to wait on
904904
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
905+
* @cond: condition function to break out of wait (NULL to run once)
906+
* @data: the data to pass to @cond.
905907
*
906908
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
907909
* as data is added to any of the @buffer's cpu buffers. Otherwise
908910
* it will wait for data to be added to a specific cpu buffer.
909911
*/
910-
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
912+
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
913+
ring_buffer_cond_fn cond, void *data)
911914
{
912915
struct ring_buffer_per_cpu *cpu_buffer;
913916
struct wait_queue_head *waitq;
914-
ring_buffer_cond_fn cond;
915917
struct rb_irq_work *rbwork;
916-
void *data;
917918
long once = 0;
918919
int ret = 0;
919920

920-
cond = rb_wait_once;
921-
data = &once;
921+
if (!cond) {
922+
cond = rb_wait_once;
923+
data = &once;
924+
}
922925

923926
/*
924927
* Depending on what the caller is waiting for, either any

kernel/trace/trace.c

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1955,15 +1955,36 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
19551955

19561956
#endif /* CONFIG_TRACER_MAX_TRACE */
19571957

1958+
struct pipe_wait {
1959+
struct trace_iterator *iter;
1960+
int wait_index;
1961+
};
1962+
1963+
static bool wait_pipe_cond(void *data)
1964+
{
1965+
struct pipe_wait *pwait = data;
1966+
struct trace_iterator *iter = pwait->iter;
1967+
1968+
if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1969+
return true;
1970+
1971+
return iter->closed;
1972+
}
1973+
19581974
static int wait_on_pipe(struct trace_iterator *iter, int full)
19591975
{
1976+
struct pipe_wait pwait;
19601977
int ret;
19611978

19621979
/* Iterators are static, they should be filled or empty */
19631980
if (trace_buffer_iter(iter, iter->cpu_file))
19641981
return 0;
19651982

1966-
ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
1983+
pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1984+
pwait.iter = iter;
1985+
1986+
ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1987+
wait_pipe_cond, &pwait);
19671988

19681989
#ifdef CONFIG_TRACER_MAX_TRACE
19691990
/*
@@ -8398,9 +8419,9 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id)
83988419
struct ftrace_buffer_info *info = file->private_data;
83998420
struct trace_iterator *iter = &info->iter;
84008421

8401-
iter->wait_index++;
8422+
iter->closed = true;
84028423
/* Make sure the waiters see the new wait_index */
8403-
smp_wmb();
8424+
(void)atomic_fetch_inc_release(&iter->wait_index);
84048425

84058426
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
84068427

@@ -8500,6 +8521,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
85008521
.spd_release = buffer_spd_release,
85018522
};
85028523
struct buffer_ref *ref;
8524+
bool woken = false;
85038525
int page_size;
85048526
int entries, i;
85058527
ssize_t ret = 0;
@@ -8573,17 +8595,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
85738595

85748596
/* did we read anything? */
85758597
if (!spd.nr_pages) {
8576-
long wait_index;
85778598

85788599
if (ret)
85798600
goto out;
85808601

8602+
if (woken)
8603+
goto out;
8604+
85818605
ret = -EAGAIN;
85828606
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
85838607
goto out;
85848608

8585-
wait_index = READ_ONCE(iter->wait_index);
8586-
85878609
ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
85888610
if (ret)
85898611
goto out;
@@ -8592,10 +8614,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
85928614
if (!tracer_tracing_is_on(iter->tr))
85938615
goto out;
85948616

8595-
/* Make sure we see the new wait_index */
8596-
smp_rmb();
8597-
if (wait_index != iter->wait_index)
8598-
goto out;
8617+
/* Iterate one more time to collect any new data then exit */
8618+
woken = true;
85998619

86008620
goto again;
86018621
}
@@ -8618,9 +8638,8 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
86188638

86198639
mutex_lock(&trace_types_lock);
86208640

8621-
iter->wait_index++;
86228641
/* Make sure the waiters see the new wait_index */
8623-
smp_wmb();
8642+
(void)atomic_fetch_inc_release(&iter->wait_index);
86248643

86258644
ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
86268645

0 commit comments

Comments
 (0)