Skip to content

Commit d50b07d

Browse files
committed
Merge tag 'trace-ringbuffer-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull ring-buffer updates from Steven Rostedt: - Rewind persistent ring buffer on boot When the persistent ring buffer is being used for live kernel tracing and the system crashes, the tool that is reading the trace may not have recorded the data when the system crashed. Although the persistent ring buffer still has that data, when reading it after a reboot, it will start where it left off. That is, what was read will not be accessible. Instead, on reboot, have the persistent ring buffer restart where the data starts and this will allow the tooling to recover what was lost when the crash occurred. - Remove the ring_buffer_read_prepare_sync() logic Reading the trace file required stopping writing to the ring buffer as the trace file is only an iterator and does not consume what it reads. It was originally not safe to read the ring buffer in this mode and required disabling writing. The ring_buffer_read_prepare_sync() logic was used to stop each per_cpu ring buffer, call synchronize_rcu() and then start the iterator. This was used instead of calling synchronize_rcu() for each per_cpu buffer. Today, the iterator has been updated where it is safe to read the trace file while writing to the ring buffer is still occurring. There is no more need to do this synchronization and it is causing large delays on machines with many CPUs. Remove this unneeded synchronization. - Make static string array a constant in show_irq_str() Making the string array into a constant has been shown to decrease code text/data size. * tag 'trace-ringbuffer-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: ring-buffer: Make the const read-only 'type' static ring-buffer: Remove ring_buffer_read_prepare_sync() tracing: ring_buffer: Rewind persistent ring buffer on reboot
2 parents 90a871f + 6443cdf commit d50b07d

File tree

4 files changed

+119
-75
lines changed

4 files changed

+119
-75
lines changed

include/linux/ring_buffer.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,7 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
152152
unsigned long *lost_events);
153153

154154
struct ring_buffer_iter *
155-
ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags);
156-
void ring_buffer_read_prepare_sync(void);
157-
void ring_buffer_read_start(struct ring_buffer_iter *iter);
155+
ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags);
158156
void ring_buffer_read_finish(struct ring_buffer_iter *iter);
159157

160158
struct ring_buffer_event *

kernel/trace/ring_buffer.c

Lines changed: 111 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1358,6 +1358,13 @@ static inline void rb_inc_page(struct buffer_page **bpage)
13581358
*bpage = list_entry(p, struct buffer_page, list);
13591359
}
13601360

1361+
static inline void rb_dec_page(struct buffer_page **bpage)
1362+
{
1363+
struct list_head *p = rb_list_head((*bpage)->list.prev);
1364+
1365+
*bpage = list_entry(p, struct buffer_page, list);
1366+
}
1367+
13611368
static struct buffer_page *
13621369
rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
13631370
{
@@ -1866,10 +1873,11 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
18661873
static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
18671874
{
18681875
struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
1869-
struct buffer_page *head_page;
1876+
struct buffer_page *head_page, *orig_head;
18701877
unsigned long entry_bytes = 0;
18711878
unsigned long entries = 0;
18721879
int ret;
1880+
u64 ts;
18731881
int i;
18741882

18751883
if (!meta || !meta->head_buffer)
@@ -1885,8 +1893,98 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
18851893
entry_bytes += local_read(&cpu_buffer->reader_page->page->commit);
18861894
local_set(&cpu_buffer->reader_page->entries, ret);
18871895

1888-
head_page = cpu_buffer->head_page;
1896+
orig_head = head_page = cpu_buffer->head_page;
1897+
ts = head_page->page->time_stamp;
1898+
1899+
/*
1900+
* Try to rewind the head so that we can read the pages which already
1901+
* read in the previous boot.
1902+
*/
1903+
if (head_page == cpu_buffer->tail_page)
1904+
goto skip_rewind;
1905+
1906+
rb_dec_page(&head_page);
1907+
for (i = 0; i < meta->nr_subbufs + 1; i++, rb_dec_page(&head_page)) {
1908+
1909+
/* Rewind until tail (writer) page. */
1910+
if (head_page == cpu_buffer->tail_page)
1911+
break;
1912+
1913+
/* Ensure the page has older data than head. */
1914+
if (ts < head_page->page->time_stamp)
1915+
break;
1916+
1917+
ts = head_page->page->time_stamp;
1918+
/* Ensure the page has correct timestamp and some data. */
1919+
if (!ts || rb_page_commit(head_page) == 0)
1920+
break;
1921+
1922+
/* Stop rewind if the page is invalid. */
1923+
ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
1924+
if (ret < 0)
1925+
break;
1926+
1927+
/* Recover the number of entries and update stats. */
1928+
local_set(&head_page->entries, ret);
1929+
if (ret)
1930+
local_inc(&cpu_buffer->pages_touched);
1931+
entries += ret;
1932+
entry_bytes += rb_page_commit(head_page);
1933+
}
1934+
if (i)
1935+
pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
1936+
1937+
/* The last rewound page must be skipped. */
1938+
if (head_page != orig_head)
1939+
rb_inc_page(&head_page);
1940+
1941+
/*
1942+
* If the ring buffer was rewound, then inject the reader page
1943+
* into the location just before the original head page.
1944+
*/
1945+
if (head_page != orig_head) {
1946+
struct buffer_page *bpage = orig_head;
1947+
1948+
rb_dec_page(&bpage);
1949+
/*
1950+
* Insert the reader_page before the original head page.
1951+
* Since the list encode RB_PAGE flags, general list
1952+
* operations should be avoided.
1953+
*/
1954+
cpu_buffer->reader_page->list.next = &orig_head->list;
1955+
cpu_buffer->reader_page->list.prev = orig_head->list.prev;
1956+
orig_head->list.prev = &cpu_buffer->reader_page->list;
1957+
bpage->list.next = &cpu_buffer->reader_page->list;
1958+
1959+
/* Make the head_page the reader page */
1960+
cpu_buffer->reader_page = head_page;
1961+
bpage = head_page;
1962+
rb_inc_page(&head_page);
1963+
head_page->list.prev = bpage->list.prev;
1964+
rb_dec_page(&bpage);
1965+
bpage->list.next = &head_page->list;
1966+
rb_set_list_to_head(&bpage->list);
1967+
cpu_buffer->pages = &head_page->list;
1968+
1969+
cpu_buffer->head_page = head_page;
1970+
meta->head_buffer = (unsigned long)head_page->page;
1971+
1972+
/* Reset all the indexes */
1973+
bpage = cpu_buffer->reader_page;
1974+
meta->buffers[0] = rb_meta_subbuf_idx(meta, bpage->page);
1975+
bpage->id = 0;
1976+
1977+
for (i = 1, bpage = head_page; i < meta->nr_subbufs;
1978+
i++, rb_inc_page(&bpage)) {
1979+
meta->buffers[i] = rb_meta_subbuf_idx(meta, bpage->page);
1980+
bpage->id = i;
1981+
}
1982+
1983+
/* We'll restart verifying from orig_head */
1984+
head_page = orig_head;
1985+
}
18891986

1987+
skip_rewind:
18901988
/* If the commit_buffer is the reader page, update the commit page */
18911989
if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) {
18921990
cpu_buffer->commit_page = cpu_buffer->reader_page;
@@ -4118,7 +4216,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
41184216

41194217
static const char *show_irq_str(int bits)
41204218
{
4121-
const char *type[] = {
4219+
static const char * type[] = {
41224220
".", // 0
41234221
"s", // 1
41244222
"h", // 2
@@ -5342,7 +5440,6 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
53425440
*/
53435441
local_set(&cpu_buffer->reader_page->write, 0);
53445442
local_set(&cpu_buffer->reader_page->entries, 0);
5345-
local_set(&cpu_buffer->reader_page->page->commit, 0);
53465443
cpu_buffer->reader_page->real_end = 0;
53475444

53485445
spin:
@@ -5846,24 +5943,20 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
58465943
EXPORT_SYMBOL_GPL(ring_buffer_consume);
58475944

58485945
/**
5849-
* ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
5946+
* ring_buffer_read_start - start a non consuming read of the buffer
58505947
* @buffer: The ring buffer to read from
58515948
* @cpu: The cpu buffer to iterate over
58525949
* @flags: gfp flags to use for memory allocation
58535950
*
5854-
* This performs the initial preparations necessary to iterate
5855-
* through the buffer. Memory is allocated, buffer resizing
5856-
* is disabled, and the iterator pointer is returned to the caller.
5951+
* This creates an iterator to allow non-consuming iteration through
5952+
* the buffer. If the buffer is disabled for writing, it will produce
5953+
* the same information each time, but if the buffer is still writing
5954+
* then the first hit of a write will cause the iteration to stop.
58575955
*
5858-
* After a sequence of ring_buffer_read_prepare calls, the user is
5859-
* expected to make at least one call to ring_buffer_read_prepare_sync.
5860-
* Afterwards, ring_buffer_read_start is invoked to get things going
5861-
* for real.
5862-
*
5863-
* This overall must be paired with ring_buffer_read_finish.
5956+
* Must be paired with ring_buffer_read_finish.
58645957
*/
58655958
struct ring_buffer_iter *
5866-
ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
5959+
ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags)
58675960
{
58685961
struct ring_buffer_per_cpu *cpu_buffer;
58695962
struct ring_buffer_iter *iter;
@@ -5889,51 +5982,12 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
58895982

58905983
atomic_inc(&cpu_buffer->resize_disabled);
58915984

5892-
return iter;
5893-
}
5894-
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
5895-
5896-
/**
5897-
* ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
5898-
*
5899-
* All previously invoked ring_buffer_read_prepare calls to prepare
5900-
* iterators will be synchronized. Afterwards, read_buffer_read_start
5901-
* calls on those iterators are allowed.
5902-
*/
5903-
void
5904-
ring_buffer_read_prepare_sync(void)
5905-
{
5906-
synchronize_rcu();
5907-
}
5908-
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
5909-
5910-
/**
5911-
* ring_buffer_read_start - start a non consuming read of the buffer
5912-
* @iter: The iterator returned by ring_buffer_read_prepare
5913-
*
5914-
* This finalizes the startup of an iteration through the buffer.
5915-
* The iterator comes from a call to ring_buffer_read_prepare and
5916-
* an intervening ring_buffer_read_prepare_sync must have been
5917-
* performed.
5918-
*
5919-
* Must be paired with ring_buffer_read_finish.
5920-
*/
5921-
void
5922-
ring_buffer_read_start(struct ring_buffer_iter *iter)
5923-
{
5924-
struct ring_buffer_per_cpu *cpu_buffer;
5925-
unsigned long flags;
5926-
5927-
if (!iter)
5928-
return;
5929-
5930-
cpu_buffer = iter->cpu_buffer;
5931-
5932-
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
5985+
guard(raw_spinlock_irqsave)(&cpu_buffer->reader_lock);
59335986
arch_spin_lock(&cpu_buffer->lock);
59345987
rb_iter_reset(iter);
59355988
arch_spin_unlock(&cpu_buffer->lock);
5936-
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
5989+
5990+
return iter;
59375991
}
59385992
EXPORT_SYMBOL_GPL(ring_buffer_read_start);
59395993

kernel/trace/trace.c

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4735,21 +4735,15 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
47354735
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
47364736
for_each_tracing_cpu(cpu) {
47374737
iter->buffer_iter[cpu] =
4738-
ring_buffer_read_prepare(iter->array_buffer->buffer,
4739-
cpu, GFP_KERNEL);
4740-
}
4741-
ring_buffer_read_prepare_sync();
4742-
for_each_tracing_cpu(cpu) {
4743-
ring_buffer_read_start(iter->buffer_iter[cpu]);
4738+
ring_buffer_read_start(iter->array_buffer->buffer,
4739+
cpu, GFP_KERNEL);
47444740
tracing_iter_reset(iter, cpu);
47454741
}
47464742
} else {
47474743
cpu = iter->cpu_file;
47484744
iter->buffer_iter[cpu] =
4749-
ring_buffer_read_prepare(iter->array_buffer->buffer,
4750-
cpu, GFP_KERNEL);
4751-
ring_buffer_read_prepare_sync();
4752-
ring_buffer_read_start(iter->buffer_iter[cpu]);
4745+
ring_buffer_read_start(iter->array_buffer->buffer,
4746+
cpu, GFP_KERNEL);
47534747
tracing_iter_reset(iter, cpu);
47544748
}
47554749

kernel/trace/trace_kdb.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,15 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file)
4343
if (cpu_file == RING_BUFFER_ALL_CPUS) {
4444
for_each_tracing_cpu(cpu) {
4545
iter.buffer_iter[cpu] =
46-
ring_buffer_read_prepare(iter.array_buffer->buffer,
47-
cpu, GFP_ATOMIC);
48-
ring_buffer_read_start(iter.buffer_iter[cpu]);
46+
ring_buffer_read_start(iter.array_buffer->buffer,
47+
cpu, GFP_ATOMIC);
4948
tracing_iter_reset(&iter, cpu);
5049
}
5150
} else {
5251
iter.cpu_file = cpu_file;
5352
iter.buffer_iter[cpu_file] =
54-
ring_buffer_read_prepare(iter.array_buffer->buffer,
53+
ring_buffer_read_start(iter.array_buffer->buffer,
5554
cpu_file, GFP_ATOMIC);
56-
ring_buffer_read_start(iter.buffer_iter[cpu_file]);
5755
tracing_iter_reset(&iter, cpu_file);
5856
}
5957

0 commit comments

Comments
 (0)