@@ -503,11 +503,13 @@ struct trace_buffer {
503
503
struct ring_buffer_iter {
504
504
struct ring_buffer_per_cpu * cpu_buffer ;
505
505
unsigned long head ;
506
+ unsigned long next_event ;
506
507
struct buffer_page * head_page ;
507
508
struct buffer_page * cache_reader_page ;
508
509
unsigned long cache_read ;
509
510
u64 read_stamp ;
510
511
u64 page_stamp ;
512
+ struct ring_buffer_event * event ;
511
513
};
512
514
513
515
/**
@@ -1914,15 +1916,59 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
1914
1916
cpu_buffer -> reader_page -> read );
1915
1917
}
1916
1918
1917
- static __always_inline struct ring_buffer_event *
1918
- rb_iter_head_event (struct ring_buffer_iter * iter )
1919
+ static __always_inline unsigned rb_page_commit (struct buffer_page * bpage )
1919
1920
{
1920
- return __rb_page_index ( iter -> head_page , iter -> head );
1921
+ return local_read ( & bpage -> page -> commit );
1921
1922
}
1922
1923
1923
- static __always_inline unsigned rb_page_commit (struct buffer_page * bpage )
1924
+ static struct ring_buffer_event *
1925
+ rb_iter_head_event (struct ring_buffer_iter * iter )
1924
1926
{
1925
- return local_read (& bpage -> page -> commit );
1927
+ struct ring_buffer_event * event ;
1928
+ struct buffer_page * iter_head_page = iter -> head_page ;
1929
+ unsigned long commit ;
1930
+ unsigned length ;
1931
+
1932
+ /*
1933
+ * When the writer goes across pages, it issues a cmpxchg which
1934
+ * is a mb(), which will synchronize with the rmb here.
1935
+ * (see rb_tail_page_update() and __rb_reserve_next())
1936
+ */
1937
+ commit = rb_page_commit (iter_head_page );
1938
+ smp_rmb ();
1939
+ event = __rb_page_index (iter_head_page , iter -> head );
1940
+ length = rb_event_length (event );
1941
+
1942
+ /*
1943
+ * READ_ONCE() doesn't work on functions and we don't want the
1944
+ * compiler doing any crazy optimizations with length.
1945
+ */
1946
+ barrier ();
1947
+
1948
+ if ((iter -> head + length ) > commit || length > BUF_MAX_DATA_SIZE )
1949
+ /* Writer corrupted the read? */
1950
+ goto reset ;
1951
+
1952
+ memcpy (iter -> event , event , length );
1953
+ /*
1954
+ * If the page stamp is still the same after this rmb() then the
1955
+ * event was safely copied without the writer entering the page.
1956
+ */
1957
+ smp_rmb ();
1958
+
1959
+ /* Make sure the page didn't change since we read this */
1960
+ if (iter -> page_stamp != iter_head_page -> page -> time_stamp ||
1961
+ commit > rb_page_commit (iter_head_page ))
1962
+ goto reset ;
1963
+
1964
+ iter -> next_event = iter -> head + length ;
1965
+ return iter -> event ;
1966
+ reset :
1967
+ /* Reset to the beginning */
1968
+ iter -> page_stamp = iter -> read_stamp = iter -> head_page -> page -> time_stamp ;
1969
+ iter -> head = 0 ;
1970
+ iter -> next_event = 0 ;
1971
+ return NULL ;
1926
1972
}
1927
1973
1928
1974
/* Size is determined by what has been committed */
@@ -1962,6 +2008,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1962
2008
1963
2009
iter -> page_stamp = iter -> read_stamp = iter -> head_page -> page -> time_stamp ;
1964
2010
iter -> head = 0 ;
2011
+ iter -> next_event = 0 ;
1965
2012
}
1966
2013
1967
2014
/*
@@ -3548,6 +3595,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
3548
3595
/* Iterator usage is expected to have record disabled */
3549
3596
iter -> head_page = cpu_buffer -> reader_page ;
3550
3597
iter -> head = cpu_buffer -> reader_page -> read ;
3598
+ iter -> next_event = iter -> head ;
3551
3599
3552
3600
iter -> cache_reader_page = iter -> head_page ;
3553
3601
iter -> cache_read = cpu_buffer -> read ;
@@ -3625,7 +3673,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
3625
3673
return 0 ;
3626
3674
3627
3675
/* Still racy, as it may return a false positive, but that's OK */
3628
- return ((iter -> head_page == commit_page && iter -> head == commit ) ||
3676
+ return ((iter -> head_page == commit_page && iter -> head >= commit ) ||
3629
3677
(iter -> head_page == reader && commit_page == head_page &&
3630
3678
head_page -> read == commit &&
3631
3679
iter -> head == rb_page_commit (cpu_buffer -> reader_page )));
@@ -3853,43 +3901,30 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
3853
3901
static void rb_advance_iter (struct ring_buffer_iter * iter )
3854
3902
{
3855
3903
struct ring_buffer_per_cpu * cpu_buffer ;
3856
- struct ring_buffer_event * event ;
3857
- unsigned length ;
3858
3904
3859
3905
cpu_buffer = iter -> cpu_buffer ;
3860
3906
3907
+ /* If head == next_event then we need to jump to the next event */
3908
+ if (iter -> head == iter -> next_event ) {
3909
+ /* If the event gets overwritten again, there's nothing to do */
3910
+ if (rb_iter_head_event (iter ) == NULL )
3911
+ return ;
3912
+ }
3913
+
3914
+ iter -> head = iter -> next_event ;
3915
+
3861
3916
/*
3862
3917
* Check if we are at the end of the buffer.
3863
3918
*/
3864
- if (iter -> head >= rb_page_size (iter -> head_page )) {
3919
+ if (iter -> next_event >= rb_page_size (iter -> head_page )) {
3865
3920
/* discarded commits can make the page empty */
3866
3921
if (iter -> head_page == cpu_buffer -> commit_page )
3867
3922
return ;
3868
3923
rb_inc_iter (iter );
3869
3924
return ;
3870
3925
}
3871
3926
3872
- event = rb_iter_head_event (iter );
3873
-
3874
- length = rb_event_length (event );
3875
-
3876
- /*
3877
- * This should not be called to advance the header if we are
3878
- * at the tail of the buffer.
3879
- */
3880
- if (RB_WARN_ON (cpu_buffer ,
3881
- (iter -> head_page == cpu_buffer -> commit_page ) &&
3882
- (iter -> head + length > rb_commit_index (cpu_buffer ))))
3883
- return ;
3884
-
3885
- rb_update_iter_read_stamp (iter , event );
3886
-
3887
- iter -> head += length ;
3888
-
3889
- /* check for end of page padding */
3890
- if ((iter -> head >= rb_page_size (iter -> head_page )) &&
3891
- (iter -> head_page != cpu_buffer -> commit_page ))
3892
- rb_inc_iter (iter );
3927
+ rb_update_iter_read_stamp (iter , iter -> event );
3893
3928
}
3894
3929
3895
3930
static int rb_lost_events (struct ring_buffer_per_cpu * cpu_buffer )
@@ -4017,6 +4052,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
4017
4052
}
4018
4053
4019
4054
event = rb_iter_head_event (iter );
4055
+ if (!event )
4056
+ goto again ;
4020
4057
4021
4058
switch (event -> type_len ) {
4022
4059
case RINGBUF_TYPE_PADDING :
@@ -4233,10 +4270,16 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
4233
4270
if (!cpumask_test_cpu (cpu , buffer -> cpumask ))
4234
4271
return NULL ;
4235
4272
4236
- iter = kmalloc (sizeof (* iter ), flags );
4273
+ iter = kzalloc (sizeof (* iter ), flags );
4237
4274
if (!iter )
4238
4275
return NULL ;
4239
4276
4277
+ iter -> event = kmalloc (BUF_MAX_DATA_SIZE , flags );
4278
+ if (!iter -> event ) {
4279
+ kfree (iter );
4280
+ return NULL ;
4281
+ }
4282
+
4240
4283
cpu_buffer = buffer -> buffers [cpu ];
4241
4284
4242
4285
iter -> cpu_buffer = cpu_buffer ;
@@ -4317,6 +4360,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
4317
4360
4318
4361
atomic_dec (& cpu_buffer -> record_disabled );
4319
4362
atomic_dec (& cpu_buffer -> buffer -> resize_disabled );
4363
+ kfree (iter -> event );
4320
4364
kfree (iter );
4321
4365
}
4322
4366
EXPORT_SYMBOL_GPL (ring_buffer_read_finish );
0 commit comments