44#include <linux/bpf.h>
55#include <linux/filter.h>
66#include <linux/bpf_mem_alloc.h>
7- #include <linux/percpu.h>
8- #include <linux/refcount.h>
97#include <linux/gfp.h>
108#include <linux/memory.h>
11- #include <linux/local_lock.h>
129#include <linux/mutex.h>
1310
/*
 * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe
 * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and
 * stash it in a local per-CPU variable, and bump allocate from the page
 * whenever items need to be printed to a stream. Each page holds a global
 * atomic refcount in its first 4 bytes, and then records of variable length
 * that describe the printed messages. Once the global refcount has dropped to
 * zero, it is a signal to free the page back to the kernel's page allocator,
 * given all the individual records in it have been consumed.
 *
 * It is possible the same page is used to serve allocations across different
 * programs, which may be consumed at different times individually, hence
 * maintaining a reference count per-page is critical for correct lifetime
 * tracking.
 *
 * The bpf_stream_page code will be replaced with kmalloc_nolock() once it
 * lands.
 */
32- struct bpf_stream_page {
33- refcount_t ref ;
34- u32 consumed ;
35- char buf [];
36- };
37-
38- /* Available room to add data to a refcounted page. */
39- #define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed))
40-
41- static DEFINE_PER_CPU (local_trylock_t , stream_local_lock ) = INIT_LOCAL_TRYLOCK (stream_local_lock );
42- static DEFINE_PER_CPU (struct bpf_stream_page * , stream_pcpu_page ) ;
43-
44- static bool bpf_stream_page_local_lock (unsigned long * flags )
45- {
46- return local_trylock_irqsave (& stream_local_lock , * flags );
47- }
48-
49- static void bpf_stream_page_local_unlock (unsigned long * flags )
50- {
51- local_unlock_irqrestore (& stream_local_lock , * flags );
52- }
53-
/*
 * Hand the page backing @stream_page back through free_pages_nolock().
 * A NULL @stream_page is accepted and ignored.
 */
static void bpf_stream_page_free(struct bpf_stream_page *stream_page)
{
	if (stream_page)
		free_pages_nolock(virt_to_page(stream_page), 0);
}
63-
64- static void bpf_stream_page_get (struct bpf_stream_page * stream_page )
65- {
66- refcount_inc (& stream_page -> ref );
67- }
68-
69- static void bpf_stream_page_put (struct bpf_stream_page * stream_page )
70- {
71- if (refcount_dec_and_test (& stream_page -> ref ))
72- bpf_stream_page_free (stream_page );
73- }
74-
75- static void bpf_stream_page_init (struct bpf_stream_page * stream_page )
76- {
77- refcount_set (& stream_page -> ref , 1 );
78- stream_page -> consumed = 0 ;
79- }
80-
81- static struct bpf_stream_page * bpf_stream_page_replace (void )
82- {
83- struct bpf_stream_page * stream_page , * old_stream_page ;
84- struct page * page ;
85-
86- page = alloc_pages_nolock (/* Don't account */ 0 , NUMA_NO_NODE , 0 );
87- if (!page )
88- return NULL ;
89- stream_page = page_address (page );
90- bpf_stream_page_init (stream_page );
91-
92- old_stream_page = this_cpu_read (stream_pcpu_page );
93- if (old_stream_page )
94- bpf_stream_page_put (old_stream_page );
95- this_cpu_write (stream_pcpu_page , stream_page );
96- return stream_page ;
97- }
98-
99- static int bpf_stream_page_check_room (struct bpf_stream_page * stream_page , int len )
100- {
101- int min = offsetof(struct bpf_stream_elem , str [0 ]);
102- int consumed = stream_page -> consumed ;
103- int total = BPF_STREAM_PAGE_SZ ;
104- int rem = max (0 , total - consumed - min );
105-
106- /* Let's give room of at least 8 bytes. */
107- WARN_ON_ONCE (rem % 8 != 0 );
108- rem = rem < 8 ? 0 : rem ;
109- return min (len , rem );
110- }
111-
11211static void bpf_stream_elem_init (struct bpf_stream_elem * elem , int len )
11312{
11413 init_llist_node (& elem -> node );
11514 elem -> total_len = len ;
11615 elem -> consumed_len = 0 ;
11716}
11817
/*
 * Recover the stream page header for @elem by rounding the element's
 * address down to the start of its page.
 */
static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem)
{
	return (struct bpf_stream_page *)PAGE_ALIGN_DOWN((unsigned long)elem);
}
125-
126- static struct bpf_stream_elem * bpf_stream_page_push_elem (struct bpf_stream_page * stream_page , int len )
127- {
128- u32 consumed = stream_page -> consumed ;
129-
130- stream_page -> consumed += round_up (offsetof(struct bpf_stream_elem , str [len ]), 8 );
131- return (struct bpf_stream_elem * )& stream_page -> buf [consumed ];
132- }
133-
134- static struct bpf_stream_elem * bpf_stream_page_reserve_elem (int len )
135- {
136- struct bpf_stream_elem * elem = NULL ;
137- struct bpf_stream_page * page ;
138- int room = 0 ;
139-
140- page = this_cpu_read (stream_pcpu_page );
141- if (!page )
142- page = bpf_stream_page_replace ();
143- if (!page )
144- return NULL ;
145-
146- room = bpf_stream_page_check_room (page , len );
147- if (room != len )
148- page = bpf_stream_page_replace ();
149- if (!page )
150- return NULL ;
151- bpf_stream_page_get (page );
152- room = bpf_stream_page_check_room (page , len );
153- WARN_ON_ONCE (room != len );
154-
155- elem = bpf_stream_page_push_elem (page , room );
156- bpf_stream_elem_init (elem , room );
157- return elem ;
158- }
159-
16018static struct bpf_stream_elem * bpf_stream_elem_alloc (int len )
16119{
16220 const int max_len = ARRAY_SIZE ((struct bpf_bprintf_buffers ){}.buf );
16321 struct bpf_stream_elem * elem ;
164- unsigned long flags ;
22+ size_t alloc_size ;
16523
166- BUILD_BUG_ON (max_len > BPF_STREAM_PAGE_SZ );
16724 /*
16825 * Length denotes the amount of data to be written as part of stream element,
16926 * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
@@ -172,10 +29,13 @@ static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
17229 if (len < 0 || len > max_len )
17330 return NULL ;
17431
175- if (!bpf_stream_page_local_lock (& flags ))
32+ alloc_size = round_up (offsetof(struct bpf_stream_elem , str [len ]), 8 );
33+ elem = kmalloc_nolock (alloc_size , __GFP_ZERO , -1 );
34+ if (!elem )
17635 return NULL ;
177- elem = bpf_stream_page_reserve_elem (len );
178- bpf_stream_page_local_unlock (& flags );
36+
37+ bpf_stream_elem_init (elem , len );
38+
17939 return elem ;
18040}
18141
@@ -231,10 +91,7 @@ static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bp
23191
/* Release a stream element by passing it to kfree_nolock(). */
static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
{
	void *obj = elem;

	kfree_nolock(obj);
}
23996
24097static void bpf_stream_free_list (struct llist_node * list )
0 commit comments