Commit 80f9607

puranjaymohan authored and Kernel Patches Daemon committed
bpf: stream: start using kmalloc_nolock()
BPF stream kfuncs need to be non-sleeping, as they can be called from programs running in any context; this requires a way to allocate memory from any context. Currently, that is done by a custom per-CPU NMI-safe bump allocation mechanism backed by the try_alloc_pages() and free_pages_nolock() primitives. Now that kmalloc_nolock() and kfree_nolock() are available, the custom allocator can be removed in favor of these primitives.

Signed-off-by: Puranjay Mohan <[email protected]>
Acked-by: Kumar Kartikeya Dwivedi <[email protected]>
1 parent 1f11231 commit 80f9607
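For orientation before the diff: the pattern the commit moves to is a plain per-element allocate/free that is safe in any context, replacing the per-CPU page, trylock, and refcount machinery removed below. The following is a simplified sketch rather than the kernel code itself; the struct and function names are invented for illustration, the includes are assumed, and only the kmalloc_nolock()/kfree_nolock() calls (size, GFP flags, NUMA node, with -1 meaning any node) mirror their use in the diff.

#include <linux/kernel.h>	/* round_up(), offsetof() */
#include <linux/slab.h>		/* kmalloc_nolock(), kfree_nolock() (assumed location) */

/* Illustrative element layout; the real one is struct bpf_stream_elem. */
struct sketch_elem {
	int total_len;		/* length of the message, including '\0' */
	int consumed_len;	/* how much the reader has consumed so far */
	char str[];		/* variable-length message body */
};

static struct sketch_elem *sketch_elem_alloc(int len)
{
	/* Round the flexible-array allocation up to 8 bytes, as the diff does. */
	size_t alloc_size = round_up(offsetof(struct sketch_elem, str[len]), 8);
	struct sketch_elem *elem;

	/*
	 * kmalloc_nolock() may be called from any context, including NMI;
	 * -1 lets the allocator pick the NUMA node.
	 */
	elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
	if (!elem)
		return NULL;
	elem->total_len = len;
	return elem;
}

static void sketch_elem_free(struct sketch_elem *elem)
{
	/* Pairs with kmalloc_nolock() and is likewise safe in any context. */
	kfree_nolock(elem);
}

The simplification over the removed bpf_stream_page path is that each element now owns its own allocation, so the page refcounting that tied element lifetime to a shared per-CPU page is no longer needed.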

File tree

1 file changed: +8 -151 lines changed

kernel/bpf/stream.c

Lines changed: 8 additions & 151 deletions
@@ -4,166 +4,23 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/bpf_mem_alloc.h>
-#include <linux/percpu.h>
-#include <linux/refcount.h>
 #include <linux/gfp.h>
 #include <linux/memory.h>
-#include <linux/local_lock.h>
 #include <linux/mutex.h>
 
-/*
- * Simple per-CPU NMI-safe bump allocation mechanism, backed by the NMI-safe
- * try_alloc_pages()/free_pages_nolock() primitives. We allocate a page and
- * stash it in a local per-CPU variable, and bump allocate from the page
- * whenever items need to be printed to a stream. Each page holds a global
- * atomic refcount in its first 4 bytes, and then records of variable length
- * that describe the printed messages. Once the global refcount has dropped to
- * zero, it is a signal to free the page back to the kernel's page allocator,
- * given all the individual records in it have been consumed.
- *
- * It is possible the same page is used to serve allocations across different
- * programs, which may be consumed at different times individually, hence
- * maintaining a reference count per-page is critical for correct lifetime
- * tracking.
- *
- * The bpf_stream_page code will be replaced to use kmalloc_nolock() once it
- * lands.
- */
-struct bpf_stream_page {
-	refcount_t ref;
-	u32 consumed;
-	char buf[];
-};
-
-/* Available room to add data to a refcounted page. */
-#define BPF_STREAM_PAGE_SZ (PAGE_SIZE - offsetofend(struct bpf_stream_page, consumed))
-
-static DEFINE_PER_CPU(local_trylock_t, stream_local_lock) = INIT_LOCAL_TRYLOCK(stream_local_lock);
-static DEFINE_PER_CPU(struct bpf_stream_page *, stream_pcpu_page);
-
-static bool bpf_stream_page_local_lock(unsigned long *flags)
-{
-	return local_trylock_irqsave(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_local_unlock(unsigned long *flags)
-{
-	local_unlock_irqrestore(&stream_local_lock, *flags);
-}
-
-static void bpf_stream_page_free(struct bpf_stream_page *stream_page)
-{
-	struct page *p;
-
-	if (!stream_page)
-		return;
-	p = virt_to_page(stream_page);
-	free_pages_nolock(p, 0);
-}
-
-static void bpf_stream_page_get(struct bpf_stream_page *stream_page)
-{
-	refcount_inc(&stream_page->ref);
-}
-
-static void bpf_stream_page_put(struct bpf_stream_page *stream_page)
-{
-	if (refcount_dec_and_test(&stream_page->ref))
-		bpf_stream_page_free(stream_page);
-}
-
-static void bpf_stream_page_init(struct bpf_stream_page *stream_page)
-{
-	refcount_set(&stream_page->ref, 1);
-	stream_page->consumed = 0;
-}
-
-static struct bpf_stream_page *bpf_stream_page_replace(void)
-{
-	struct bpf_stream_page *stream_page, *old_stream_page;
-	struct page *page;
-
-	page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
-	if (!page)
-		return NULL;
-	stream_page = page_address(page);
-	bpf_stream_page_init(stream_page);
-
-	old_stream_page = this_cpu_read(stream_pcpu_page);
-	if (old_stream_page)
-		bpf_stream_page_put(old_stream_page);
-	this_cpu_write(stream_pcpu_page, stream_page);
-	return stream_page;
-}
-
-static int bpf_stream_page_check_room(struct bpf_stream_page *stream_page, int len)
-{
-	int min = offsetof(struct bpf_stream_elem, str[0]);
-	int consumed = stream_page->consumed;
-	int total = BPF_STREAM_PAGE_SZ;
-	int rem = max(0, total - consumed - min);
-
-	/* Let's give room of at least 8 bytes. */
-	WARN_ON_ONCE(rem % 8 != 0);
-	rem = rem < 8 ? 0 : rem;
-	return min(len, rem);
-}
-
 static void bpf_stream_elem_init(struct bpf_stream_elem *elem, int len)
 {
 	init_llist_node(&elem->node);
 	elem->total_len = len;
 	elem->consumed_len = 0;
 }
 
-static struct bpf_stream_page *bpf_stream_page_from_elem(struct bpf_stream_elem *elem)
-{
-	unsigned long addr = (unsigned long)elem;
-
-	return (struct bpf_stream_page *)PAGE_ALIGN_DOWN(addr);
-}
-
-static struct bpf_stream_elem *bpf_stream_page_push_elem(struct bpf_stream_page *stream_page, int len)
-{
-	u32 consumed = stream_page->consumed;
-
-	stream_page->consumed += round_up(offsetof(struct bpf_stream_elem, str[len]), 8);
-	return (struct bpf_stream_elem *)&stream_page->buf[consumed];
-}
-
-static struct bpf_stream_elem *bpf_stream_page_reserve_elem(int len)
-{
-	struct bpf_stream_elem *elem = NULL;
-	struct bpf_stream_page *page;
-	int room = 0;
-
-	page = this_cpu_read(stream_pcpu_page);
-	if (!page)
-		page = bpf_stream_page_replace();
-	if (!page)
-		return NULL;
-
-	room = bpf_stream_page_check_room(page, len);
-	if (room != len)
-		page = bpf_stream_page_replace();
-	if (!page)
-		return NULL;
-	bpf_stream_page_get(page);
-	room = bpf_stream_page_check_room(page, len);
-	WARN_ON_ONCE(room != len);
-
-	elem = bpf_stream_page_push_elem(page, room);
-	bpf_stream_elem_init(elem, room);
-	return elem;
-}
-
 static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
 {
 	const int max_len = ARRAY_SIZE((struct bpf_bprintf_buffers){}.buf);
 	struct bpf_stream_elem *elem;
-	unsigned long flags;
+	size_t alloc_size;
 
-	BUILD_BUG_ON(max_len > BPF_STREAM_PAGE_SZ);
 	/*
 	 * Length denotes the amount of data to be written as part of stream element,
 	 * thus includes '\0' byte. We're capped by how much bpf_bprintf_buffers can
@@ -172,10 +29,13 @@ static struct bpf_stream_elem *bpf_stream_elem_alloc(int len)
 	if (len < 0 || len > max_len)
 		return NULL;
 
-	if (!bpf_stream_page_local_lock(&flags))
+	alloc_size = round_up(offsetof(struct bpf_stream_elem, str[len]), 8);
+	elem = kmalloc_nolock(alloc_size, __GFP_ZERO, -1);
+	if (!elem)
 		return NULL;
-	elem = bpf_stream_page_reserve_elem(len);
-	bpf_stream_page_local_unlock(&flags);
+
+	bpf_stream_elem_init(elem, len);
+
 	return elem;
 }
 
@@ -231,10 +91,7 @@ static struct bpf_stream *bpf_stream_get(enum bpf_stream_id stream_id, struct bp
 
 static void bpf_stream_free_elem(struct bpf_stream_elem *elem)
 {
-	struct bpf_stream_page *p;
-
-	p = bpf_stream_page_from_elem(elem);
-	bpf_stream_page_put(p);
+	kfree_nolock(elem);
 }
 
 static void bpf_stream_free_list(struct llist_node *list)

0 commit comments
