Skip to content

Commit 6fd815b

Browse files
committed
Merge branch 'wireguard-fixes'
Jason A. Donenfeld says: ==================== wireguard fixes for 5.13-rc5 Here are bug fixes to WireGuard for 5.13-rc5: 1-2,6) These are small, trivial tweaks to our test harness. 3) Linus thinks -O3 is still dangerous to enable. The code gen wasn't so much different with -O2 either. 4) We were accidentally calling synchronize_rcu instead of synchronize_net while holding the rtnl_lock, resulting in some rather large stalls that hit production machines. 5) Peer allocation was wasting literally hundreds of megabytes on real world deployments, due to oddly sized large objects not fitting nicely into a kmalloc slab. 7-9) We move from an insanely expensive O(n) algorithm to a fast O(1) algorithm, and cleanup a massive memory leak in the process, in which allowed ips churn would leave danging nodes hanging around without cleanup until the interface was removed. The O(1) algorithm eliminates packet stalls and high latency issues, in addition to bringing operations that took as much as 10 minutes down to less than a second. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 579028d + bf7b042 commit 6fd815b

File tree

10 files changed

+227
-195
lines changed

10 files changed

+227
-195
lines changed

drivers/net/wireguard/Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
ccflags-y := -O3
2-
ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
1+
ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
32
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
43
wireguard-y := main.o
54
wireguard-y += noise.o

drivers/net/wireguard/allowedips.c

Lines changed: 99 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include "allowedips.h"
77
#include "peer.h"
88

9+
static struct kmem_cache *node_cache;
10+
911
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
1012
{
1113
if (bits == 32) {
@@ -28,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
2830
node->bitlen = bits;
2931
memcpy(node->bits, src, bits / 8U);
3032
}
31-
#define CHOOSE_NODE(parent, key) \
32-
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
33+
34+
static inline u8 choose(struct allowedips_node *node, const u8 *key)
35+
{
36+
return (key[node->bit_at_a] >> node->bit_at_b) & 1;
37+
}
3338

3439
static void push_rcu(struct allowedips_node **stack,
3540
struct allowedips_node __rcu *p, unsigned int *len)
@@ -40,6 +45,11 @@ static void push_rcu(struct allowedips_node **stack,
4045
}
4146
}
4247

48+
static void node_free_rcu(struct rcu_head *rcu)
49+
{
50+
kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
51+
}
52+
4353
static void root_free_rcu(struct rcu_head *rcu)
4454
{
4555
struct allowedips_node *node, *stack[128] = {
@@ -49,7 +59,7 @@ static void root_free_rcu(struct rcu_head *rcu)
4959
while (len > 0 && (node = stack[--len])) {
5060
push_rcu(stack, node->bit[0], &len);
5161
push_rcu(stack, node->bit[1], &len);
52-
kfree(node);
62+
kmem_cache_free(node_cache, node);
5363
}
5464
}
5565

@@ -66,60 +76,6 @@ static void root_remove_peer_lists(struct allowedips_node *root)
6676
}
6777
}
6878

69-
static void walk_remove_by_peer(struct allowedips_node __rcu **top,
70-
struct wg_peer *peer, struct mutex *lock)
71-
{
72-
#define REF(p) rcu_access_pointer(p)
73-
#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
74-
#define PUSH(p) ({ \
75-
WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
76-
stack[len++] = p; \
77-
})
78-
79-
struct allowedips_node __rcu **stack[128], **nptr;
80-
struct allowedips_node *node, *prev;
81-
unsigned int len;
82-
83-
if (unlikely(!peer || !REF(*top)))
84-
return;
85-
86-
for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
87-
nptr = stack[len - 1];
88-
node = DEREF(nptr);
89-
if (!node) {
90-
--len;
91-
continue;
92-
}
93-
if (!prev || REF(prev->bit[0]) == node ||
94-
REF(prev->bit[1]) == node) {
95-
if (REF(node->bit[0]))
96-
PUSH(&node->bit[0]);
97-
else if (REF(node->bit[1]))
98-
PUSH(&node->bit[1]);
99-
} else if (REF(node->bit[0]) == prev) {
100-
if (REF(node->bit[1]))
101-
PUSH(&node->bit[1]);
102-
} else {
103-
if (rcu_dereference_protected(node->peer,
104-
lockdep_is_held(lock)) == peer) {
105-
RCU_INIT_POINTER(node->peer, NULL);
106-
list_del_init(&node->peer_list);
107-
if (!node->bit[0] || !node->bit[1]) {
108-
rcu_assign_pointer(*nptr, DEREF(
109-
&node->bit[!REF(node->bit[0])]));
110-
kfree_rcu(node, rcu);
111-
node = DEREF(nptr);
112-
}
113-
}
114-
--len;
115-
}
116-
}
117-
118-
#undef REF
119-
#undef DEREF
120-
#undef PUSH
121-
}
122-
12379
static unsigned int fls128(u64 a, u64 b)
12480
{
12581
return a ? fls64(a) + 64U : fls64(b);
@@ -159,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
159115
found = node;
160116
if (node->cidr == bits)
161117
break;
162-
node = rcu_dereference_bh(CHOOSE_NODE(node, key));
118+
node = rcu_dereference_bh(node->bit[choose(node, key)]);
163119
}
164120
return found;
165121
}
@@ -191,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
191147
u8 cidr, u8 bits, struct allowedips_node **rnode,
192148
struct mutex *lock)
193149
{
194-
struct allowedips_node *node = rcu_dereference_protected(trie,
195-
lockdep_is_held(lock));
150+
struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
196151
struct allowedips_node *parent = NULL;
197152
bool exact = false;
198153

@@ -202,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
202157
exact = true;
203158
break;
204159
}
205-
node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
206-
lockdep_is_held(lock));
160+
node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
207161
}
208162
*rnode = parent;
209163
return exact;
210164
}
211165

166+
static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
167+
{
168+
node->parent_bit_packed = (unsigned long)parent | bit;
169+
rcu_assign_pointer(*parent, node);
170+
}
171+
172+
static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
173+
{
174+
u8 bit = choose(parent, node->bits);
175+
connect_node(&parent->bit[bit], bit, node);
176+
}
177+
212178
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
213179
u8 cidr, struct wg_peer *peer, struct mutex *lock)
214180
{
@@ -218,13 +184,13 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
218184
return -EINVAL;
219185

220186
if (!rcu_access_pointer(*trie)) {
221-
node = kzalloc(sizeof(*node), GFP_KERNEL);
187+
node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
222188
if (unlikely(!node))
223189
return -ENOMEM;
224190
RCU_INIT_POINTER(node->peer, peer);
225191
list_add_tail(&node->peer_list, &peer->allowedips_list);
226192
copy_and_assign_cidr(node, key, cidr, bits);
227-
rcu_assign_pointer(*trie, node);
193+
connect_node(trie, 2, node);
228194
return 0;
229195
}
230196
if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -233,7 +199,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
233199
return 0;
234200
}
235201

236-
newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
202+
newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
237203
if (unlikely(!newnode))
238204
return -ENOMEM;
239205
RCU_INIT_POINTER(newnode->peer, peer);
@@ -243,41 +209,40 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
243209
if (!node) {
244210
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
245211
} else {
246-
down = rcu_dereference_protected(CHOOSE_NODE(node, key),
247-
lockdep_is_held(lock));
212+
const u8 bit = choose(node, key);
213+
down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
248214
if (!down) {
249-
rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
215+
connect_node(&node->bit[bit], bit, newnode);
250216
return 0;
251217
}
252218
}
253219
cidr = min(cidr, common_bits(down, key, bits));
254220
parent = node;
255221

256222
if (newnode->cidr == cidr) {
257-
rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
223+
choose_and_connect_node(newnode, down);
258224
if (!parent)
259-
rcu_assign_pointer(*trie, newnode);
225+
connect_node(trie, 2, newnode);
260226
else
261-
rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
262-
newnode);
263-
} else {
264-
node = kzalloc(sizeof(*node), GFP_KERNEL);
265-
if (unlikely(!node)) {
266-
list_del(&newnode->peer_list);
267-
kfree(newnode);
268-
return -ENOMEM;
269-
}
270-
INIT_LIST_HEAD(&node->peer_list);
271-
copy_and_assign_cidr(node, newnode->bits, cidr, bits);
227+
choose_and_connect_node(parent, newnode);
228+
return 0;
229+
}
272230

273-
rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
274-
rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
275-
if (!parent)
276-
rcu_assign_pointer(*trie, node);
277-
else
278-
rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
279-
node);
231+
node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
232+
if (unlikely(!node)) {
233+
list_del(&newnode->peer_list);
234+
kmem_cache_free(node_cache, newnode);
235+
return -ENOMEM;
280236
}
237+
INIT_LIST_HEAD(&node->peer_list);
238+
copy_and_assign_cidr(node, newnode->bits, cidr, bits);
239+
240+
choose_and_connect_node(node, down);
241+
choose_and_connect_node(node, newnode);
242+
if (!parent)
243+
connect_node(trie, 2, node);
244+
else
245+
choose_and_connect_node(parent, node);
281246
return 0;
282247
}
283248

@@ -335,9 +300,41 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
335300
void wg_allowedips_remove_by_peer(struct allowedips *table,
336301
struct wg_peer *peer, struct mutex *lock)
337302
{
303+
struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
304+
bool free_parent;
305+
306+
if (list_empty(&peer->allowedips_list))
307+
return;
338308
++table->seq;
339-
walk_remove_by_peer(&table->root4, peer, lock);
340-
walk_remove_by_peer(&table->root6, peer, lock);
309+
list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
310+
list_del_init(&node->peer_list);
311+
RCU_INIT_POINTER(node->peer, NULL);
312+
if (node->bit[0] && node->bit[1])
313+
continue;
314+
child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
315+
lockdep_is_held(lock));
316+
if (child)
317+
child->parent_bit_packed = node->parent_bit_packed;
318+
parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
319+
*parent_bit = child;
320+
parent = (void *)parent_bit -
321+
offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
322+
free_parent = !rcu_access_pointer(node->bit[0]) &&
323+
!rcu_access_pointer(node->bit[1]) &&
324+
(node->parent_bit_packed & 3) <= 1 &&
325+
!rcu_access_pointer(parent->peer);
326+
if (free_parent)
327+
child = rcu_dereference_protected(
328+
parent->bit[!(node->parent_bit_packed & 1)],
329+
lockdep_is_held(lock));
330+
call_rcu(&node->rcu, node_free_rcu);
331+
if (!free_parent)
332+
continue;
333+
if (child)
334+
child->parent_bit_packed = parent->parent_bit_packed;
335+
*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
336+
call_rcu(&parent->rcu, node_free_rcu);
337+
}
341338
}
342339

343340
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
@@ -374,4 +371,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
374371
return NULL;
375372
}
376373

374+
int __init wg_allowedips_slab_init(void)
375+
{
376+
node_cache = KMEM_CACHE(allowedips_node, 0);
377+
return node_cache ? 0 : -ENOMEM;
378+
}
379+
380+
void wg_allowedips_slab_uninit(void)
381+
{
382+
rcu_barrier();
383+
kmem_cache_destroy(node_cache);
384+
}
385+
377386
#include "selftest/allowedips.c"

drivers/net/wireguard/allowedips.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,11 @@ struct wg_peer;
1515
struct allowedips_node {
1616
struct wg_peer __rcu *peer;
1717
struct allowedips_node __rcu *bit[2];
18-
/* While it may seem scandalous that we waste space for v4,
19-
* we're alloc'ing to the nearest power of 2 anyway, so this
20-
* doesn't actually make a difference.
21-
*/
22-
u8 bits[16] __aligned(__alignof(u64));
2318
u8 cidr, bit_at_a, bit_at_b, bitlen;
19+
u8 bits[16] __aligned(__alignof(u64));
2420

25-
/* Keep rarely used list at bottom to be beyond cache line. */
21+
/* Keep rarely used members at bottom to be beyond cache line. */
22+
unsigned long parent_bit_packed;
2623
union {
2724
struct list_head peer_list;
2825
struct rcu_head rcu;
@@ -33,7 +30,7 @@ struct allowedips {
3330
struct allowedips_node __rcu *root4;
3431
struct allowedips_node __rcu *root6;
3532
u64 seq;
36-
};
33+
} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
3734

3835
void wg_allowedips_init(struct allowedips *table);
3936
void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
@@ -56,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
5653
bool wg_allowedips_selftest(void);
5754
#endif
5855

56+
int wg_allowedips_slab_init(void);
57+
void wg_allowedips_slab_uninit(void);
58+
5959
#endif /* _WG_ALLOWEDIPS_H */

drivers/net/wireguard/main.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,22 @@ static int __init mod_init(void)
2121
{
2222
int ret;
2323

24+
ret = wg_allowedips_slab_init();
25+
if (ret < 0)
26+
goto err_allowedips;
27+
2428
#ifdef DEBUG
29+
ret = -ENOTRECOVERABLE;
2530
if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
2631
!wg_ratelimiter_selftest())
27-
return -ENOTRECOVERABLE;
32+
goto err_peer;
2833
#endif
2934
wg_noise_init();
3035

36+
ret = wg_peer_init();
37+
if (ret < 0)
38+
goto err_peer;
39+
3140
ret = wg_device_init();
3241
if (ret < 0)
3342
goto err_device;
@@ -44,13 +53,19 @@ static int __init mod_init(void)
4453
err_netlink:
4554
wg_device_uninit();
4655
err_device:
56+
wg_peer_uninit();
57+
err_peer:
58+
wg_allowedips_slab_uninit();
59+
err_allowedips:
4760
return ret;
4861
}
4962

5063
static void __exit mod_exit(void)
5164
{
5265
wg_genetlink_uninit();
5366
wg_device_uninit();
67+
wg_peer_uninit();
68+
wg_allowedips_slab_uninit();
5469
}
5570

5671
module_init(mod_init);

0 commit comments

Comments
 (0)