Skip to content

Commit 9e0703a

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2020-01-27 The following pull-request contains BPF updates for your *net-next* tree. We've added 20 non-merge commits during the last 5 day(s) which contain a total of 24 files changed, 433 insertions(+), 104 deletions(-). The main changes are: 1) Make BPF trampolines and dispatcher aware for the stack unwinder, from Jiri Olsa. 2) Improve handling of failed CO-RE relocations in libbpf, from Andrii Nakryiko. 3) Several fixes to BPF sockmap and reuseport selftests, from Lorenz Bauer. 4) Various cleanups in BPF devmap's XDP flush code, from John Fastabend. 5) Fix BPF flow dissector when used with port ranges, from Yoshiki Komachi. 6) Fix bpffs' map_seq_next callback to always inc position index, from Vasily Averin. 7) Allow overriding LLVM tooling for runqslower utility, from Andrey Ignatov. 8) Silence false-positive lockdep splats in devmap hash lookup, from Amol Grover. 9) Fix fentry/fexit selftests to initialize a variable before use, from John Sperbeck. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents c312840 + 82650da commit 9e0703a

File tree

24 files changed

+433
-104
lines changed

24 files changed

+433
-104
lines changed

drivers/net/veth.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
377377
unsigned int max_len;
378378
struct veth_rq *rq;
379379

380+
rcu_read_lock();
380381
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
381382
ret = -EINVAL;
382383
goto drop;
@@ -418,11 +419,14 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
418419
if (flags & XDP_XMIT_FLUSH)
419420
__veth_xdp_flush(rq);
420421

421-
if (likely(!drops))
422+
if (likely(!drops)) {
423+
rcu_read_unlock();
422424
return n;
425+
}
423426

424427
ret = n - drops;
425428
drop:
429+
rcu_read_unlock();
426430
atomic64_add(drops, &priv->dropped);
427431

428432
return ret;

drivers/net/virtio_net.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
501501
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
502502
* indicates XDP resources have been successfully allocated.
503503
*/
504-
xdp_prog = rcu_dereference(rq->xdp_prog);
504+
xdp_prog = rcu_access_pointer(rq->xdp_prog);
505505
if (!xdp_prog)
506506
return -ENXIO;
507507

include/linux/bpf.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
525525
int bpf_trampoline_link_prog(struct bpf_prog *prog);
526526
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
527527
void bpf_trampoline_put(struct bpf_trampoline *tr);
528-
void *bpf_jit_alloc_exec_page(void);
529528
#define BPF_DISPATCHER_INIT(name) { \
530529
.mutex = __MUTEX_INITIALIZER(name.mutex), \
531530
.func = &name##func, \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
557556
#define BPF_DISPATCHER_PTR(name) (&name)
558557
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
559558
struct bpf_prog *to);
559+
struct bpf_image {
560+
struct latch_tree_node tnode;
561+
unsigned char data[];
562+
};
563+
#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
564+
bool is_bpf_image_address(unsigned long address);
565+
void *bpf_image_alloc(void);
560566
#else
561567
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
562568
{
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
578584
static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
579585
struct bpf_prog *from,
580586
struct bpf_prog *to) {}
587+
static inline bool is_bpf_image_address(unsigned long address)
588+
{
589+
return false;
590+
}
581591
#endif
582592

583593
struct bpf_func_info_aux {

kernel/bpf/btf.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
36693669
}
36703670
}
36713671

3672+
static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
3673+
{
3674+
/* t comes in already as a pointer */
3675+
t = btf_type_by_id(btf, t->type);
3676+
3677+
/* allow const */
3678+
if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
3679+
t = btf_type_by_id(btf, t->type);
3680+
3681+
/* char, signed char, unsigned char */
3682+
return btf_type_is_int(t) && t->size == 1;
3683+
}
3684+
36723685
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
36733686
const struct bpf_prog *prog,
36743687
struct bpf_insn_access_aux *info)
@@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
37353748
*/
37363749
return true;
37373750

3751+
if (is_string_ptr(btf, t))
3752+
return true;
3753+
37383754
/* this is a pointer to another type */
37393755
info->reg_type = PTR_TO_BTF_ID;
37403756

kernel/bpf/devmap.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,12 @@ static void dev_map_free(struct bpf_map *map)
190190

191191
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
192192
* so the programs (can be more than one that used this map) were
193-
* disconnected from events. Wait for outstanding critical sections in
194-
* these programs to complete. The rcu critical section only guarantees
195-
* no further reads against netdev_map. It does __not__ ensure pending
196-
* flush operations (if any) are complete.
193+
* disconnected from events. The following synchronize_rcu() guarantees
194+
* both rcu read critical sections complete and waits for
195+
* preempt-disable regions (NAPI being the relevant context here) so we
196+
* are certain there will be no further reads against the netdev_map and
197+
* all flush operations are complete. Flush operations can only be done
198+
* from NAPI context for this reason.
197199
*/
198200

199201
spin_lock(&dev_map_lock);
@@ -263,7 +265,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
263265
struct hlist_head *head = dev_map_index_hash(dtab, key);
264266
struct bpf_dtab_netdev *dev;
265267

266-
hlist_for_each_entry_rcu(dev, head, index_hlist)
268+
hlist_for_each_entry_rcu(dev, head, index_hlist,
269+
lockdep_is_held(&dtab->index_lock))
267270
if (dev->idx == key)
268271
return dev;
269272

@@ -363,16 +366,17 @@ static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
363366
* from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
364367
* net device can be torn down. On devmap tear down we ensure the flush list
365368
* is empty before completing to ensure all flush operations have completed.
369+
* When drivers update the bpf program they may need to ensure any flush ops
370+
* are also complete. Using synchronize_rcu or call_rcu will suffice for this
371+
* because both wait for napi context to exit.
366372
*/
367373
void __dev_flush(void)
368374
{
369375
struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
370376
struct xdp_dev_bulk_queue *bq, *tmp;
371377

372-
rcu_read_lock();
373378
list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
374379
bq_xmit_all(bq, XDP_XMIT_FLUSH);
375-
rcu_read_unlock();
376380
}
377381

378382
/* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -502,12 +506,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
502506
return -EINVAL;
503507

504508
/* Use call_rcu() here to ensure any rcu critical sections have
505-
* completed, but this does not guarantee a flush has happened
506-
* yet. Because driver side rcu_read_lock/unlock only protects the
507-
* running XDP program. However, for pending flush operations the
508-
* dev and ctx are stored in another per cpu map. And additionally,
509-
* the driver tear down ensures all soft irqs are complete before
510-
* removing the net device in the case of dev_put equals zero.
509+
* completed, as well as any flush operations, because call_rcu()
510+
* waits for preempt-disable regions (NAPI, in this context) to
511+
* complete. Additionally, the driver teardown ensures all soft
512+
* irqs are complete before removing the net device when the
513+
* last dev_put() drops the reference count to zero.
511514
*/
512515
old_dev = xchg(&dtab->netdev_map[k], NULL);
513516
if (old_dev)

kernel/bpf/dispatcher.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
113113
noff = 0;
114114
} else {
115115
old = d->image + d->image_off;
116-
noff = d->image_off ^ (PAGE_SIZE / 2);
116+
noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
117117
}
118118

119119
new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
140140

141141
mutex_lock(&d->mutex);
142142
if (!d->image) {
143-
d->image = bpf_jit_alloc_exec_page();
143+
d->image = bpf_image_alloc();
144144
if (!d->image)
145145
goto out;
146146
}

kernel/bpf/inode.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
196196
void *key = map_iter(m)->key;
197197
void *prev_key;
198198

199+
(*pos)++;
199200
if (map_iter(m)->done)
200201
return NULL;
201202

@@ -208,8 +209,6 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
208209
map_iter(m)->done = true;
209210
return NULL;
210211
}
211-
212-
++(*pos);
213212
return key;
214213
}
215214

kernel/bpf/trampoline.c

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <linux/bpf.h>
55
#include <linux/filter.h>
66
#include <linux/ftrace.h>
7+
#include <linux/rbtree_latch.h>
78

89
/* dummy _ops. The verifier will operate on target program's ops. */
910
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
1617
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
1718

1819
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
20+
static struct latch_tree_root image_tree __cacheline_aligned;
1921

20-
/* serializes access to trampoline_table */
22+
/* serializes access to trampoline_table and image_tree */
2123
static DEFINE_MUTEX(trampoline_mutex);
2224

23-
void *bpf_jit_alloc_exec_page(void)
25+
static void *bpf_jit_alloc_exec_page(void)
2426
{
2527
void *image;
2628

@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
3638
return image;
3739
}
3840

41+
static __always_inline bool image_tree_less(struct latch_tree_node *a,
42+
struct latch_tree_node *b)
43+
{
44+
struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
45+
struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
46+
47+
return ia < ib;
48+
}
49+
50+
static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
51+
{
52+
void *image = container_of(n, struct bpf_image, tnode);
53+
54+
if (addr < image)
55+
return -1;
56+
if (addr >= image + PAGE_SIZE)
57+
return 1;
58+
59+
return 0;
60+
}
61+
62+
static const struct latch_tree_ops image_tree_ops = {
63+
.less = image_tree_less,
64+
.comp = image_tree_comp,
65+
};
66+
67+
static void *__bpf_image_alloc(bool lock)
68+
{
69+
struct bpf_image *image;
70+
71+
image = bpf_jit_alloc_exec_page();
72+
if (!image)
73+
return NULL;
74+
75+
if (lock)
76+
mutex_lock(&trampoline_mutex);
77+
latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
78+
if (lock)
79+
mutex_unlock(&trampoline_mutex);
80+
return image->data;
81+
}
82+
83+
void *bpf_image_alloc(void)
84+
{
85+
return __bpf_image_alloc(true);
86+
}
87+
88+
bool is_bpf_image_address(unsigned long addr)
89+
{
90+
bool ret;
91+
92+
rcu_read_lock();
93+
ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
94+
rcu_read_unlock();
95+
96+
return ret;
97+
}
98+
3999
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
40100
{
41101
struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
56116
goto out;
57117

58118
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
59-
image = bpf_jit_alloc_exec_page();
119+
image = __bpf_image_alloc(false);
60120
if (!image) {
61121
kfree(tr);
62122
tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
131191
}
132192

133193
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
134-
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
194+
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
135195
*/
136196
#define BPF_MAX_TRAMP_PROGS 40
137197

138198
static int bpf_trampoline_update(struct bpf_trampoline *tr)
139199
{
140-
void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
141-
void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
200+
void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
201+
void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
142202
struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
143203
int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
144204
int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
174234
*/
175235
synchronize_rcu_tasks();
176236

177-
err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
237+
err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
178238
&tr->func.model, flags,
179239
fentry, fentry_cnt,
180240
fexit, fexit_cnt,
@@ -284,6 +344,8 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
284344

285345
void bpf_trampoline_put(struct bpf_trampoline *tr)
286346
{
347+
struct bpf_image *image;
348+
287349
if (!tr)
288350
return;
289351
mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
294356
goto out;
295357
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
296358
goto out;
359+
image = container_of(tr->image, struct bpf_image, data);
360+
latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
297361
/* wait for tasks to get out of trampoline before freeing it */
298362
synchronize_rcu_tasks();
299-
bpf_jit_free_exec(tr->image);
363+
bpf_jit_free_exec(image);
300364
hlist_del(&tr->hlist);
301365
kfree(tr);
302366
out:

kernel/extable.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
131131
* triggers a stack trace, or a WARN() that happens during
132132
* coming back from idle, or cpu on or offlining.
133133
*
134-
* is_module_text_address() as well as the kprobe slots
135-
* and is_bpf_text_address() require RCU to be watching.
134+
* is_module_text_address() as well as the kprobe slots,
135+
* is_bpf_text_address() and is_bpf_image_address() require
136+
* RCU to be watching.
136137
*/
137138
no_rcu = !rcu_is_watching();
138139

@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
148149
goto out;
149150
if (is_bpf_text_address(addr))
150151
goto out;
152+
if (is_bpf_image_address(addr))
153+
goto out;
151154
ret = 0;
152155
out:
153156
if (no_rcu)

net/core/flow_dissector.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
834834
struct flow_dissector *flow_dissector,
835835
void *target_container)
836836
{
837+
struct flow_dissector_key_ports *key_ports = NULL;
837838
struct flow_dissector_key_control *key_control;
838839
struct flow_dissector_key_basic *key_basic;
839840
struct flow_dissector_key_addrs *key_addrs;
840-
struct flow_dissector_key_ports *key_ports;
841841
struct flow_dissector_key_tags *key_tags;
842842

843843
key_control = skb_flow_dissector_target(flow_dissector,
@@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
876876
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
877877
}
878878

879-
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
879+
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
880880
key_ports = skb_flow_dissector_target(flow_dissector,
881881
FLOW_DISSECTOR_KEY_PORTS,
882882
target_container);
883+
else if (dissector_uses_key(flow_dissector,
884+
FLOW_DISSECTOR_KEY_PORTS_RANGE))
885+
key_ports = skb_flow_dissector_target(flow_dissector,
886+
FLOW_DISSECTOR_KEY_PORTS_RANGE,
887+
target_container);
888+
889+
if (key_ports) {
883890
key_ports->src = flow_keys->sport;
884891
key_ports->dst = flow_keys->dport;
885892
}

0 commit comments

Comments
 (0)