Skip to content

Commit 906312c

Browse files
author
Alexei Starovoitov
committed
Merge branch 'xdp_devmap'
David Ahern says: ==================== Implementation of Daniel's proposal for allowing DEVMAP entries to be a device index, program fd pair. Programs are run after XDP_REDIRECT and have access to both Rx device and Tx device. v4 - moved struct bpf_devmap_val from uapi to devmap.c, named the union and dropped the prefix from the elements - Jesper - fixed 2 bugs in selftests v3 - renamed struct to bpf_devmap_val - used offsetofend to check for expected map size, modification of Toke's comment - check for explicit value sizes - adjusted switch statement in dev_map_run_prog per Andrii's comment - changed SEC shortcut to xdp_devmap - changed selftests to use skeleton and new map declaration v2 - moved dev_map_ext_val definition to uapi to formalize the API for devmap extensions; add bpf_ prefix to the prog_fd and prog_id entries - changed devmap code to handle struct in a way that it can support future extensions - fixed subject in libbpf patch v1 - fixed prog put on invalid program - Toke - changed write value from id to fd per Toke's comments about capabilities - add test cases ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents b36e62e + d39aec7 commit 906312c

File tree

11 files changed

+328
-18
lines changed

11 files changed

+328
-18
lines changed

include/linux/bpf.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
12501250
struct net_device *dev_rx);
12511251
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
12521252
struct bpf_prog *xdp_prog);
1253+
bool dev_map_can_have_prog(struct bpf_map *map);
12531254

12541255
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
12551256
void __cpu_map_flush(void);
@@ -1363,6 +1364,10 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
13631364
{
13641365
return NULL;
13651366
}
1367+
static inline bool dev_map_can_have_prog(struct bpf_map *map)
1368+
{
1369+
return false;
1370+
}
13661371

13671372
static inline void __dev_flush(void)
13681373
{

include/net/xdp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,17 @@ struct xdp_rxq_info {
6161
struct xdp_mem_info mem;
6262
} ____cacheline_aligned; /* perf critical, avoid false-sharing */
6363

64+
struct xdp_txq_info {
65+
struct net_device *dev;
66+
};
67+
6468
struct xdp_buff {
6569
void *data;
6670
void *data_end;
6771
void *data_meta;
6872
void *data_hard_start;
6973
struct xdp_rxq_info *rxq;
74+
struct xdp_txq_info *txq;
7075
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
7176
};
7277

include/uapi/linux/bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ enum bpf_attach_type {
225225
BPF_CGROUP_INET6_GETPEERNAME,
226226
BPF_CGROUP_INET4_GETSOCKNAME,
227227
BPF_CGROUP_INET6_GETSOCKNAME,
228+
BPF_XDP_DEVMAP,
228229
__MAX_BPF_ATTACH_TYPE
229230
};
230231

@@ -3706,6 +3707,8 @@ struct xdp_md {
37063707
/* Below access go through struct xdp_rxq_info */
37073708
__u32 ingress_ifindex; /* rxq->dev->ifindex */
37083709
__u32 rx_queue_index; /* rxq->queue_index */
3710+
3711+
__u32 egress_ifindex; /* txq->dev->ifindex */
37093712
};
37103713

37113714
enum sk_action {

kernel/bpf/devmap.c

Lines changed: 112 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,23 @@ struct xdp_dev_bulk_queue {
6060
unsigned int count;
6161
};
6262

63+
/* DEVMAP values */
64+
struct bpf_devmap_val {
65+
u32 ifindex; /* device index */
66+
union {
67+
int fd; /* prog fd on map write */
68+
u32 id; /* prog id on map read */
69+
} bpf_prog;
70+
};
71+
6372
struct bpf_dtab_netdev {
6473
struct net_device *dev; /* must be first member, due to tracepoint */
6574
struct hlist_node index_hlist;
6675
struct bpf_dtab *dtab;
76+
struct bpf_prog *xdp_prog;
6777
struct rcu_head rcu;
6878
unsigned int idx;
79+
struct bpf_devmap_val val;
6980
};
7081

7182
struct bpf_dtab {
@@ -105,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
105116

106117
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
107118
{
119+
u32 valsize = attr->value_size;
108120
u64 cost = 0;
109121
int err;
110122

111-
/* check sanity of attributes */
123+
/* check sanity of attributes. 2 value sizes supported:
124+
* 4 bytes: ifindex
125+
* 8 bytes: ifindex + prog fd
126+
*/
112127
if (attr->max_entries == 0 || attr->key_size != 4 ||
113-
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
128+
(valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
129+
valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
130+
attr->map_flags & ~DEV_CREATE_FLAG_MASK)
114131
return -EINVAL;
115132

116133
/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -217,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)
217234

218235
hlist_for_each_entry_safe(dev, next, head, index_hlist) {
219236
hlist_del_rcu(&dev->index_hlist);
237+
if (dev->xdp_prog)
238+
bpf_prog_put(dev->xdp_prog);
220239
dev_put(dev->dev);
221240
kfree(dev);
222241
}
@@ -231,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
231250
if (!dev)
232251
continue;
233252

253+
if (dev->xdp_prog)
254+
bpf_prog_put(dev->xdp_prog);
234255
dev_put(dev->dev);
235256
kfree(dev);
236257
}
@@ -317,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
317338
return -ENOENT;
318339
}
319340

341+
bool dev_map_can_have_prog(struct bpf_map *map)
342+
{
343+
if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
344+
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
345+
map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
346+
return true;
347+
348+
return false;
349+
}
350+
320351
static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
321352
{
322353
struct net_device *dev = bq->dev;
@@ -441,6 +472,33 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
441472
return bq_enqueue(dev, xdpf, dev_rx);
442473
}
443474

475+
static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
476+
struct xdp_buff *xdp,
477+
struct bpf_prog *xdp_prog)
478+
{
479+
struct xdp_txq_info txq = { .dev = dev };
480+
u32 act;
481+
482+
xdp->txq = &txq;
483+
484+
act = bpf_prog_run_xdp(xdp_prog, xdp);
485+
switch (act) {
486+
case XDP_PASS:
487+
return xdp;
488+
case XDP_DROP:
489+
break;
490+
default:
491+
bpf_warn_invalid_xdp_action(act);
492+
fallthrough;
493+
case XDP_ABORTED:
494+
trace_xdp_exception(dev, xdp_prog, act);
495+
break;
496+
}
497+
498+
xdp_return_buff(xdp);
499+
return NULL;
500+
}
501+
444502
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
445503
struct net_device *dev_rx)
446504
{
@@ -452,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
452510
{
453511
struct net_device *dev = dst->dev;
454512

513+
if (dst->xdp_prog) {
514+
xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
515+
if (!xdp)
516+
return 0;
517+
}
455518
return __xdp_enqueue(dev, xdp, dev_rx);
456519
}
457520

@@ -472,25 +535,24 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
472535
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
473536
{
474537
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
475-
struct net_device *dev = obj ? obj->dev : NULL;
476538

477-
return dev ? &dev->ifindex : NULL;
539+
return obj ? &obj->val : NULL;
478540
}
479541

480542
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
481543
{
482544
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
483545
*(u32 *)key);
484-
struct net_device *dev = obj ? obj->dev : NULL;
485-
486-
return dev ? &dev->ifindex : NULL;
546+
return obj ? &obj->val : NULL;
487547
}
488548

489549
static void __dev_map_entry_free(struct rcu_head *rcu)
490550
{
491551
struct bpf_dtab_netdev *dev;
492552

493553
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
554+
if (dev->xdp_prog)
555+
bpf_prog_put(dev->xdp_prog);
494556
dev_put(dev->dev);
495557
kfree(dev);
496558
}
@@ -541,34 +603,57 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
541603

542604
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
543605
struct bpf_dtab *dtab,
544-
u32 ifindex,
606+
struct bpf_devmap_val *val,
545607
unsigned int idx)
546608
{
609+
struct bpf_prog *prog = NULL;
547610
struct bpf_dtab_netdev *dev;
548611

549612
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
550613
dtab->map.numa_node);
551614
if (!dev)
552615
return ERR_PTR(-ENOMEM);
553616

554-
dev->dev = dev_get_by_index(net, ifindex);
555-
if (!dev->dev) {
556-
kfree(dev);
557-
return ERR_PTR(-EINVAL);
617+
dev->dev = dev_get_by_index(net, val->ifindex);
618+
if (!dev->dev)
619+
goto err_out;
620+
621+
if (val->bpf_prog.fd >= 0) {
622+
prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
623+
BPF_PROG_TYPE_XDP, false);
624+
if (IS_ERR(prog))
625+
goto err_put_dev;
626+
if (prog->expected_attach_type != BPF_XDP_DEVMAP)
627+
goto err_put_prog;
558628
}
559629

560630
dev->idx = idx;
561631
dev->dtab = dtab;
632+
if (prog) {
633+
dev->xdp_prog = prog;
634+
dev->val.bpf_prog.id = prog->aux->id;
635+
} else {
636+
dev->xdp_prog = NULL;
637+
dev->val.bpf_prog.id = 0;
638+
}
639+
dev->val.ifindex = val->ifindex;
562640

563641
return dev;
642+
err_put_prog:
643+
bpf_prog_put(prog);
644+
err_put_dev:
645+
dev_put(dev->dev);
646+
err_out:
647+
kfree(dev);
648+
return ERR_PTR(-EINVAL);
564649
}
565650

566651
static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
567652
void *key, void *value, u64 map_flags)
568653
{
569654
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
655+
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
570656
struct bpf_dtab_netdev *dev, *old_dev;
571-
u32 ifindex = *(u32 *)value;
572657
u32 i = *(u32 *)key;
573658

574659
if (unlikely(map_flags > BPF_EXIST))
@@ -578,10 +663,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
578663
if (unlikely(map_flags == BPF_NOEXIST))
579664
return -EEXIST;
580665

581-
if (!ifindex) {
666+
/* already verified value_size <= sizeof val */
667+
memcpy(&val, value, map->value_size);
668+
669+
if (!val.ifindex) {
582670
dev = NULL;
671+
/* can not specify fd if ifindex is 0 */
672+
if (val.bpf_prog.fd != -1)
673+
return -EINVAL;
583674
} else {
584-
dev = __dev_map_alloc_node(net, dtab, ifindex, i);
675+
dev = __dev_map_alloc_node(net, dtab, &val, i);
585676
if (IS_ERR(dev))
586677
return PTR_ERR(dev);
587678
}
@@ -608,13 +699,16 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
608699
void *key, void *value, u64 map_flags)
609700
{
610701
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
702+
struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
611703
struct bpf_dtab_netdev *dev, *old_dev;
612-
u32 ifindex = *(u32 *)value;
613704
u32 idx = *(u32 *)key;
614705
unsigned long flags;
615706
int err = -EEXIST;
616707

617-
if (unlikely(map_flags > BPF_EXIST || !ifindex))
708+
/* already verified value_size <= sizeof val */
709+
memcpy(&val, value, map->value_size);
710+
711+
if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
618712
return -EINVAL;
619713

620714
spin_lock_irqsave(&dtab->index_lock, flags);
@@ -623,7 +717,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
623717
if (old_dev && (map_flags & BPF_NOEXIST))
624718
goto out_err;
625719

626-
dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
720+
dev = __dev_map_alloc_node(net, dtab, &val, idx);
627721
if (IS_ERR(dev)) {
628722
err = PTR_ERR(dev);
629723
goto out_err;

net/core/dev.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
54205420
struct bpf_prog *new = xdp->prog;
54215421
int ret = 0;
54225422

5423+
if (new) {
5424+
u32 i;
5425+
5426+
/* generic XDP does not work with DEVMAPs that can
5427+
* have a bpf_prog installed on an entry
5428+
*/
5429+
for (i = 0; i < new->aux->used_map_cnt; i++) {
5430+
if (dev_map_can_have_prog(new->aux->used_maps[i]))
5431+
return -EINVAL;
5432+
}
5433+
}
5434+
54235435
switch (xdp->command) {
54245436
case XDP_SETUP_PROG:
54255437
rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
88358847
return -EINVAL;
88368848
}
88378849

8850+
if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
8851+
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
8852+
bpf_prog_put(prog);
8853+
return -EINVAL;
8854+
}
8855+
88388856
/* prog->aux->id may be 0 for orphaned device-bound progs */
88398857
if (prog->aux->id && prog->aux->id == prog_id) {
88408858
bpf_prog_put(prog);

net/core/filter.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7015,6 +7015,13 @@ static bool xdp_is_valid_access(int off, int size,
70157015
const struct bpf_prog *prog,
70167016
struct bpf_insn_access_aux *info)
70177017
{
7018+
if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
7019+
switch (off) {
7020+
case offsetof(struct xdp_md, egress_ifindex):
7021+
return false;
7022+
}
7023+
}
7024+
70187025
if (type == BPF_WRITE) {
70197026
if (bpf_prog_is_dev_bound(prog->aux)) {
70207027
switch (off) {
@@ -7985,6 +7992,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
79857992
offsetof(struct xdp_rxq_info,
79867993
queue_index));
79877994
break;
7995+
case offsetof(struct xdp_md, egress_ifindex):
7996+
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
7997+
si->dst_reg, si->src_reg,
7998+
offsetof(struct xdp_buff, txq));
7999+
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
8000+
si->dst_reg, si->dst_reg,
8001+
offsetof(struct xdp_txq_info, dev));
8002+
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8003+
offsetof(struct net_device, ifindex));
8004+
break;
79888005
}
79898006

79908007
return insn - insn_buf;

0 commit comments

Comments (0)