Skip to content

Commit 1209339

Browse files
Yonghong Songanakryiko
authored andcommitted
bpf: Implement mprog API on top of existing cgroup progs
Current cgroup prog ordering is appending at attachment time. This is not ideal. In some cases, users want specific ordering at a particular cgroup level. To address this, the existing mprog API seems an ideal solution with supporting BPF_F_BEFORE and BPF_F_AFTER flags. But there are a few obstacles to directly use kernel mprog interface. Currently cgroup bpf progs already support prog attach/detach/replace and link-based attach/detach/replace. For example, in struct bpf_prog_array_item, the cgroup_storage field needs to be together with bpf prog. But the mprog API struct bpf_mprog_fp only has bpf_prog as the member, which makes it difficult to use kernel mprog interface. In another case, the current cgroup prog detach tries to use the same flag as in attach. This is different from mprog kernel interface which uses flags passed from user space. So to avoid modifying existing behavior, I made the following changes to support mprog API for cgroup progs: - The support is for prog list at cgroup level. Cross-level prog list (a.k.a. effective prog list) is not supported. - Previously, BPF_F_PREORDER is supported only for prog attach, now BPF_F_PREORDER is also supported by link-based attach. - For attach, BPF_F_BEFORE/BPF_F_AFTER/BPF_F_ID/BPF_F_LINK is supported similar to kernel mprog but with different implementation. - For detach and replace, use the existing implementation. - For attach, detach and replace, the revision for a particular prog list, associated with a particular attach type, will be updated by increasing count by 1. Signed-off-by: Yonghong Song <[email protected]> Signed-off-by: Andrii Nakryiko <[email protected]> Link: https://lore.kernel.org/bpf/[email protected]
1 parent 9b8367b commit 1209339

File tree

4 files changed

+205
-37
lines changed

4 files changed

+205
-37
lines changed

include/uapi/linux/bpf.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1794,6 +1794,13 @@ union bpf_attr {
17941794
};
17951795
__u64 expected_revision;
17961796
} netkit;
1797+
struct {
1798+
union {
1799+
__u32 relative_fd;
1800+
__u32 relative_id;
1801+
};
1802+
__u64 expected_revision;
1803+
} cgroup;
17971804
};
17981805
} link_create;
17991806

kernel/bpf/cgroup.c

Lines changed: 160 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,116 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
658658
return NULL;
659659
}
660660

661+
static struct bpf_link *bpf_get_anchor_link(u32 flags, u32 id_or_fd)
662+
{
663+
struct bpf_link *link = ERR_PTR(-EINVAL);
664+
665+
if (flags & BPF_F_ID)
666+
link = bpf_link_by_id(id_or_fd);
667+
else if (id_or_fd)
668+
link = bpf_link_get_from_fd(id_or_fd);
669+
return link;
670+
}
671+
672+
static struct bpf_prog *bpf_get_anchor_prog(u32 flags, u32 id_or_fd)
673+
{
674+
struct bpf_prog *prog = ERR_PTR(-EINVAL);
675+
676+
if (flags & BPF_F_ID)
677+
prog = bpf_prog_by_id(id_or_fd);
678+
else if (id_or_fd)
679+
prog = bpf_prog_get(id_or_fd);
680+
return prog;
681+
}
682+
683+
static struct bpf_prog_list *get_prog_list(struct hlist_head *progs, struct bpf_prog *prog,
684+
struct bpf_cgroup_link *link, u32 flags, u32 id_or_fd)
685+
{
686+
bool is_link = flags & BPF_F_LINK, is_id = flags & BPF_F_ID;
687+
struct bpf_prog_list *pltmp, *pl = ERR_PTR(-EINVAL);
688+
bool preorder = flags & BPF_F_PREORDER;
689+
struct bpf_link *anchor_link = NULL;
690+
struct bpf_prog *anchor_prog = NULL;
691+
bool is_before, is_after;
692+
693+
is_before = flags & BPF_F_BEFORE;
694+
is_after = flags & BPF_F_AFTER;
695+
if (is_link || is_id || id_or_fd) {
696+
/* flags must have either BPF_F_BEFORE or BPF_F_AFTER */
697+
if (is_before == is_after)
698+
return ERR_PTR(-EINVAL);
699+
if ((is_link && !link) || (!is_link && !prog))
700+
return ERR_PTR(-EINVAL);
701+
} else if (!hlist_empty(progs)) {
702+
/* flags cannot have both BPF_F_BEFORE and BPF_F_AFTER */
703+
if (is_before && is_after)
704+
return ERR_PTR(-EINVAL);
705+
}
706+
707+
if (is_link) {
708+
anchor_link = bpf_get_anchor_link(flags, id_or_fd);
709+
if (IS_ERR(anchor_link))
710+
return ERR_PTR(PTR_ERR(anchor_link));
711+
} else if (is_id || id_or_fd) {
712+
anchor_prog = bpf_get_anchor_prog(flags, id_or_fd);
713+
if (IS_ERR(anchor_prog))
714+
return ERR_PTR(PTR_ERR(anchor_prog));
715+
}
716+
717+
if (!anchor_prog && !anchor_link) {
718+
/* if there is no anchor_prog/anchor_link, then BPF_F_PREORDER
719+
* doesn't matter since either prepend or append to a combined
720+
* list of progs will end up with correct result.
721+
*/
722+
hlist_for_each_entry(pltmp, progs, node) {
723+
if (is_before)
724+
return pltmp;
725+
if (pltmp->node.next)
726+
continue;
727+
return pltmp;
728+
}
729+
return NULL;
730+
}
731+
732+
hlist_for_each_entry(pltmp, progs, node) {
733+
if ((anchor_prog && anchor_prog == pltmp->prog) ||
734+
(anchor_link && anchor_link == &pltmp->link->link)) {
735+
if (!!(pltmp->flags & BPF_F_PREORDER) != preorder)
736+
goto out;
737+
pl = pltmp;
738+
goto out;
739+
}
740+
}
741+
742+
pl = ERR_PTR(-ENOENT);
743+
out:
744+
if (anchor_link)
745+
bpf_link_put(anchor_link);
746+
else
747+
bpf_prog_put(anchor_prog);
748+
return pl;
749+
}
750+
751+
static int insert_pl_to_hlist(struct bpf_prog_list *pl, struct hlist_head *progs,
752+
struct bpf_prog *prog, struct bpf_cgroup_link *link,
753+
u32 flags, u32 id_or_fd)
754+
{
755+
struct bpf_prog_list *pltmp;
756+
757+
pltmp = get_prog_list(progs, prog, link, flags, id_or_fd);
758+
if (IS_ERR(pltmp))
759+
return PTR_ERR(pltmp);
760+
761+
if (!pltmp)
762+
hlist_add_head(&pl->node, progs);
763+
else if (flags & BPF_F_BEFORE)
764+
hlist_add_before(&pl->node, &pltmp->node);
765+
else
766+
hlist_add_behind(&pl->node, &pltmp->node);
767+
768+
return 0;
769+
}
770+
661771
/**
662772
* __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
663773
* propagate the change to descendants
@@ -667,14 +777,17 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
667777
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
668778
* @type: Type of attach operation
669779
* @flags: Option flags
780+
* @id_or_fd: Relative prog id or fd
781+
* @revision: bpf_prog_list revision
670782
*
671783
* Exactly one of @prog or @link can be non-null.
672784
* Must be called with cgroup_mutex held.
673785
*/
674786
static int __cgroup_bpf_attach(struct cgroup *cgrp,
675787
struct bpf_prog *prog, struct bpf_prog *replace_prog,
676788
struct bpf_cgroup_link *link,
677-
enum bpf_attach_type type, u32 flags)
789+
enum bpf_attach_type type, u32 flags, u32 id_or_fd,
790+
u64 revision)
678791
{
679792
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
680793
struct bpf_prog *old_prog = NULL;
@@ -690,6 +803,9 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
690803
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
691804
/* invalid combination */
692805
return -EINVAL;
806+
if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE | BPF_F_AFTER)))
807+
/* only either replace or insertion with before/after */
808+
return -EINVAL;
693809
if (link && (prog || replace_prog))
694810
/* only either link or prog/replace_prog can be specified */
695811
return -EINVAL;
@@ -700,6 +816,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
700816
atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
701817
if (atype < 0)
702818
return -EINVAL;
819+
if (revision && revision != cgrp->bpf.revisions[atype])
820+
return -ESTALE;
703821

704822
progs = &cgrp->bpf.progs[atype];
705823

@@ -728,22 +846,18 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
728846
if (pl) {
729847
old_prog = pl->prog;
730848
} else {
731-
struct hlist_node *last = NULL;
732-
733849
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
734850
if (!pl) {
735851
bpf_cgroup_storages_free(new_storage);
736852
return -ENOMEM;
737853
}
738-
if (hlist_empty(progs))
739-
hlist_add_head(&pl->node, progs);
740-
else
741-
hlist_for_each(last, progs) {
742-
if (last->next)
743-
continue;
744-
hlist_add_behind(&pl->node, last);
745-
break;
746-
}
854+
855+
err = insert_pl_to_hlist(pl, progs, prog, link, flags, id_or_fd);
856+
if (err) {
857+
kfree(pl);
858+
bpf_cgroup_storages_free(new_storage);
859+
return err;
860+
}
747861
}
748862

749863
pl->prog = prog;
@@ -762,6 +876,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
762876
if (err)
763877
goto cleanup_trampoline;
764878

879+
cgrp->bpf.revisions[atype] += 1;
765880
if (old_prog) {
766881
if (type == BPF_LSM_CGROUP)
767882
bpf_trampoline_unlink_cgroup_shim(old_prog);
@@ -793,12 +908,13 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
793908
struct bpf_prog *prog, struct bpf_prog *replace_prog,
794909
struct bpf_cgroup_link *link,
795910
enum bpf_attach_type type,
796-
u32 flags)
911+
u32 flags, u32 id_or_fd, u64 revision)
797912
{
798913
int ret;
799914

800915
cgroup_lock();
801-
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
916+
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags,
917+
id_or_fd, revision);
802918
cgroup_unlock();
803919
return ret;
804920
}
@@ -886,6 +1002,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
8861002
if (!found)
8871003
return -ENOENT;
8881004

1005+
cgrp->bpf.revisions[atype] += 1;
8891006
old_prog = xchg(&link->link.prog, new_prog);
8901007
replace_effective_prog(cgrp, atype, link);
8911008
bpf_prog_put(old_prog);
@@ -1011,12 +1128,14 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
10111128
* @prog: A program to detach or NULL
10121129
* @link: A link to detach or NULL
10131130
* @type: Type of detach operation
1131+
* @revision: bpf_prog_list revision
10141132
*
10151133
* At most one of @prog or @link can be non-NULL.
10161134
* Must be called with cgroup_mutex held.
10171135
*/
10181136
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
1019-
struct bpf_cgroup_link *link, enum bpf_attach_type type)
1137+
struct bpf_cgroup_link *link, enum bpf_attach_type type,
1138+
u64 revision)
10201139
{
10211140
enum cgroup_bpf_attach_type atype;
10221141
struct bpf_prog *old_prog;
@@ -1034,6 +1153,9 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
10341153
if (atype < 0)
10351154
return -EINVAL;
10361155

1156+
if (revision && revision != cgrp->bpf.revisions[atype])
1157+
return -ESTALE;
1158+
10371159
progs = &cgrp->bpf.progs[atype];
10381160
flags = cgrp->bpf.flags[atype];
10391161

@@ -1059,6 +1181,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
10591181

10601182
/* now can actually delete it from this cgroup list */
10611183
hlist_del(&pl->node);
1184+
cgrp->bpf.revisions[atype] += 1;
10621185

10631186
kfree(pl);
10641187
if (hlist_empty(progs))
@@ -1074,12 +1197,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
10741197
}
10751198

10761199
static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
1077-
enum bpf_attach_type type)
1200+
enum bpf_attach_type type, u64 revision)
10781201
{
10791202
int ret;
10801203

10811204
cgroup_lock();
1082-
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
1205+
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, revision);
10831206
cgroup_unlock();
10841207
return ret;
10851208
}
@@ -1097,6 +1220,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
10971220
struct bpf_prog_array *effective;
10981221
int cnt, ret = 0, i;
10991222
int total_cnt = 0;
1223+
u64 revision = 0;
11001224
u32 flags;
11011225

11021226
if (effective_query && prog_attach_flags)
@@ -1134,6 +1258,10 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
11341258
return -EFAULT;
11351259
if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
11361260
return -EFAULT;
1261+
if (!effective_query && from_atype == to_atype)
1262+
revision = cgrp->bpf.revisions[from_atype];
1263+
if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
1264+
return -EFAULT;
11371265
if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
11381266
/* return early if user requested only program count + flags */
11391267
return 0;
@@ -1216,7 +1344,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
12161344
}
12171345

12181346
ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
1219-
attr->attach_type, attr->attach_flags);
1347+
attr->attach_type, attr->attach_flags,
1348+
attr->relative_fd, attr->expected_revision);
12201349

12211350
if (replace_prog)
12221351
bpf_prog_put(replace_prog);
@@ -1238,7 +1367,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
12381367
if (IS_ERR(prog))
12391368
prog = NULL;
12401369

1241-
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
1370+
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, attr->expected_revision);
12421371
if (prog)
12431372
bpf_prog_put(prog);
12441373

@@ -1267,7 +1396,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
12671396
}
12681397

12691398
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
1270-
cg_link->type));
1399+
cg_link->type, 0));
12711400
if (cg_link->type == BPF_LSM_CGROUP)
12721401
bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
12731402

@@ -1339,14 +1468,21 @@ static const struct bpf_link_ops bpf_cgroup_link_lops = {
13391468
.fill_link_info = bpf_cgroup_link_fill_link_info,
13401469
};
13411470

1471+
#define BPF_F_LINK_ATTACH_MASK \
1472+
(BPF_F_ID | \
1473+
BPF_F_BEFORE | \
1474+
BPF_F_AFTER | \
1475+
BPF_F_PREORDER | \
1476+
BPF_F_LINK)
1477+
13421478
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
13431479
{
13441480
struct bpf_link_primer link_primer;
13451481
struct bpf_cgroup_link *link;
13461482
struct cgroup *cgrp;
13471483
int err;
13481484

1349-
if (attr->link_create.flags)
1485+
if (attr->link_create.flags & (~BPF_F_LINK_ATTACH_MASK))
13501486
return -EINVAL;
13511487

13521488
cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
@@ -1370,7 +1506,9 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
13701506
}
13711507

13721508
err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
1373-
link->type, BPF_F_ALLOW_MULTI);
1509+
link->type, BPF_F_ALLOW_MULTI | attr->link_create.flags,
1510+
attr->link_create.cgroup.relative_fd,
1511+
attr->link_create.cgroup.expected_revision);
13741512
if (err) {
13751513
bpf_link_cleanup(&link_primer);
13761514
goto out_put_cgroup;

0 commit comments

Comments
 (0)