Skip to content

Commit 7f045a4

Browse files
jsitnicki authored and Alexei Starovoitov committed
bpf: Add link-based BPF program attachment to network namespace
Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment).

Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when a link exists result in -EINVAL.

Attachment of multiple links of the same attach type to one netns is not supported, with the intention to lift the restriction when a use-case presents itself. Because of that, link create returns -E2BIG when trying to create another netns link when one already exists.

Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback.

When auto-detached, a link lives in a defunct state as long as there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link.

Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0).

Signed-off-by: Jakub Sitnicki <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent b27f7bb commit 7f045a4

File tree

7 files changed

+267
-2
lines changed

7 files changed

+267
-2
lines changed

include/linux/bpf-netns.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ int netns_bpf_prog_query(const union bpf_attr *attr,
3434
int netns_bpf_prog_attach(const union bpf_attr *attr,
3535
struct bpf_prog *prog);
3636
int netns_bpf_prog_detach(const union bpf_attr *attr);
37+
int netns_bpf_link_create(const union bpf_attr *attr,
38+
struct bpf_prog *prog);
3739
#else
3840
static inline int netns_bpf_prog_query(const union bpf_attr *attr,
3941
union bpf_attr __user *uattr)
@@ -51,6 +53,12 @@ static inline int netns_bpf_prog_detach(const union bpf_attr *attr)
5153
{
5254
return -EOPNOTSUPP;
5355
}
56+
57+
static inline int netns_bpf_link_create(const union bpf_attr *attr,
58+
struct bpf_prog *prog)
59+
{
60+
return -EOPNOTSUPP;
61+
}
5462
#endif
5563

5664
#endif /* _BPF_NETNS_H */

include/linux/bpf_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
126126
BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
127127
#endif
128128
BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
129+
#ifdef CONFIG_NET
130+
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
131+
#endif

include/net/netns/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ struct bpf_prog;
1212

1313
struct netns_bpf {
1414
struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
15+
struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE];
1516
};
1617

1718
#endif /* __NETNS_BPF_H__ */

include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ enum bpf_link_type {
237237
BPF_LINK_TYPE_TRACING = 2,
238238
BPF_LINK_TYPE_CGROUP = 3,
239239
BPF_LINK_TYPE_ITER = 4,
240+
BPF_LINK_TYPE_NETNS = 5,
240241

241242
MAX_BPF_LINK_TYPE,
242243
};
@@ -3839,6 +3840,10 @@ struct bpf_link_info {
38393840
__u64 cgroup_id;
38403841
__u32 attach_type;
38413842
} cgroup;
3843+
struct {
3844+
__u32 netns_ino;
3845+
__u32 attach_type;
3846+
} netns;
38423847
};
38433848
} __attribute__((aligned(8)));
38443849

kernel/bpf/net_namespace.c

Lines changed: 242 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,140 @@
88
* Functions to manage BPF programs attached to netns
99
*/
1010

11+
struct bpf_netns_link {
12+
struct bpf_link link;
13+
enum bpf_attach_type type;
14+
enum netns_bpf_attach_type netns_type;
15+
16+
/* We don't hold a ref to net in order to auto-detach the link
17+
* when netns is going away. Instead we rely on pernet
18+
* pre_exit callback to clear this pointer. Must be accessed
19+
* with netns_bpf_mutex held.
20+
*/
21+
struct net *net;
22+
};
23+
1124
/* Protects updates to netns_bpf */
1225
DEFINE_MUTEX(netns_bpf_mutex);
1326

27+
/* Must be called with netns_bpf_mutex held. */
28+
static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
29+
{
30+
struct bpf_netns_link *net_link =
31+
container_of(link, struct bpf_netns_link, link);
32+
33+
net_link->net = NULL;
34+
}
35+
36+
static void bpf_netns_link_release(struct bpf_link *link)
37+
{
38+
struct bpf_netns_link *net_link =
39+
container_of(link, struct bpf_netns_link, link);
40+
enum netns_bpf_attach_type type = net_link->netns_type;
41+
struct net *net;
42+
43+
/* Link auto-detached by dying netns. */
44+
if (!net_link->net)
45+
return;
46+
47+
mutex_lock(&netns_bpf_mutex);
48+
49+
/* Recheck after potential sleep. We can race with cleanup_net
50+
* here, but if we see a non-NULL struct net pointer pre_exit
51+
* has not happened yet and will block on netns_bpf_mutex.
52+
*/
53+
net = net_link->net;
54+
if (!net)
55+
goto out_unlock;
56+
57+
net->bpf.links[type] = NULL;
58+
RCU_INIT_POINTER(net->bpf.progs[type], NULL);
59+
60+
out_unlock:
61+
mutex_unlock(&netns_bpf_mutex);
62+
}
63+
64+
static void bpf_netns_link_dealloc(struct bpf_link *link)
65+
{
66+
struct bpf_netns_link *net_link =
67+
container_of(link, struct bpf_netns_link, link);
68+
69+
kfree(net_link);
70+
}
71+
72+
static int bpf_netns_link_update_prog(struct bpf_link *link,
73+
struct bpf_prog *new_prog,
74+
struct bpf_prog *old_prog)
75+
{
76+
struct bpf_netns_link *net_link =
77+
container_of(link, struct bpf_netns_link, link);
78+
enum netns_bpf_attach_type type = net_link->netns_type;
79+
struct net *net;
80+
int ret = 0;
81+
82+
if (old_prog && old_prog != link->prog)
83+
return -EPERM;
84+
if (new_prog->type != link->prog->type)
85+
return -EINVAL;
86+
87+
mutex_lock(&netns_bpf_mutex);
88+
89+
net = net_link->net;
90+
if (!net || !check_net(net)) {
91+
/* Link auto-detached or netns dying */
92+
ret = -ENOLINK;
93+
goto out_unlock;
94+
}
95+
96+
old_prog = xchg(&link->prog, new_prog);
97+
rcu_assign_pointer(net->bpf.progs[type], new_prog);
98+
bpf_prog_put(old_prog);
99+
100+
out_unlock:
101+
mutex_unlock(&netns_bpf_mutex);
102+
return ret;
103+
}
104+
105+
static int bpf_netns_link_fill_info(const struct bpf_link *link,
106+
struct bpf_link_info *info)
107+
{
108+
const struct bpf_netns_link *net_link =
109+
container_of(link, struct bpf_netns_link, link);
110+
unsigned int inum = 0;
111+
struct net *net;
112+
113+
mutex_lock(&netns_bpf_mutex);
114+
net = net_link->net;
115+
if (net && check_net(net))
116+
inum = net->ns.inum;
117+
mutex_unlock(&netns_bpf_mutex);
118+
119+
info->netns.netns_ino = inum;
120+
info->netns.attach_type = net_link->type;
121+
return 0;
122+
}
123+
124+
static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
125+
struct seq_file *seq)
126+
{
127+
struct bpf_link_info info = {};
128+
129+
bpf_netns_link_fill_info(link, &info);
130+
seq_printf(seq,
131+
"netns_ino:\t%u\n"
132+
"attach_type:\t%u\n",
133+
info.netns.netns_ino,
134+
info.netns.attach_type);
135+
}
136+
137+
static const struct bpf_link_ops bpf_netns_link_ops = {
138+
.release = bpf_netns_link_release,
139+
.dealloc = bpf_netns_link_dealloc,
140+
.update_prog = bpf_netns_link_update_prog,
141+
.fill_link_info = bpf_netns_link_fill_info,
142+
.show_fdinfo = bpf_netns_link_show_fdinfo,
143+
};
144+
14145
int netns_bpf_prog_query(const union bpf_attr *attr,
15146
union bpf_attr __user *uattr)
16147
{
@@ -67,6 +198,13 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
67198

68199
net = current->nsproxy->net_ns;
69200
mutex_lock(&netns_bpf_mutex);
201+
202+
/* Attaching prog directly is not compatible with links */
203+
if (net->bpf.links[type]) {
204+
ret = -EEXIST;
205+
goto out_unlock;
206+
}
207+
70208
switch (type) {
71209
case NETNS_BPF_FLOW_DISSECTOR:
72210
ret = flow_dissector_bpf_prog_attach(net, prog);
@@ -75,6 +213,7 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
75213
ret = -EINVAL;
76214
break;
77215
}
216+
out_unlock:
78217
mutex_unlock(&netns_bpf_mutex);
79218

80219
return ret;
@@ -86,6 +225,10 @@ static int __netns_bpf_prog_detach(struct net *net,
86225
{
87226
struct bpf_prog *attached;
88227

228+
/* Progs attached via links cannot be detached */
229+
if (net->bpf.links[type])
230+
return -EINVAL;
231+
89232
attached = rcu_dereference_protected(net->bpf.progs[type],
90233
lockdep_is_held(&netns_bpf_mutex));
91234
if (!attached)
@@ -111,13 +254,110 @@ int netns_bpf_prog_detach(const union bpf_attr *attr)
111254
return ret;
112255
}
113256

257+
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
258+
enum netns_bpf_attach_type type)
259+
{
260+
struct bpf_prog *prog;
261+
int err;
262+
263+
mutex_lock(&netns_bpf_mutex);
264+
265+
/* Allow attaching only one prog or link for now */
266+
if (net->bpf.links[type]) {
267+
err = -E2BIG;
268+
goto out_unlock;
269+
}
270+
/* Links are not compatible with attaching prog directly */
271+
prog = rcu_dereference_protected(net->bpf.progs[type],
272+
lockdep_is_held(&netns_bpf_mutex));
273+
if (prog) {
274+
err = -EEXIST;
275+
goto out_unlock;
276+
}
277+
278+
switch (type) {
279+
case NETNS_BPF_FLOW_DISSECTOR:
280+
err = flow_dissector_bpf_prog_attach(net, link->prog);
281+
break;
282+
default:
283+
err = -EINVAL;
284+
break;
285+
}
286+
if (err)
287+
goto out_unlock;
288+
289+
net->bpf.links[type] = link;
290+
291+
out_unlock:
292+
mutex_unlock(&netns_bpf_mutex);
293+
return err;
294+
}
295+
296+
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
297+
{
298+
enum netns_bpf_attach_type netns_type;
299+
struct bpf_link_primer link_primer;
300+
struct bpf_netns_link *net_link;
301+
enum bpf_attach_type type;
302+
struct net *net;
303+
int err;
304+
305+
if (attr->link_create.flags)
306+
return -EINVAL;
307+
308+
type = attr->link_create.attach_type;
309+
netns_type = to_netns_bpf_attach_type(type);
310+
if (netns_type < 0)
311+
return -EINVAL;
312+
313+
net = get_net_ns_by_fd(attr->link_create.target_fd);
314+
if (IS_ERR(net))
315+
return PTR_ERR(net);
316+
317+
net_link = kzalloc(sizeof(*net_link), GFP_USER);
318+
if (!net_link) {
319+
err = -ENOMEM;
320+
goto out_put_net;
321+
}
322+
bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
323+
&bpf_netns_link_ops, prog);
324+
net_link->net = net;
325+
net_link->type = type;
326+
net_link->netns_type = netns_type;
327+
328+
err = bpf_link_prime(&net_link->link, &link_primer);
329+
if (err) {
330+
kfree(net_link);
331+
goto out_put_net;
332+
}
333+
334+
err = netns_bpf_link_attach(net, &net_link->link, netns_type);
335+
if (err) {
336+
bpf_link_cleanup(&link_primer);
337+
goto out_put_net;
338+
}
339+
340+
put_net(net);
341+
return bpf_link_settle(&link_primer);
342+
343+
out_put_net:
344+
put_net(net);
345+
return err;
346+
}
347+
114348
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
115349
{
116350
enum netns_bpf_attach_type type;
351+
struct bpf_link *link;
117352

118353
mutex_lock(&netns_bpf_mutex);
119-
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
120-
__netns_bpf_prog_detach(net, type);
354+
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
355+
link = net->bpf.links[type];
356+
if (link)
357+
bpf_netns_link_auto_detach(link);
358+
else
359+
__netns_bpf_prog_detach(net, type);
360+
}
121361
mutex_unlock(&netns_bpf_mutex);
122362
}
123363

kernel/bpf/syscall.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3887,6 +3887,9 @@ static int link_create(union bpf_attr *attr)
38873887
case BPF_PROG_TYPE_TRACING:
38883888
ret = tracing_bpf_link_attach(attr, prog);
38893889
break;
3890+
case BPF_PROG_TYPE_FLOW_DISSECTOR:
3891+
ret = netns_bpf_link_create(attr, prog);
3892+
break;
38903893
default:
38913894
ret = -EINVAL;
38923895
}

tools/include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ enum bpf_link_type {
237237
BPF_LINK_TYPE_TRACING = 2,
238238
BPF_LINK_TYPE_CGROUP = 3,
239239
BPF_LINK_TYPE_ITER = 4,
240+
BPF_LINK_TYPE_NETNS = 5,
240241

241242
MAX_BPF_LINK_TYPE,
242243
};
@@ -3839,6 +3840,10 @@ struct bpf_link_info {
38393840
__u64 cgroup_id;
38403841
__u32 attach_type;
38413842
} cgroup;
3843+
struct {
3844+
__u32 netns_ino;
3845+
__u32 attach_type;
3846+
} netns;
38423847
};
38433848
} __attribute__((aligned(8)));
38443849

0 commit comments

Comments
 (0)