Skip to content

Commit 6855b9b

Browse files
author
Paolo Abeni
committed
Merge branch 'mptcp-pm-prep-work-for-new-ops-and-sysctl-knobs'
Matthieu Baerts says: ==================== mptcp: pm: prep work for new ops and sysctl knobs Here are a few cleanups, preparation work for the new PM ops, and sysctl knobs. - Patch 1: reorg: move generic NL code used by all PMs to pm_netlink.c. - Patch 2: use kmemdup() instead of kmalloc + copy. - Patch 3: small cleanup to use pm var instead of msk->pm. - Patch 4: reorg: id_avail_bitmap is only used by the in-kernel PM. - Patch 5: use struct_group to easily reset a subset of PM data vars. - Patch 6: introduce the minimal skeleton for the new PM ops. - Patch 7: register in-kernel and userspace PM ops. - Patch 8: new net.mptcp.path_manager sysctl knob, deprecating pm_type. - Patch 9: map the new path_manager sysctl knob with pm_type. - Patch 10: map the old pm_type sysctl knob with path_manager. - Patch 11: new net.mptcp.available_path_managers sysctl knob. - Patch 12: new test to validate path_manager and pm_type mapping. Signed-off-by: Matthieu Baerts (NGI0) <[email protected]> ==================== Link: https://patch.msgid.link/20250313-net-next-mptcp-pm-ops-intro-v1-0-f4e4a88efc50@kernel.org Signed-off-by: Paolo Abeni <[email protected]>
2 parents 27b9180 + 9cf0128 commit 6855b9b

File tree

9 files changed

+301
-25
lines changed

9 files changed

+301
-25
lines changed

Documentation/networking/mptcp-sysctl.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ allow_join_initial_addr_port - BOOLEAN
3030

3131
Default: 1
3232

33+
available_path_managers - STRING
34+
Shows the available path manager choices that are registered. More
35+
path managers may be available, but not loaded.
36+
3337
available_schedulers - STRING
3438
Shows the available scheduler choices that are registered. More packet
3539
schedulers may be available, but not loaded.
@@ -72,6 +76,23 @@ enabled - BOOLEAN
7276

7377
Default: 1 (enabled)
7478

79+
path_manager - STRING
80+
Set the default path manager name to use for each new MPTCP
81+
socket. In-kernel path management will control subflow
82+
connections and address advertisements according to
83+
per-namespace values configured over the MPTCP netlink
84+
API. Userspace path management puts per-MPTCP-connection subflow
85+
connection decisions and address advertisements under control of
86+
a privileged userspace program, at the cost of more netlink
87+
traffic to propagate all of the related events and commands.
88+
89+
This is a per-namespace sysctl.
90+
91+
* "kernel" - In-kernel path manager
92+
* "userspace" - Userspace path manager
93+
94+
Default: "kernel"
95+
7596
pm_type - INTEGER
7697
Set the default path manager type to use for each new MPTCP
7798
socket. In-kernel path management will control subflow
@@ -84,6 +105,8 @@ pm_type - INTEGER
84105

85106
This is a per-namespace sysctl.
86107

108+
Deprecated since v6.15, use path_manager instead.
109+
87110
* 0 - In-kernel path manager
88111
* 1 - Userspace path manager
89112

include/net/mptcp.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
struct mptcp_info;
1616
struct mptcp_sock;
17+
struct mptcp_pm_addr_entry;
1718
struct seq_file;
1819

1920
/* MPTCP sk_buff extension data */
@@ -121,6 +122,19 @@ struct mptcp_sched_ops {
121122
void (*release)(struct mptcp_sock *msk);
122123
} ____cacheline_aligned_in_smp;
123124

125+
#define MPTCP_PM_NAME_MAX 16
126+
#define MPTCP_PM_MAX 128
127+
#define MPTCP_PM_BUF_MAX (MPTCP_PM_NAME_MAX * MPTCP_PM_MAX)
128+
129+
/* Descriptor for one MPTCP path manager.
 *
 * @name:    identifier, at most MPTCP_PM_NAME_MAX bytes including the NUL;
 *           this is the string mptcp_pm_find() compares against.
 * @owner:   module pointer; presumably the module providing this path
 *           manager -- TODO confirm, nothing visible here reads it.
 * @list:    linkage into the global list of registered path managers
 *           (added by mptcp_pm_register(), removed by mptcp_pm_unregister()).
 * @init:    per-connection hook taking the MPTCP socket.
 * @release: per-connection hook taking the MPTCP socket.
 *
 * NOTE(review): the call sites of @init / @release are not visible in this
 * view; confirm when they are invoked before relying on them.
 */
struct mptcp_pm_ops {
130+
char name[MPTCP_PM_NAME_MAX];
131+
struct module *owner;
132+
struct list_head list;
133+
134+
void (*init)(struct mptcp_sock *msk);
135+
void (*release)(struct mptcp_sock *msk);
136+
} ____cacheline_aligned_in_smp;
137+
124138
#ifdef CONFIG_MPTCP
125139
void mptcp_init(void);
126140

net/mptcp/ctrl.c

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ struct mptcp_pernet {
3939
u8 allow_join_initial_addr_port;
4040
u8 pm_type;
4141
char scheduler[MPTCP_SCHED_NAME_MAX];
42+
char path_manager[MPTCP_PM_NAME_MAX];
4243
};
4344

4445
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -83,6 +84,11 @@ int mptcp_get_pm_type(const struct net *net)
8384
return mptcp_get_pernet(net)->pm_type;
8485
}
8586

87+
const char *mptcp_get_path_manager(const struct net *net)
88+
{
89+
return mptcp_get_pernet(net)->path_manager;
90+
}
91+
8692
const char *mptcp_get_scheduler(const struct net *net)
8793
{
8894
return mptcp_get_pernet(net)->scheduler;
@@ -101,6 +107,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
101107
pernet->stale_loss_cnt = 4;
102108
pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
103109
strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
110+
strscpy(pernet->path_manager, "kernel", sizeof(pernet->path_manager));
104111
}
105112

106113
#ifdef CONFIG_SYSCTL
@@ -174,6 +181,96 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table,
174181
return ret;
175182
}
176183

184+
static int mptcp_set_path_manager(char *path_manager, const char *name)
185+
{
186+
struct mptcp_pm_ops *pm_ops;
187+
int ret = 0;
188+
189+
rcu_read_lock();
190+
pm_ops = mptcp_pm_find(name);
191+
if (pm_ops)
192+
strscpy(path_manager, name, MPTCP_PM_NAME_MAX);
193+
else
194+
ret = -ENOENT;
195+
rcu_read_unlock();
196+
197+
return ret;
198+
}
199+
200+
/* sysctl handler for net.mptcp.path_manager.
 *
 * Reads and writes go through a local copy of the name so that the
 * per-namespace value is only replaced once the requested path manager
 * has been found.  After a successful write, the legacy pm_type value is
 * updated to match: "kernel" and "userspace" map to their respective
 * types, any other registered name maps to __MPTCP_PM_TYPE_NR.
 *
 * Returns 0 on success, the proc_dostring() error, or the error from
 * mptcp_set_path_manager() when the name is not registered.
 */
static int proc_path_manager(const struct ctl_table *ctl, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char (*path_manager)[MPTCP_PM_NAME_MAX] = ctl->data;
	char pm_name[MPTCP_PM_NAME_MAX];
	const struct ctl_table tbl = {
		.data = pm_name,
		.maxlen = MPTCP_PM_NAME_MAX,
	};
	struct mptcp_pernet *pernet;
	u8 pm_type;
	int ret;

	pernet = container_of(ctl->data, struct mptcp_pernet, path_manager);

	/* Seed the scratch buffer with the current value for reads. */
	strscpy(pm_name, *path_manager, MPTCP_PM_NAME_MAX);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (!write || ret != 0)
		return ret;

	ret = mptcp_set_path_manager(*path_manager, pm_name);
	if (ret != 0)
		return ret;

	/* Keep the deprecated integer knob in sync with the new name. */
	if (!strncmp(pm_name, "kernel", MPTCP_PM_NAME_MAX))
		pm_type = MPTCP_PM_TYPE_KERNEL;
	else if (!strncmp(pm_name, "userspace", MPTCP_PM_NAME_MAX))
		pm_type = MPTCP_PM_TYPE_USERSPACE;
	else
		pm_type = __MPTCP_PM_TYPE_NR;
	pernet->pm_type = pm_type;

	return 0;
}
232+
233+
/* sysctl handler for the deprecated net.mptcp.pm_type knob.
 *
 * The integer itself is handled by proc_dou8vec_minmax().  After a
 * successful write, the new value is translated to the matching path
 * manager name and stored in the per-namespace path_manager string.  The
 * result of that update is not checked: an unknown type yields an empty
 * name, which leaves path_manager unchanged.
 *
 * Returns the proc_dou8vec_minmax() result.
 */
static int proc_pm_type(const struct ctl_table *ctl, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	struct mptcp_pernet *pernet;
	char *pm_name;
	int ret;

	pernet = container_of(ctl->data, struct mptcp_pernet, pm_type);

	ret = proc_dou8vec_minmax(ctl, write, buffer, lenp, ppos);
	if (!write || ret != 0)
		return ret;

	switch (READ_ONCE(*(u8 *)ctl->data)) {
	case MPTCP_PM_TYPE_KERNEL:
		pm_name = "kernel";
		break;
	case MPTCP_PM_TYPE_USERSPACE:
		pm_name = "userspace";
		break;
	default:
		pm_name = "";
		break;
	}
	mptcp_set_path_manager(pernet->path_manager, pm_name);

	return ret;
}
255+
256+
static int proc_available_path_managers(const struct ctl_table *ctl,
257+
int write, void *buffer,
258+
size_t *lenp, loff_t *ppos)
259+
{
260+
struct ctl_table tbl = { .maxlen = MPTCP_PM_BUF_MAX, };
261+
int ret;
262+
263+
tbl.data = kmalloc(tbl.maxlen, GFP_USER);
264+
if (!tbl.data)
265+
return -ENOMEM;
266+
267+
mptcp_pm_get_available(tbl.data, MPTCP_PM_BUF_MAX);
268+
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
269+
kfree(tbl.data);
270+
271+
return ret;
272+
}
273+
177274
static struct ctl_table mptcp_sysctl_table[] = {
178275
{
179276
.procname = "enabled",
@@ -218,7 +315,7 @@ static struct ctl_table mptcp_sysctl_table[] = {
218315
.procname = "pm_type",
219316
.maxlen = sizeof(u8),
220317
.mode = 0644,
221-
.proc_handler = proc_dou8vec_minmax,
318+
.proc_handler = proc_pm_type,
222319
.extra1 = SYSCTL_ZERO,
223320
.extra2 = &mptcp_pm_type_max
224321
},
@@ -253,6 +350,18 @@ static struct ctl_table mptcp_sysctl_table[] = {
253350
.mode = 0644,
254351
.proc_handler = proc_dou8vec_minmax,
255352
},
353+
{
354+
.procname = "path_manager",
355+
.maxlen = MPTCP_PM_NAME_MAX,
356+
.mode = 0644,
357+
.proc_handler = proc_path_manager,
358+
},
359+
{
360+
.procname = "available_path_managers",
361+
.maxlen = MPTCP_PM_BUF_MAX,
362+
.mode = 0444,
363+
.proc_handler = proc_available_path_managers,
364+
},
256365
};
257366

258367
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -278,6 +387,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
278387
table[8].data = &pernet->close_timeout;
279388
table[9].data = &pernet->blackhole_timeout;
280389
table[10].data = &pernet->syn_retrans_before_tcp_fallback;
390+
table[11].data = &pernet->path_manager;
391+
/* table[12] is for available_path_managers which is read-only info */
281392

282393
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
283394
ARRAY_SIZE(mptcp_sysctl_table));

net/mptcp/pm.c

Lines changed: 81 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66
#define pr_fmt(fmt) "MPTCP: " fmt
77

8+
#include <linux/rculist.h>
9+
#include <linux/spinlock.h>
810
#include "protocol.h"
911
#include "mib.h"
1012

@@ -18,6 +20,9 @@ struct mptcp_pm_add_entry {
1820
struct mptcp_sock *sock;
1921
};
2022

23+
static DEFINE_SPINLOCK(mptcp_pm_list_lock);
24+
static LIST_HEAD(mptcp_pm_list);
25+
2126
/* path manager helpers */
2227

2328
/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
@@ -511,13 +516,13 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
511516
* be sure to serve this event only once.
512517
*/
513518
if (READ_ONCE(pm->work_pending) &&
514-
!(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
519+
!(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
515520
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
516521

517-
if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
522+
if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
518523
announce = true;
519524

520-
msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
525+
pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
521526
spin_unlock_bh(&pm->lock);
522527

523528
if (announce)
@@ -978,10 +983,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
978983
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
979984
struct mptcp_pm_data *pm = &msk->pm;
980985

981-
pm->add_addr_signaled = 0;
982-
pm->add_addr_accepted = 0;
983-
pm->local_addr_used = 0;
984-
pm->subflows = 0;
986+
memset(&pm->reset, 0, sizeof(pm->reset));
985987
pm->rm_list_tx.nr = 0;
986988
pm->rm_list_rx.nr = 0;
987989
WRITE_ONCE(pm->pm_type, pm_type);
@@ -1000,16 +1002,9 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
10001002
!!mptcp_pm_get_add_addr_accept_max(msk) &&
10011003
subflows_allowed);
10021004
WRITE_ONCE(pm->accept_subflow, subflows_allowed);
1003-
} else {
1004-
WRITE_ONCE(pm->work_pending, 0);
1005-
WRITE_ONCE(pm->accept_addr, 0);
1006-
WRITE_ONCE(pm->accept_subflow, 0);
1007-
}
10081005

1009-
WRITE_ONCE(pm->addr_signal, 0);
1010-
WRITE_ONCE(pm->remote_deny_join_id0, false);
1011-
pm->status = 0;
1012-
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
1006+
bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
1007+
}
10131008
}
10141009

10151010
void mptcp_pm_data_init(struct mptcp_sock *msk)
@@ -1022,5 +1017,75 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
10221017

10231018
/* One-time path-manager initialisation (__init).
 *
 * Calls the in-kernel and userspace path manager registration helpers
 * first, then mptcp_pm_nl_init().
 * NOTE(review): whether mptcp_pm_nl_init() depends on the two path
 * managers already being registered is not visible here -- keep the
 * order unless confirmed otherwise.
 */
void __init mptcp_pm_init(void)
10241019
{
1020+
mptcp_pm_kernel_register();
1021+
mptcp_pm_userspace_register();
10251022
mptcp_pm_nl_init();
10261023
}
1024+
1025+
/* Must be called with rcu read lock held */
1026+
struct mptcp_pm_ops *mptcp_pm_find(const char *name)
1027+
{
1028+
struct mptcp_pm_ops *pm_ops;
1029+
1030+
list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) {
1031+
if (!strcmp(pm_ops->name, name))
1032+
return pm_ops;
1033+
}
1034+
1035+
return NULL;
1036+
}
1037+
1038+
/* Validate a path manager descriptor before registration.
 *
 * Currently a placeholder: no check is performed on @pm_ops and 0 is
 * always returned.  mptcp_pm_register() treats any non-zero return as an
 * error and refuses the registration.
 */
int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops)
1039+
{
1040+
return 0;
1041+
}
1042+
1043+
int mptcp_pm_register(struct mptcp_pm_ops *pm_ops)
1044+
{
1045+
int ret;
1046+
1047+
ret = mptcp_pm_validate(pm_ops);
1048+
if (ret)
1049+
return ret;
1050+
1051+
spin_lock(&mptcp_pm_list_lock);
1052+
if (mptcp_pm_find(pm_ops->name)) {
1053+
spin_unlock(&mptcp_pm_list_lock);
1054+
return -EEXIST;
1055+
}
1056+
list_add_tail_rcu(&pm_ops->list, &mptcp_pm_list);
1057+
spin_unlock(&mptcp_pm_list_lock);
1058+
1059+
pr_debug("%s registered\n", pm_ops->name);
1060+
return 0;
1061+
}
1062+
1063+
void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops)
1064+
{
1065+
/* skip unregistering the default path manager */
1066+
if (WARN_ON_ONCE(pm_ops == &mptcp_pm_kernel))
1067+
return;
1068+
1069+
spin_lock(&mptcp_pm_list_lock);
1070+
list_del_rcu(&pm_ops->list);
1071+
spin_unlock(&mptcp_pm_list_lock);
1072+
}
1073+
1074+
/* Build string with list of available path manager values.
1075+
* Similar to tcp_get_available_congestion_control()
1076+
*/
1077+
void mptcp_pm_get_available(char *buf, size_t maxlen)
1078+
{
1079+
struct mptcp_pm_ops *pm_ops;
1080+
size_t offs = 0;
1081+
1082+
rcu_read_lock();
1083+
list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) {
1084+
offs += snprintf(buf + offs, maxlen - offs, "%s%s",
1085+
offs == 0 ? "" : " ", pm_ops->name);
1086+
1087+
if (WARN_ON_ONCE(offs >= maxlen))
1088+
break;
1089+
}
1090+
rcu_read_unlock();
1091+
}

0 commit comments

Comments
 (0)