Skip to content

Commit 67f4cfb

Browse files
author
Martin KaFai Lau
committed
Merge branch 'net-smc-introduce-smc_hs_ctrl'
D. Wythe says: ==================== net/smc: Introduce smc_hs_ctrl This patch aims to introduce BPF injection capabilities for SMC and includes a self-test to ensure code stability. Since the SMC protocol isn't ideal for every situation, especially short-lived ones, most applications can't guarantee the absence of such scenarios. Consequently, applications may need specific strategies to decide whether to use SMC. For example, an application might limit SMC usage to certain IP addresses or ports. To maintain the principle of transparent replacement, we want applications to remain unaffected even if they need specific SMC strategies. In other words, they should not require recompilation of their code. Additionally, we need to ensure the scalability of strategy implementation. While using socket options or sysctl might be straightforward, it could complicate future expansions. Fortunately, BPF addresses these concerns effectively. Users can write their own strategies in eBPF to determine whether to use SMC, and they can easily modify those strategies in the future. This is a rework of the series from [1]. Changes since [1] are limited to the SMC parts: 1. Rename smc_ops to smc_hs_ctrl and change interface name. 2. Squash SMC patches, removing standalone non-BPF hook capability. 3. Fix typos [1]: https://lore.kernel.org/bpf/[email protected]/#t v2 -> v1: - Removed the fixes patch, which has already been merged on the current branch. - Fixed compilation warning of smc_call_hsbpf() when CONFIG_SMC_HS_CTRL_BPF is not enabled. - Changed the default value of CONFIG_SMC_HS_CTRL_BPF to Y. - Fixed typos and renamed some variables v3 -> v2: - Removed the libbpf patch, which has already been merged on the current branch. - Fixed sparse warning of smc_call_hsbpf() and xchg(). v4 -> v3: - Rebased on latest bpf-next, updated SMC loopback config from SMC_LO to DIBS_LO per upstream changes.
v5 -> v4: - Removed the redundant sk parameter from smc_call_hsbpf - Reject registration when bpf_link is set; link support will be added in the future. - Updated selftests with new test helpers. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents abd0c0f + beb3c67 commit 67f4cfb

File tree

14 files changed

+870
-14
lines changed

14 files changed

+870
-14
lines changed

include/net/netns/smc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ struct netns_smc {
1717
#ifdef CONFIG_SYSCTL
1818
struct ctl_table_header *smc_hdr;
1919
#endif
20+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
21+
struct smc_hs_ctrl __rcu *hs_ctrl;
22+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
2023
unsigned int sysctl_autocorking_size;
2124
unsigned int sysctl_smcr_buf_type;
2225
int sysctl_smcr_testlink_time;

include/net/smc.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <linux/wait.h>
1818
#include <linux/dibs.h>
1919

20+
struct tcp_sock;
21+
struct inet_request_sock;
2022
struct sock;
2123

2224
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
5052
u8 going_away : 1;
5153
};
5254

55+
#define SMC_HS_CTRL_NAME_MAX 16
56+
57+
enum {
58+
/* ops can be inherited from init_net */
59+
SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,
60+
61+
SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
62+
};
63+
64+
struct smc_hs_ctrl {
65+
/* private */
66+
67+
struct list_head list;
68+
struct module *owner;
69+
70+
/* public */
71+
72+
/* unique name */
73+
char name[SMC_HS_CTRL_NAME_MAX];
74+
int flags;
75+
76+
/* Invoked before computing SMC option for SYN packets.
77+
* We can control whether to set SMC options by returning various values.
78+
* Return 0 to disable SMC, or return any other value to enable it.
79+
*/
80+
int (*syn_option)(struct tcp_sock *tp);
81+
82+
/* Invoked before setting up SMC options for SYN-ACK packets.
83+
* We can control whether to respond with SMC options by returning various
84+
* values. Return 0 to disable SMC, or return any other value to enable
85+
* it.
86+
*/
87+
int (*synack_option)(const struct tcp_sock *tp,
88+
struct inet_request_sock *ireq);
89+
};
90+
91+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
92+
#define smc_call_hsbpf(init_val, tp, func, ...) ({ \
93+
typeof(init_val) __ret = (init_val); \
94+
struct smc_hs_ctrl *ctrl; \
95+
rcu_read_lock(); \
96+
ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
97+
if (ctrl && ctrl->func) \
98+
__ret = ctrl->func(tp, ##__VA_ARGS__); \
99+
rcu_read_unlock(); \
100+
__ret; \
101+
})
102+
#else
103+
#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
104+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
105+
53106
#endif /* _SMC_H */

kernel/bpf/bpf_struct_ops.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,7 @@ bool bpf_struct_ops_get(const void *kdata)
11621162
map = __bpf_map_inc_not_zero(&st_map->map, false);
11631163
return !IS_ERR(map);
11641164
}
1165+
EXPORT_SYMBOL_GPL(bpf_struct_ops_get);
11651166

11661167
void bpf_struct_ops_put(const void *kdata)
11671168
{
@@ -1173,6 +1174,7 @@ void bpf_struct_ops_put(const void *kdata)
11731174

11741175
bpf_map_put(&st_map->map);
11751176
}
1177+
EXPORT_SYMBOL_GPL(bpf_struct_ops_put);
11761178

11771179
u32 bpf_struct_ops_id(const void *kdata)
11781180
{

kernel/bpf/syscall.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
12341234

12351235
return src - orig_src;
12361236
}
1237+
EXPORT_SYMBOL_GPL(bpf_obj_name_cpy);
12371238

12381239
int map_check_no_btf(const struct bpf_map *map,
12391240
const struct btf *btf,

net/ipv4/tcp_output.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <net/tcp.h>
4141
#include <net/tcp_ecn.h>
4242
#include <net/mptcp.h>
43+
#include <net/smc.h>
4344
#include <net/proto_memory.h>
4445
#include <net/psp.h>
4546

@@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
802803
mptcp_options_write(th, ptr, tp, opts);
803804
}
804805

805-
static void smc_set_option(const struct tcp_sock *tp,
806+
static void smc_set_option(struct tcp_sock *tp,
806807
struct tcp_out_options *opts,
807808
unsigned int *remaining)
808809
{
809810
#if IS_ENABLED(CONFIG_SMC)
810-
if (static_branch_unlikely(&tcp_have_smc)) {
811-
if (tp->syn_smc) {
812-
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
813-
opts->options |= OPTION_SMC;
814-
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
815-
}
811+
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
812+
tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
813+
/* re-check syn_smc */
814+
if (tp->syn_smc &&
815+
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
816+
opts->options |= OPTION_SMC;
817+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
816818
}
817819
}
818820
#endif
819821
}
820822

821823
static void smc_set_option_cond(const struct tcp_sock *tp,
822-
const struct inet_request_sock *ireq,
824+
struct inet_request_sock *ireq,
823825
struct tcp_out_options *opts,
824826
unsigned int *remaining)
825827
{
826828
#if IS_ENABLED(CONFIG_SMC)
827-
if (static_branch_unlikely(&tcp_have_smc)) {
828-
if (tp->syn_smc && ireq->smc_ok) {
829-
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
830-
opts->options |= OPTION_SMC;
831-
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
832-
}
829+
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
830+
ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
831+
/* re-check smc_ok */
832+
if (ireq->smc_ok &&
833+
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
834+
opts->options |= OPTION_SMC;
835+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
833836
}
834837
}
835838
#endif

net/smc/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,13 @@ config SMC_DIAG
1919
smcss.
2020

2121
if unsure, say Y.
22+
23+
config SMC_HS_CTRL_BPF
24+
bool "Generic eBPF hook for SMC handshake flow"
25+
depends on SMC && BPF_SYSCALL
26+
default y
27+
help
28+
SMC_HS_CTRL_BPF enables support for registering a generic eBPF hook for the
29+
SMC handshake flow, which offers much greater flexibility in modifying the
30+
behavior of the SMC protocol stack compared to a purely kernel-based approach.
31+
Select this option if you want to filter the handshake process via eBPF programs.

net/smc/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
66
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
77
smc-y += smc_tracepoint.o smc_inet.o
88
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
9+
smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o

net/smc/af_smc.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "smc_tracepoint.h"
5959
#include "smc_sysctl.h"
6060
#include "smc_inet.h"
61+
#include "smc_hs_bpf.h"
6162

6263
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
6364
* creation on server
@@ -3600,8 +3601,16 @@ static int __init smc_init(void)
36003601
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
36013602
goto out_ulp;
36023603
}
3604+
rc = bpf_smc_hs_ctrl_init();
3605+
if (rc) {
3606+
pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
3607+
rc);
3608+
goto out_inet;
3609+
}
36033610
static_branch_enable(&tcp_have_smc);
36043611
return 0;
3612+
out_inet:
3613+
smc_inet_exit();
36053614
out_ulp:
36063615
tcp_unregister_ulp(&smc_ulp_ops);
36073616
out_ib:

net/smc/smc_hs_bpf.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Shared Memory Communications over RDMA (SMC-R) and RoCE
4+
*
5+
* Generic hook for SMC handshake flow.
6+
*
7+
* Copyright IBM Corp. 2016
8+
* Copyright (c) 2025, Alibaba Inc.
9+
*
10+
* Author: D. Wythe <[email protected]>
11+
*/
12+
13+
#include <linux/bpf_verifier.h>
14+
#include <linux/bpf.h>
15+
#include <linux/btf.h>
16+
#include <linux/rculist.h>
17+
18+
#include "smc_hs_bpf.h"
19+
20+
static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock);
21+
static LIST_HEAD(smc_hs_ctrl_list);
22+
23+
static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl)
24+
{
25+
int ret = 0;
26+
27+
spin_lock(&smc_hs_ctrl_list_lock);
28+
/* already exists or duplicate name */
29+
if (smc_hs_ctrl_find_by_name(ctrl->name))
30+
ret = -EEXIST;
31+
else
32+
list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list);
33+
spin_unlock(&smc_hs_ctrl_list_lock);
34+
return ret;
35+
}
36+
37+
static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl)
38+
{
39+
spin_lock(&smc_hs_ctrl_list_lock);
40+
list_del_rcu(&ctrl->list);
41+
spin_unlock(&smc_hs_ctrl_list_lock);
42+
43+
/* Wait for all readers to complete */
44+
synchronize_rcu();
45+
}
46+
47+
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name)
48+
{
49+
struct smc_hs_ctrl *ctrl;
50+
51+
list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) {
52+
if (strcmp(ctrl->name, name) == 0)
53+
return ctrl;
54+
}
55+
return NULL;
56+
}
57+
58+
static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; }
59+
static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp,
60+
struct inet_request_sock *ireq)
61+
{
62+
return 1;
63+
}
64+
65+
static struct smc_hs_ctrl __smc_bpf_hs_ctrl = {
66+
.syn_option = __smc_bpf_stub_set_tcp_option,
67+
.synack_option = __smc_bpf_stub_set_tcp_option_cond,
68+
};
69+
70+
static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; }
71+
72+
static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link)
73+
{
74+
if (link)
75+
return -EOPNOTSUPP;
76+
77+
return smc_hs_ctrl_reg(kdata);
78+
}
79+
80+
static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link)
81+
{
82+
smc_hs_ctrl_unreg(kdata);
83+
}
84+
85+
static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t,
86+
const struct btf_member *member,
87+
void *kdata, const void *udata)
88+
{
89+
const struct smc_hs_ctrl *u_ctrl;
90+
struct smc_hs_ctrl *k_ctrl;
91+
u32 moff;
92+
93+
u_ctrl = (const struct smc_hs_ctrl *)udata;
94+
k_ctrl = (struct smc_hs_ctrl *)kdata;
95+
96+
moff = __btf_member_bit_offset(t, member) / 8;
97+
switch (moff) {
98+
case offsetof(struct smc_hs_ctrl, name):
99+
if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name,
100+
sizeof(u_ctrl->name)) <= 0)
101+
return -EINVAL;
102+
return 1;
103+
case offsetof(struct smc_hs_ctrl, flags):
104+
if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS)
105+
return -EINVAL;
106+
k_ctrl->flags = u_ctrl->flags;
107+
return 1;
108+
default:
109+
break;
110+
}
111+
112+
return 0;
113+
}
114+
115+
static const struct bpf_func_proto *
116+
bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
117+
{
118+
return bpf_base_func_proto(func_id, prog);
119+
}
120+
121+
static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
122+
.get_func_proto = bpf_smc_hs_func_proto,
123+
.is_valid_access = bpf_tracing_btf_ctx_access,
124+
};
125+
126+
static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = {
127+
.name = "smc_hs_ctrl",
128+
.init = smc_bpf_hs_ctrl_init,
129+
.reg = smc_bpf_hs_ctrl_reg,
130+
.unreg = smc_bpf_hs_ctrl_unreg,
131+
.cfi_stubs = &__smc_bpf_hs_ctrl,
132+
.verifier_ops = &smc_bpf_verifier_ops,
133+
.init_member = smc_bpf_hs_ctrl_init_member,
134+
.owner = THIS_MODULE,
135+
};
136+
137+
int bpf_smc_hs_ctrl_init(void)
138+
{
139+
return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
140+
}

net/smc/smc_hs_bpf.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Shared Memory Communications over RDMA (SMC-R) and RoCE
4+
*
5+
* Generic hook for SMC handshake flow.
6+
*
7+
* Copyright IBM Corp. 2016
8+
* Copyright (c) 2025, Alibaba Inc.
9+
*
10+
* Author: D. Wythe <[email protected]>
11+
*/
12+
13+
#ifndef __SMC_HS_CTRL
14+
#define __SMC_HS_CTRL
15+
16+
#include <net/smc.h>
17+
18+
/* Find hs_ctrl by the target name, which is required to be a C string.
19+
* Return NULL if no such ctrl was found; otherwise, return a valid ctrl.
20+
*
21+
* Note: Caller MUST ensure it is invoked under rcu_read_lock.
22+
*/
23+
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name);
24+
25+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
26+
int bpf_smc_hs_ctrl_init(void);
27+
#else
28+
static inline int bpf_smc_hs_ctrl_init(void) { return 0; }
29+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
30+
31+
#endif /* __SMC_HS_CTRL */

0 commit comments

Comments
 (0)