Skip to content

Commit 59464c7

Browse files
D-Wythe authored and Kernel Patches Daemon committed
net/smc: bpf: Introduce generic hook for handshake flow
The introduction of IPPROTO_SMC enables eBPF programs to determine whether to use SMC based on the context of socket creation, such as network namespaces, PID and comm name, etc. As a subsequent enhancement, introduce a new generic hook that allows deciding at runtime whether to use SMC or not, based on criteria including but not limited to local/remote IP addresses or ports. Users can now write their own implementation via bpf_struct_ops to choose whether to use SMC or not before the third step of the TCP handshake is completed. Signed-off-by: D. Wythe <[email protected]> Reviewed-by: Dust Li <[email protected]>
1 parent 56e6454 commit 59464c7

File tree

9 files changed

+355
-14
lines changed

9 files changed

+355
-14
lines changed

include/net/netns/smc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ struct netns_smc {
1717
#ifdef CONFIG_SYSCTL
1818
struct ctl_table_header *smc_hdr;
1919
#endif
20+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
21+
struct smc_hs_ctrl __rcu *hs_ctrl;
22+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
2023
unsigned int sysctl_autocorking_size;
2124
unsigned int sysctl_smcr_buf_type;
2225
int sysctl_smcr_testlink_time;

include/net/smc.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <linux/wait.h>
1818
#include <linux/dibs.h>
1919

20+
struct tcp_sock;
21+
struct inet_request_sock;
2022
struct sock;
2123

2224
#define SMC_MAX_PNETID_LEN 16 /* Max. length of PNET id */
@@ -50,4 +52,55 @@ struct smcd_dev {
5052
u8 going_away : 1;
5153
};
5254

55+
#define SMC_HS_CTRL_NAME_MAX 16
56+
57+
enum {
58+
/* ops can be inherited from init_net */
59+
SMC_HS_CTRL_FLAG_INHERITABLE = 0x1,
60+
61+
SMC_HS_CTRL_ALL_FLAGS = SMC_HS_CTRL_FLAG_INHERITABLE,
62+
};
63+
64+
struct smc_hs_ctrl {
65+
/* private */
66+
67+
struct list_head list;
68+
struct module *owner;
69+
70+
/* public */
71+
72+
/* unique name */
73+
char name[SMC_HS_CTRL_NAME_MAX];
74+
int flags;
75+
76+
/* Invoked before computing SMC option for SYN packets.
77+
* We can control whether to set SMC options by returning different values.
78+
* Return 0 to disable SMC, or return any other value to enable it.
79+
*/
80+
int (*syn_option)(struct tcp_sock *tp);
81+
82+
/* Invoked before setting up SMC options for SYN-ACK packets.
83+
* We can control whether to respond with SMC options by returning different
84+
* values. Return 0 to disable SMC, or return any other value to enable
85+
* it.
86+
*/
87+
int (*synack_option)(const struct tcp_sock *tp,
88+
struct inet_request_sock *ireq);
89+
};
90+
91+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
92+
#define smc_call_hsbpf(init_val, tp, func, ...) ({ \
93+
typeof(init_val) __ret = (init_val); \
94+
struct smc_hs_ctrl *ctrl; \
95+
rcu_read_lock(); \
96+
ctrl = rcu_dereference(sock_net((struct sock *)(tp))->smc.hs_ctrl); \
97+
if (ctrl && ctrl->func) \
98+
__ret = ctrl->func(tp, ##__VA_ARGS__); \
99+
rcu_read_unlock(); \
100+
__ret; \
101+
})
102+
#else
103+
#define smc_call_hsbpf(init_val, tp, ...) ({ (void)(tp); (init_val); })
104+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
105+
53106
#endif /* _SMC_H */

net/ipv4/tcp_output.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <net/tcp.h>
4141
#include <net/tcp_ecn.h>
4242
#include <net/mptcp.h>
43+
#include <net/smc.h>
4344
#include <net/proto_memory.h>
4445
#include <net/psp.h>
4546

@@ -802,34 +803,36 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
802803
mptcp_options_write(th, ptr, tp, opts);
803804
}
804805

805-
static void smc_set_option(const struct tcp_sock *tp,
806+
static void smc_set_option(struct tcp_sock *tp,
806807
struct tcp_out_options *opts,
807808
unsigned int *remaining)
808809
{
809810
#if IS_ENABLED(CONFIG_SMC)
810-
if (static_branch_unlikely(&tcp_have_smc)) {
811-
if (tp->syn_smc) {
812-
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
813-
opts->options |= OPTION_SMC;
814-
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
815-
}
811+
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc) {
812+
tp->syn_smc = !!smc_call_hsbpf(1, tp, syn_option);
813+
/* re-check syn_smc */
814+
if (tp->syn_smc &&
815+
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
816+
opts->options |= OPTION_SMC;
817+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
816818
}
817819
}
818820
#endif
819821
}
820822

821823
static void smc_set_option_cond(const struct tcp_sock *tp,
822-
const struct inet_request_sock *ireq,
824+
struct inet_request_sock *ireq,
823825
struct tcp_out_options *opts,
824826
unsigned int *remaining)
825827
{
826828
#if IS_ENABLED(CONFIG_SMC)
827-
if (static_branch_unlikely(&tcp_have_smc)) {
828-
if (tp->syn_smc && ireq->smc_ok) {
829-
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
830-
opts->options |= OPTION_SMC;
831-
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
832-
}
829+
if (static_branch_unlikely(&tcp_have_smc) && tp->syn_smc && ireq->smc_ok) {
830+
ireq->smc_ok = !!smc_call_hsbpf(1, tp, synack_option, ireq);
831+
/* re-check smc_ok */
832+
if (ireq->smc_ok &&
833+
*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
834+
opts->options |= OPTION_SMC;
835+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
833836
}
834837
}
835838
#endif

net/smc/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,13 @@ config SMC_DIAG
1919
smcss.
2020

2121
if unsure, say Y.
22+
23+
config SMC_HS_CTRL_BPF
24+
bool "Generic eBPF hook for SMC handshake flow"
25+
depends on SMC && BPF_SYSCALL
26+
default y
27+
help
28+
SMC_HS_CTRL_BPF enables support for registering a generic eBPF hook for the SMC
29+
handshake flow, which offers much greater flexibility in modifying the behavior
30+
of the SMC protocol stack compared to a purely kernel-based approach. Select
31+
this option if you want to filter the handshake process via eBPF programs.

net/smc/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
66
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
77
smc-y += smc_tracepoint.o smc_inet.o
88
smc-$(CONFIG_SYSCTL) += smc_sysctl.o
9+
smc-$(CONFIG_SMC_HS_CTRL_BPF) += smc_hs_bpf.o

net/smc/af_smc.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "smc_tracepoint.h"
5959
#include "smc_sysctl.h"
6060
#include "smc_inet.h"
61+
#include "smc_hs_bpf.h"
6162

6263
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
6364
* creation on server
@@ -3600,8 +3601,16 @@ static int __init smc_init(void)
36003601
pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
36013602
goto out_ulp;
36023603
}
3604+
rc = bpf_smc_hs_ctrl_init();
3605+
if (rc) {
3606+
pr_err("%s: bpf_smc_hs_ctrl_init fails with %d\n", __func__,
3607+
rc);
3608+
goto out_inet;
3609+
}
36033610
static_branch_enable(&tcp_have_smc);
36043611
return 0;
3612+
out_inet:
3613+
smc_inet_exit();
36053614
out_ulp:
36063615
tcp_unregister_ulp(&smc_ulp_ops);
36073616
out_ib:

net/smc/smc_hs_bpf.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Shared Memory Communications over RDMA (SMC-R) and RoCE
4+
*
5+
* Generic hook for SMC handshake flow.
6+
*
7+
* Copyright IBM Corp. 2016
8+
* Copyright (c) 2025, Alibaba Inc.
9+
*
10+
* Author: D. Wythe <[email protected]>
11+
*/
12+
13+
#include <linux/bpf_verifier.h>
14+
#include <linux/bpf.h>
15+
#include <linux/btf.h>
16+
#include <linux/rculist.h>
17+
18+
#include "smc_hs_bpf.h"
19+
20+
static DEFINE_SPINLOCK(smc_hs_ctrl_list_lock);
21+
static LIST_HEAD(smc_hs_ctrl_list);
22+
23+
static int smc_hs_ctrl_reg(struct smc_hs_ctrl *ctrl)
24+
{
25+
int ret = 0;
26+
27+
spin_lock(&smc_hs_ctrl_list_lock);
28+
/* already exist or duplicate name */
29+
if (smc_hs_ctrl_find_by_name(ctrl->name))
30+
ret = -EEXIST;
31+
else
32+
list_add_tail_rcu(&ctrl->list, &smc_hs_ctrl_list);
33+
spin_unlock(&smc_hs_ctrl_list_lock);
34+
return ret;
35+
}
36+
37+
static void smc_hs_ctrl_unreg(struct smc_hs_ctrl *ctrl)
38+
{
39+
spin_lock(&smc_hs_ctrl_list_lock);
40+
list_del_rcu(&ctrl->list);
41+
spin_unlock(&smc_hs_ctrl_list_lock);
42+
43+
/* Wait for all readers to complete */
44+
synchronize_rcu();
45+
}
46+
47+
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name)
48+
{
49+
struct smc_hs_ctrl *ctrl;
50+
51+
list_for_each_entry_rcu(ctrl, &smc_hs_ctrl_list, list) {
52+
if (strcmp(ctrl->name, name) == 0)
53+
return ctrl;
54+
}
55+
return NULL;
56+
}
57+
58+
static int __smc_bpf_stub_set_tcp_option(struct tcp_sock *tp) { return 1; }
59+
static int __smc_bpf_stub_set_tcp_option_cond(const struct tcp_sock *tp,
60+
struct inet_request_sock *ireq)
61+
{
62+
return 1;
63+
}
64+
65+
static struct smc_hs_ctrl __smc_bpf_hs_ctrl = {
66+
.syn_option = __smc_bpf_stub_set_tcp_option,
67+
.synack_option = __smc_bpf_stub_set_tcp_option_cond,
68+
};
69+
70+
static int smc_bpf_hs_ctrl_init(struct btf *btf) { return 0; }
71+
72+
static int smc_bpf_hs_ctrl_reg(void *kdata, struct bpf_link *link)
73+
{
74+
if (link)
75+
return -EOPNOTSUPP;
76+
77+
return smc_hs_ctrl_reg(kdata);
78+
}
79+
80+
static void smc_bpf_hs_ctrl_unreg(void *kdata, struct bpf_link *link)
81+
{
82+
smc_hs_ctrl_unreg(kdata);
83+
}
84+
85+
static int smc_bpf_hs_ctrl_init_member(const struct btf_type *t,
86+
const struct btf_member *member,
87+
void *kdata, const void *udata)
88+
{
89+
const struct smc_hs_ctrl *u_ctrl;
90+
struct smc_hs_ctrl *k_ctrl;
91+
u32 moff;
92+
93+
u_ctrl = (const struct smc_hs_ctrl *)udata;
94+
k_ctrl = (struct smc_hs_ctrl *)kdata;
95+
96+
moff = __btf_member_bit_offset(t, member) / 8;
97+
switch (moff) {
98+
case offsetof(struct smc_hs_ctrl, name):
99+
if (bpf_obj_name_cpy(k_ctrl->name, u_ctrl->name,
100+
sizeof(u_ctrl->name)) <= 0)
101+
return -EINVAL;
102+
return 1;
103+
case offsetof(struct smc_hs_ctrl, flags):
104+
if (u_ctrl->flags & ~SMC_HS_CTRL_ALL_FLAGS)
105+
return -EINVAL;
106+
k_ctrl->flags = u_ctrl->flags;
107+
return 1;
108+
default:
109+
break;
110+
}
111+
112+
return 0;
113+
}
114+
115+
static const struct bpf_func_proto *
116+
bpf_smc_hs_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
117+
{
118+
return bpf_base_func_proto(func_id, prog);
119+
}
120+
121+
static const struct bpf_verifier_ops smc_bpf_verifier_ops = {
122+
.get_func_proto = bpf_smc_hs_func_proto,
123+
.is_valid_access = bpf_tracing_btf_ctx_access,
124+
};
125+
126+
static struct bpf_struct_ops bpf_smc_hs_ctrl_ops = {
127+
.name = "smc_hs_ctrl",
128+
.init = smc_bpf_hs_ctrl_init,
129+
.reg = smc_bpf_hs_ctrl_reg,
130+
.unreg = smc_bpf_hs_ctrl_unreg,
131+
.cfi_stubs = &__smc_bpf_hs_ctrl,
132+
.verifier_ops = &smc_bpf_verifier_ops,
133+
.init_member = smc_bpf_hs_ctrl_init_member,
134+
.owner = THIS_MODULE,
135+
};
136+
137+
int bpf_smc_hs_ctrl_init(void)
138+
{
139+
return register_bpf_struct_ops(&bpf_smc_hs_ctrl_ops, smc_hs_ctrl);
140+
}

net/smc/smc_hs_bpf.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Shared Memory Communications over RDMA (SMC-R) and RoCE
4+
*
5+
* Generic hook for SMC handshake flow.
6+
*
7+
* Copyright IBM Corp. 2016
8+
* Copyright (c) 2025, Alibaba Inc.
9+
*
10+
* Author: D. Wythe <[email protected]>
11+
*/
12+
13+
#ifndef __SMC_HS_CTRL
14+
#define __SMC_HS_CTRL
15+
16+
#include <net/smc.h>
17+
18+
/* Find hs_ctrl by the target name, which is required to be a C string.
19+
* Return NULL if no such ctrl was found; otherwise, return a valid ctrl.
20+
*
21+
* Note: The caller MUST ensure this is invoked under rcu_read_lock.
22+
*/
23+
struct smc_hs_ctrl *smc_hs_ctrl_find_by_name(const char *name);
24+
25+
#if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF)
26+
int bpf_smc_hs_ctrl_init(void);
27+
#else
28+
static inline int bpf_smc_hs_ctrl_init(void) { return 0; }
29+
#endif /* CONFIG_SMC_HS_CTRL_BPF */
30+
31+
#endif /* __SMC_HS_CTRL */

0 commit comments

Comments
 (0)