Skip to content

Commit 59af653

Browse files
committed
Merge branch 'Add IP-TFS mode to xfrm'
Christian Hopps says: ==================== * Summary of Changes: This patchset adds a new xfrm mode implementing on-demand IP-TFS. IP-TFS (AggFrag encapsulation) has been standardized in RFC9347. Link: https://www.rfc-editor.org/rfc/rfc9347.txt This feature supports demand driven (i.e., non-constant send rate) IP-TFS to take advantage of the AGGFRAG ESP payload encapsulation. This payload type supports aggregation and fragmentation of the inner IP packet stream which in turn yields higher small-packet bandwidth as well as reducing MTU/PMTU issues. Congestion control is unimplemented as the send rate is demand driven rather than constant. In order to allow loading this functionality as a module a set of callbacks xfrm_mode_cbs has been added to xfrm as well. Patchset Structure: ------------------- The first 5 commits are changes to the net and xfrm infrastructure to support the callbacks as well as more generic IP-TFS additions that may be used outside the actual IP-TFS implementation. - xfrm: config: add CONFIG_XFRM_IPTFS - include: uapi: protocol number and packet structs for AGGFRAG in ESP - xfrm: netlink: add config (netlink) options - xfrm: add mode_cbs module functionality - xfrm: add generic iptfs defines and functionality The last 10 commits constitute the IP-TFS implementation constructed in layers to make review easier. The first 9 commits all apply to a single file `net/xfrm/xfrm_iptfs.c`, the last commit adds a new tracepoint header file along with the use of these new tracepoint calls. 
- xfrm: iptfs: add new iptfs xfrm mode impl - xfrm: iptfs: add user packet (tunnel ingress) handling - xfrm: iptfs: share page fragments of inner packets - xfrm: iptfs: add fragmenting of larger than MTU user packets - xfrm: iptfs: add basic receive packet (tunnel egress) handling - xfrm: iptfs: handle received fragmented inner packets - xfrm: iptfs: add reusing received skb for the tunnel egress packet - xfrm: iptfs: add skb-fragment sharing code - xfrm: iptfs: handle reordering of received packets - xfrm: iptfs: add tracepoint functionality ==================== Signed-off-by: Steffen Klassert <[email protected]>
2 parents 152d00a + ed58b18 commit 59af653

File tree

21 files changed

+3292
-19
lines changed

21 files changed

+3292
-19
lines changed

include/net/xfrm.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#define XFRM_PROTO_COMP 108
3939
#define XFRM_PROTO_IPIP 4
4040
#define XFRM_PROTO_IPV6 41
41+
#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
4142
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
4243
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
4344

@@ -213,6 +214,7 @@ struct xfrm_state {
213214
u16 family;
214215
xfrm_address_t saddr;
215216
int header_len;
217+
int enc_hdr_len;
216218
int trailer_len;
217219
u32 extra_flags;
218220
struct xfrm_mark smark;
@@ -303,6 +305,9 @@ struct xfrm_state {
303305
* interpreted by xfrm_type methods. */
304306
void *data;
305307
u8 dir;
308+
309+
const struct xfrm_mode_cbs *mode_cbs;
310+
void *mode_data;
306311
};
307312

308313
static inline struct net *xs_net(struct xfrm_state *x)
@@ -460,6 +465,45 @@ struct xfrm_type_offload {
460465
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
461466
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
462467

468+
/**
469+
* struct xfrm_mode_cbs - XFRM mode callbacks
470+
* @owner: module owner or NULL
471+
* @init_state: Add/init mode specific state in `xfrm_state *x`
472+
* @clone_state: Copy mode specific values from `orig` to new state `x`
473+
* @destroy_state: Cleanup mode specific state from `xfrm_state *x`
474+
* @user_init: Process mode specific netlink attributes from user
475+
* @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
476+
* @sa_len: Return space required to store mode specific netlink attributes
477+
* @get_inner_mtu: Return avail payload space after removing encap overhead
478+
* @input: Process received packet from SA using mode
479+
* @output: Output given packet using mode
480+
* @prepare_output: Add mode specific encapsulation to packet in skb. On return
481+
* `transport_header` should point at ESP header, `network_header` should
482+
* point at outer IP header and `mac_header` should point at the
483+
* protocol/nexthdr field of the outer IP.
484+
*
485+
* One should examine and understand the specific uses of these callbacks in
486+
* xfrm for further detail on how and when these functions are called. RTSL.
487+
*/
488+
struct xfrm_mode_cbs {
489+
struct module *owner;
490+
int (*init_state)(struct xfrm_state *x);
491+
int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
492+
void (*destroy_state)(struct xfrm_state *x);
493+
int (*user_init)(struct net *net, struct xfrm_state *x,
494+
struct nlattr **attrs,
495+
struct netlink_ext_ack *extack);
496+
int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
497+
unsigned int (*sa_len)(const struct xfrm_state *x);
498+
u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
499+
int (*input)(struct xfrm_state *x, struct sk_buff *skb);
500+
int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
501+
int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
502+
};
503+
504+
int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
505+
void xfrm_unregister_mode_cbs(u8 mode);
506+
463507
static inline int xfrm_af2proto(unsigned int family)
464508
{
465509
switch(family) {

include/uapi/linux/in.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ enum {
7979
#define IPPROTO_MPLS IPPROTO_MPLS
8080
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
8181
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
82+
IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
83+
#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
8284
IPPROTO_RAW = 255, /* Raw IP packets */
8385
#define IPPROTO_RAW IPPROTO_RAW
8486
IPPROTO_SMC = 256, /* Shared Memory Communications */

include/uapi/linux/ip.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,22 @@ struct ip_beet_phdr {
137137
__u8 reserved;
138138
};
139139

140+
struct ip_iptfs_hdr {
141+
__u8 subtype; /* 0*: basic, 1: CC */
142+
__u8 flags;
143+
__be16 block_offset;
144+
};
145+
146+
struct ip_iptfs_cc_hdr {
147+
__u8 subtype; /* 0: basic, 1*: CC */
148+
__u8 flags;
149+
__be16 block_offset;
150+
__be32 loss_rate;
151+
__be64 rtt_adelay_xdelay;
152+
__be32 tval;
153+
__be32 techo;
154+
};
155+
140156
/* index values for the variables in ipv4_devconf */
141157
enum
142158
{

include/uapi/linux/ipsec.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ enum {
1414
IPSEC_MODE_ANY = 0, /* We do not support this for SA */
1515
IPSEC_MODE_TRANSPORT = 1,
1616
IPSEC_MODE_TUNNEL = 2,
17-
IPSEC_MODE_BEET = 3
17+
IPSEC_MODE_BEET = 3,
18+
IPSEC_MODE_IPTFS = 4
1819
};
1920

2021
enum {

include/uapi/linux/snmp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,8 @@ enum
339339
LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */
340340
LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */
341341
LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */
342+
LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */
343+
LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */
342344
__LINUX_MIB_XFRMMAX
343345
};
344346

include/uapi/linux/xfrm.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@ enum {
158158
#define XFRM_MODE_ROUTEOPTIMIZATION 2
159159
#define XFRM_MODE_IN_TRIGGER 3
160160
#define XFRM_MODE_BEET 4
161-
#define XFRM_MODE_MAX 5
161+
#define XFRM_MODE_IPTFS 5
162+
#define XFRM_MODE_MAX 6
162163

163164
/* Netlink configuration messages. */
164165
enum {
@@ -323,6 +324,12 @@ enum xfrm_attr_type_t {
323324
XFRMA_SA_DIR, /* __u8 */
324325
XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */
325326
XFRMA_SA_PCPU, /* __u32 */
327+
XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */
328+
XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */
329+
XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */
330+
XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */
331+
XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */
332+
XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */
326333
__XFRMA_MAX
327334

328335
#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */

net/ipv4/esp4.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err)
816816
}
817817

818818
skb_pull_rcsum(skb, hlen);
819-
if (x->props.mode == XFRM_MODE_TUNNEL)
819+
if (x->props.mode == XFRM_MODE_TUNNEL ||
820+
x->props.mode == XFRM_MODE_IPTFS)
820821
skb_reset_transport_header(skb);
821822
else
822823
skb_set_transport_header(skb, -ihl);

net/ipv6/esp6.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
859859
skb_postpull_rcsum(skb, skb_network_header(skb),
860860
skb_network_header_len(skb));
861861
skb_pull_rcsum(skb, hlen);
862-
if (x->props.mode == XFRM_MODE_TUNNEL)
862+
if (x->props.mode == XFRM_MODE_TUNNEL ||
863+
x->props.mode == XFRM_MODE_IPTFS)
863864
skb_reset_transport_header(skb);
864865
else
865866
skb_set_transport_header(skb, -hdr_len);

net/netfilter/nft_xfrm.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
112112
return true;
113113
}
114114

115-
return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
115+
return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL ||
116+
mode == XFRM_MODE_IPTFS;
116117
}
117118

118119
static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,

net/xfrm/Kconfig

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,22 @@ config NET_KEY_MIGRATE
135135

136136
If unsure, say N.
137137

138+
config XFRM_IPTFS
139+
tristate "IPsec IP-TFS/AGGFRAG (RFC 9347) encapsulation support"
140+
depends on XFRM
141+
help
142+
Information on the IP-TFS/AGGFRAG encapsulation can be found
143+
in RFC 9347. This feature supports demand driven (i.e.,
144+
non-constant send rate) IP-TFS to take advantage of the
145+
AGGFRAG ESP payload encapsulation. This payload type
146+
supports aggregation and fragmentation of the inner IP
147+
packet stream which in turn yields higher small-packet
148+
bandwidth as well as reducing MTU/PMTU issues. Congestion
149+
control is unimplemented as the send rate is demand driven
150+
rather than constant.
151+
152+
If unsure, say N.
153+
138154
config XFRM_ESPINTCP
139155
bool
140156

0 commit comments

Comments
 (0)