Skip to content

Commit 3647804

Browse files
committed
tbs with the icmp commit
1 parent 0d915b5 commit 3647804

File tree

2 files changed

+64
-15
lines changed

2 files changed

+64
-15
lines changed

src/lookup.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#include "common.h"
55

6-
typedef struct pkt_hdrs {
6+
typedef struct __attribute__((packed)) pkt_hdrs {
77
__be32 saddr;
88
__be32 daddr;
99
__u8 proto;

src/pmtud.h

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,7 @@ typedef struct pmtud_pkt {
1919
struct icmphdr icmp;
2020
}pmtud_pkt_t;
2121

22-
typedef struct __attribute__((packed)) pmtud_flow_hash{
23-
__be32 saddr;
24-
__be32 daddr;
25-
__u8 proto;
26-
__be16 sport;
27-
__be16 dport;
28-
__u8 zero[3];
29-
}pmtud_flow_hash_t;
22+
//The PMTUD flow hash type is a plain alias of pkt_hdrs_t, so it has the
//same fields and memory layout as pkt_hdrs_t.
typedef pkt_hdrs_t pmtud_flow_hash_t;
3023

3124
COMPILATION_ASSERT(sizeof(pmtud_flow_hash_t) == 16,
3225
"Size of pmtud_flow_hash_t must be 16!");
@@ -256,10 +249,11 @@ static __always_inline
256249
int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
257250
int rc;
258251
const sfunnel_ip4_rule_t* rule = NULL;
252+
pmtud_flow_state_t* state = NULL;
259253
struct udphdr* udp;
260254
struct icmphdr* icmp;
261255
struct iphdr* inner_ip;
262-
pkt_hdrs_t hdrs;
256+
pkt_hdrs_t hdrs = {0};
263257
__u8 fhdr_size;
264258

265259
icmp = (struct icmphdr*) ((__u8*)ip + (ip->ihl * 4));
@@ -278,11 +272,15 @@ int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
278272
hdrs.daddr = inner_ip->daddr;
279273
hdrs.proto = inner_ip->protocol;
280274

281-
//Note RFC 792 only ensures the first 8 bytes of the original L4 hdr
275+
//Note: RFC 792 only ensures the first 8 bytes of the original L4 hdr
282276
//This got updated with RFC 4884 and RFC 1812; in practice most systems
283-
//will send _up_ to 64 bytes, which includes the inner L4 hdr, which
284-
//allows us not to have to do "connection/flow tracking".
285-
udp = (struct udphdr *) ((__u8*)inner_ip + (inner_ip->ihl * 4));
277+
//will send at least 64 bytes, which includes the inner L4 hdr. This
278+
//allows us to look in the inner L4 hdr instead of having to do
279+
//flow tracking
280+
//
281+
//Note2: we are only interested in the s/dport of the L4 hdr. Using
282+
//struct udphdr, as the ports sit at the same offset in TCP and UDP hdrs.
283+
udp = (struct udphdr *)((__u8*)inner_ip + (inner_ip->ihl * 4));
286284
CHECK_SKB_PTR(skb, ((__u8*)udp) + 8);
287285

288286
hdrs.sport = udp->source;
@@ -293,6 +291,7 @@ int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
293291
if(!rule || !rule->actions.unfunnel.execute)
294292
return TC_ACT_UNSPEC;
295293

294+
296295
if(rule->actions.unfunnel.p.unfunnel.proto == IPPROTO_UDP){
297296
fhdr_size = sizeof(struct udphdr);
298297
}else if(rule->actions.unfunnel.p.unfunnel.proto == IPPROTO_TCP){
@@ -301,6 +300,38 @@ int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
301300
return TC_ACT_SHOT;
302301
}
303302

303+
//Recover the max network MTU
304+
__u16 net_mtu = bpf_ntohs(icmp->un.frag.mtu);
305+
__s64 icmp_diff = 0;
306+
307+
//Check whether we have to adjust the PMTUD map and adapt net_mtu
308+
//Note: if not present in the map, the end host effective MTU will be
309+
//lowered. We will further lower it once the first packet exceeding
310+
//net_mtu + fhdr_size is intercepted, so no need to do anything here.
311+
state = bpf_map_lookup_elem(&pmtud_map, &hdrs);
312+
if(state){
313+
if(net_mtu < state->last_seen_net_mtu){
314+
state->last_seen_net_mtu = net_mtu;
315+
state->adjusted_mtu = net_mtu - fhdr_size;
316+
317+
rc = bpf_map_update_elem(&pmtud_map, &hdrs, &state,
318+
BPF_ANY);
319+
if(rc < 0){
320+
PRINTK("[%d:0x%p][pmtud][net] Unable to create flow state rc=%d",
321+
skb->ifindex, skb, rc);
322+
}
323+
}
324+
325+
__be32 old_mtu = *(__be32*)&icmp->un.frag;
326+
327+
//Adjust ICMP network MTU (-fhdr_size)
328+
icmp->un.frag.mtu = bpf_htons(state->adjusted_mtu);
329+
330+
//Adjust ICMP checksum
331+
icmp_diff = bpf_csum_diff(&old_mtu, 4, (__be32*)&icmp->un.frag,
332+
4, 0);
333+
}
334+
304335
CHECK_SKB_PTR(skb, ((__u8*)udp) + fhdr_size + 8);
305336

306337
//Now unfunnel
@@ -309,6 +340,9 @@ int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
309340
union ttl_proto old_ttl = *(union ttl_proto*)&inner_ip->ttl;
310341
__s64 diff = bpf_csum_diff((__be32*)&old_ttl, 4,
311342
(__be32*)&inner_ip->ttl, 4, 0);
343+
icmp_diff = bpf_csum_diff((__be32*)&old_ttl, 4,
344+
(__be32*)&inner_ip->ttl, 4,
345+
icmp_diff);
312346

313347
__u32 l3_off = (__u8*)inner_ip - (__u8*)SKB_GET_ETH(skb);
314348
l3_off += offsetof(struct iphdr, check);
@@ -320,8 +354,23 @@ int pmtud_proc_icmp(struct __sk_buff* skb, struct iphdr* ip){
320354
}
321355
}
322356

323-
//Now set ports from inner L4 (+fhdr_size). We are lazy here.
357+
//Now set ports from inner L4, which we recovered from the funneled
358+
//L4 hdr (+fhdr_size)
359+
__be32 old_ports = *(__be32*)udp;
324360
*(__be32*)udp = *(__be32*)(((__u8*)udp) + fhdr_size);
361+
icmp_diff = bpf_csum_diff((__be32*)&old_ports, 4,
362+
(__be32*)udp, 4,
363+
icmp_diff);
364+
365+
__u32 l4_off = (__u8*)udp - (__u8*)SKB_GET_ETH(skb);
366+
l4_off += offsetof(struct icmphdr, checksum);
367+
rc = bpf_l4_csum_replace(skb, l4_off, 0, icmp_diff, 0);
368+
if(rc < 0){
369+
PRINTK("[%d:0x%p][pmtud][net] Unable to set L4 csum. rc=%d",
370+
skb->ifindex, skb,
371+
rc);
372+
return TC_ACT_SHOT;
373+
}
325374

326375
return TC_ACT_OK;
327376
}

0 commit comments

Comments
 (0)