Skip to content

Commit d7e29e0

Browse files
authored
Merge pull request #67 from takehaya/fix/diff-based-packet-generation-bpf-encap
fix: diff based packet generation bpf encap
2 parents 2dd36ac + caba0e4 commit d7e29e0

File tree

5 files changed

+201
-15
lines changed

5 files changed

+201
-15
lines changed

pkg/coreelf/bpf_bpfeb.o

20.7 KB
Binary file not shown.

pkg/coreelf/bpf_bpfel.o

20.9 KB
Binary file not shown.

scripts/install_build_tools.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -e
55
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
66
source "${SCRIPT_DIR}/libs/install_utils.sh"
77

8-
install_tool "goreleaser" "go install github.com/goreleaser/goreleaser/v2@latest"
8+
install_tool "goreleaser" "go install github.com/goreleaser/goreleaser/v2@v2.13.3"
99
install_tool "tinygo" "wget https://github.com/tinygo-org/tinygo/releases/download/v0.39.0/tinygo_0.39.0_amd64.deb && sudo dpkg -i tinygo_0.39.0_amd64.deb && rm tinygo_0.39.0_amd64.deb"
1010

1111
echo "✅ All build tools have been installed successfully!"

src/xdp_checksum.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,10 @@ static __always_inline __u16 calc_transport_csum_ipv4(struct xdp_md *ctx, __u16
153153
}
154154

155155
// Calculate transport layer checksum over IPv6
156+
// If final_dst is non-NULL, use it for pseudo-header instead of IPv6 header's daddr
157+
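// (required for SRv6: RFC 8200 Section 8.1 says that when a Routing header is present,
// the pseudo-header uses the final destination, which the caller extracts from the SRH)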
156158
static __always_inline __u16 calc_transport_csum_ipv6(struct xdp_md *ctx, __u16 ip6_offset, __u16 transport_offset,
157-
__u16 transport_len, __u8 protocol)
159+
__u16 transport_len, __u8 protocol, __u8 *final_dst)
158160
{
159161
struct ipv6hdr ip6h;
160162

@@ -167,8 +169,9 @@ static __always_inline __u16 calc_transport_csum_ipv6(struct xdp_md *ctx, __u16
167169
__u32 sum = 0;
168170

169171
// Pseudo-header: source and destination IPv6 addresses
172+
// Per RFC 8200, if Routing header exists, use final destination from it
170173
__u16 *src = (__u16 *)&ip6h.saddr;
171-
__u16 *dst = (__u16 *)&ip6h.daddr;
174+
__u16 *dst = final_dst ? (__u16 *)final_dst : (__u16 *)&ip6h.daddr;
172175

173176
for (int i = 0; i < 8; i++) {
174177
sum += bpf_ntohs(src[i]);

src/xdp_prog.c

Lines changed: 195 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,80 @@ static __noinline bool apply_diff(struct xdp_md *ctx, struct diff_value *dv)
137137
return bpf_xdp_store_bytes(ctx, dv->offset, dv->new_value, dv->size) >= 0;
138138
}
139139

140+
// Ensure IPPROTO_ETHERNET is defined (not present in older kernel headers)
141+
#ifndef IPPROTO_ETHERNET
142+
#define IPPROTO_ETHERNET 143
143+
#endif
144+
145+
// Helper: Traverse IPv6 extension headers to find transport layer
146+
// Returns true if transport layer found, outputs protocol and L4 offset
147+
// If final_dst is non-NULL and SRH is found, copies the final destination (segment[LastEntry]) to it
148+
// Supports: Hop-by-Hop (0), Routing (43), Fragment (44), Destination Options (60)
149+
// Also recognizes IPPROTO_ETHERNET (143), IPPROTO_IPIP (4), IPPROTO_IPV6 (41) as terminal protocols
150+
static __always_inline bool ipv6_find_transport(struct xdp_md *ctx, __u16 l3_offset, __u8 *out_proto, __u16 *out_l4_offset,
151+
__u8 *final_dst, bool *has_final_dst)
152+
{
153+
__u8 proto;
154+
if (bpf_xdp_load_bytes(ctx, l3_offset + 6, &proto, 1) < 0)
155+
return false;
156+
157+
__u16 l4_offset = l3_offset + sizeof(struct ipv6hdr); // 40 bytes
158+
159+
#pragma unroll
160+
for (int i = 0; i < 4; i++) { // Max 4 extension headers
161+
if (proto == IPPROTO_UDP || proto == IPPROTO_TCP || proto == IPPROTO_ICMPV6 || proto == IPPROTO_ETHERNET ||
162+
proto == IPPROTO_IPIP || proto == IPPROTO_IPV6) {
163+
*out_proto = proto;
164+
*out_l4_offset = l4_offset;
165+
return true;
166+
}
167+
168+
if (proto == IPPROTO_HOPOPTS || proto == IPPROTO_DSTOPTS) {
169+
// Extension header: byte 0 = next header, byte 1 = length in 8-octet units (excluding first 8)
170+
__u8 ext_hdr[2];
171+
if (bpf_xdp_load_bytes(ctx, l4_offset, ext_hdr, 2) < 0)
172+
return false;
173+
proto = ext_hdr[0];
174+
l4_offset += (ext_hdr[1] + 1) * 8;
175+
} else if (proto == IPPROTO_ROUTING) {
176+
// Routing header (SRH): extract final destination for pseudo-header
177+
// SRH structure: next_hdr(1) + hdr_ext_len(1) + routing_type(1) + segments_left(1)
178+
// + last_entry(1) + flags(1) + tag(2) + segment_list[...]
179+
// Per RFC 8200, when Segments Left > 0, use final destination from SRH
180+
// When Segments Left = 0, use IPv6 Dst (packet has reached final destination)
181+
__u8 srh_hdr[4]; // next_hdr, hdr_ext_len, routing_type, segments_left
182+
if (bpf_xdp_load_bytes(ctx, l4_offset, srh_hdr, 4) < 0)
183+
return false;
184+
185+
__u8 segments_left = srh_hdr[3];
186+
187+
// Only use SRH final destination if Segments Left > 0
188+
if (final_dst && has_final_dst && segments_left > 0) {
189+
// Per RFC 8754, segment list is in reverse order:
190+
// segment[0] = final destination (last hop)
191+
// segment[LastEntry] = first hop (closest to source)
192+
// Pseudo-header needs the final destination = segment[0] at SRH + 8
193+
if (bpf_xdp_load_bytes(ctx, l4_offset + 8, final_dst, 16) < 0)
194+
return false;
195+
*has_final_dst = true;
196+
}
197+
198+
proto = srh_hdr[0];
199+
l4_offset += (srh_hdr[1] + 1) * 8;
200+
} else if (proto == IPPROTO_FRAGMENT) {
201+
// Fragment header is fixed 8 bytes
202+
__u8 next_hdr;
203+
if (bpf_xdp_load_bytes(ctx, l4_offset, &next_hdr, 1) < 0)
204+
return false;
205+
proto = next_hdr;
206+
l4_offset += 8;
207+
} else {
208+
return false; // Unknown extension header or no transport found
209+
}
210+
}
211+
return false; // Too many extension headers
212+
}
213+
140214
// Recalculate checksum from scratch using bpf_xdp_load_bytes/bpf_xdp_store_bytes
141215
// Used when packet length changes (incremental update not possible)
142216
// This is O(packet_length) - only use when necessary
@@ -182,13 +256,21 @@ static __noinline bool recalc_checksum(struct xdp_md *ctx, struct checksum_meta
182256
if (bpf_xdp_store_bytes(ctx, meta->csum_offset, &csum, 2) < 0)
183257
return false;
184258
} else if (ip_version == 6) {
185-
// IPv6 transport checksum
186-
// TODO: Extension headers (Hop-by-Hop, Routing, Fragment, etc.) are not supported.
187-
struct ipv6hdr ip6h;
188-
if (bpf_xdp_load_bytes(ctx, meta->ip_header_offset, &ip6h, sizeof(ip6h)) < 0)
259+
// IPv6 transport checksum (supports extension headers like SRH)
260+
__u8 proto;
261+
__u16 l4_offset;
262+
__u8 final_dst[16] = {0};
263+
bool has_final_dst = false;
264+
if (!ipv6_find_transport(ctx, meta->ip_header_offset, &proto, &l4_offset, final_dst, &has_final_dst))
265+
return false; // No transport layer found
266+
267+
// Bounds check to prevent underflow in transport length calculation
268+
if (l4_offset >= pkt_len)
189269
return false;
190-
transport_len = bpf_ntohs(ip6h.payload_len);
191-
csum = calc_transport_csum_ipv6(ctx, meta->ip_header_offset, meta->header_start, transport_len, ip6h.nexthdr);
270+
transport_len = pkt_len - l4_offset;
271+
272+
__u8 *final_dst_ptr = has_final_dst ? final_dst : NULL;
273+
csum = calc_transport_csum_ipv6(ctx, meta->ip_header_offset, l4_offset, transport_len, proto, final_dst_ptr);
192274
if (bpf_xdp_store_bytes(ctx, meta->csum_offset, &csum, 2) < 0)
193275
return false;
194276
} else {
@@ -230,7 +312,42 @@ static __noinline bool update_packet_lengths(struct xdp_md *ctx, __u16 target_le
230312
}
231313
}
232314

233-
// Validate l3_offset after VLAN parsing
315+
// Handle MPLS - skip labels until S=1 (bottom of stack)
316+
// MPLS header: Label(20) | Exp(3) | S(1) | TTL(8) = 4 bytes
317+
// S bit is at byte offset 2, bit 0 (0x01)
318+
if (eth_proto == bpf_htons(ETH_P_MPLS_UC) || eth_proto == bpf_htons(ETH_P_MPLS_MC)) {
319+
#pragma unroll
320+
for (int i = 0; i < 8; i++) { // Max 8 MPLS labels
321+
__u8 mpls_byte2;
322+
if (bpf_xdp_load_bytes(ctx, l3_offset + 2, &mpls_byte2, 1) < 0)
323+
return false;
324+
l3_offset += 4; // Skip this MPLS label
325+
if (mpls_byte2 & 0x01) // S bit set = bottom of stack
326+
break;
327+
}
328+
// After MPLS, detect inner protocol from first nibble (IPv4=4, IPv6=6)
329+
__u8 version_byte;
330+
if (bpf_xdp_load_bytes(ctx, l3_offset, &version_byte, 1) < 0)
331+
return false;
332+
__u8 ip_version = (version_byte >> 4) & 0x0F;
333+
if (ip_version == 4)
334+
eth_proto = bpf_htons(ETH_P_IP);
335+
else if (ip_version == 6)
336+
eth_proto = bpf_htons(ETH_P_IPV6);
337+
else {
338+
// L2VPN: inner Ethernet frame after MPLS labels
339+
// TODO: PW Control Word (RFC 4385) not supported - if present (first nibble 0),
340+
// 4 bytes should be skipped before the inner Ethernet header
341+
// Skip inner Ethernet header (14 bytes) and read inner EtherType
342+
__be16 inner_eth_proto;
343+
if (bpf_xdp_load_bytes(ctx, l3_offset + 12, &inner_eth_proto, 2) < 0)
344+
return false;
345+
l3_offset += 14;
346+
eth_proto = inner_eth_proto;
347+
}
348+
}
349+
350+
// Validate l3_offset after VLAN/MPLS parsing
234351
if (l3_offset >= target_len)
235352
return false;
236353

@@ -272,12 +389,11 @@ static __noinline bool update_packet_lengths(struct xdp_md *ctx, __u16 target_le
272389
if (bpf_xdp_store_bytes(ctx, l3_offset + 4, &payload_len_be, 2) < 0)
273390
return false;
274391

275-
// Get next header (protocol) from IPv6 header (offset 6)
392+
// Find transport layer (traversing extension headers like SRH)
276393
__u8 proto;
277-
if (bpf_xdp_load_bytes(ctx, l3_offset + 6, &proto, 1) < 0)
278-
return false;
279-
280-
__u16 l4_offset = l3_offset + sizeof(struct ipv6hdr);
394+
__u16 l4_offset;
395+
if (!ipv6_find_transport(ctx, l3_offset, &proto, &l4_offset, NULL, NULL))
396+
return true; // No transport layer found, but payload_len is updated
281397

282398
if (proto == IPPROTO_UDP) {
283399
// UDP: update len field
@@ -287,6 +403,73 @@ static __noinline bool update_packet_lengths(struct xdp_md *ctx, __u16 target_le
287403
__be16 udp_len_be = bpf_htons(udp_len);
288404
if (bpf_xdp_store_bytes(ctx, l4_offset + 4, &udp_len_be, 2) < 0)
289405
return false;
406+
} else if (proto == IPPROTO_ETHERNET) {
407+
// L2VPN over SRv6: inner Ethernet frame after SRH
408+
// Skip inner Ethernet header (14 bytes) and read inner EtherType
409+
__be16 inner_eth_proto;
410+
if (bpf_xdp_load_bytes(ctx, l4_offset + 12, &inner_eth_proto, 2) < 0)
411+
return true;
412+
__u16 inner_l3 = l4_offset + 14;
413+
414+
if (inner_eth_proto == bpf_htons(ETH_P_IP) && target_len > inner_l3) {
415+
// Update inner IPv4 tot_len
416+
__u16 inner_ip_len = target_len - inner_l3;
417+
__be16 inner_ip_len_be = bpf_htons(inner_ip_len);
418+
if (bpf_xdp_store_bytes(ctx, inner_l3 + 2, &inner_ip_len_be, 2) < 0)
419+
return false;
420+
// Read IHL and protocol from inner IPv4 header
421+
__u8 inner_ver_ihl;
422+
if (bpf_xdp_load_bytes(ctx, inner_l3, &inner_ver_ihl, 1) < 0)
423+
return false;
424+
__u16 inner_ihl = (inner_ver_ihl & 0x0F) * 4;
425+
__u8 inner_proto;
426+
if (bpf_xdp_load_bytes(ctx, inner_l3 + 9, &inner_proto, 1) < 0)
427+
return false;
428+
__u16 inner_l4 = inner_l3 + inner_ihl;
429+
if (inner_proto == IPPROTO_UDP && target_len > inner_l4) {
430+
__u16 inner_udp_len = target_len - inner_l4;
431+
__be16 inner_udp_len_be = bpf_htons(inner_udp_len);
432+
if (bpf_xdp_store_bytes(ctx, inner_l4 + 4, &inner_udp_len_be, 2) < 0)
433+
return false;
434+
}
435+
} else if (inner_eth_proto == bpf_htons(ETH_P_IPV6) && target_len > inner_l3 + sizeof(struct ipv6hdr)) {
436+
// Update inner IPv6 payload_len
437+
__u16 inner_payload_len = target_len - inner_l3 - sizeof(struct ipv6hdr);
438+
__be16 inner_payload_len_be = bpf_htons(inner_payload_len);
439+
if (bpf_xdp_store_bytes(ctx, inner_l3 + 4, &inner_payload_len_be, 2) < 0)
440+
return false;
441+
}
442+
} else if (proto == IPPROTO_IPIP) {
443+
// L3VPN over SRv6: inner IPv4 after SRH
444+
if (target_len > l4_offset) {
445+
__u16 inner_ip_len = target_len - l4_offset;
446+
__be16 inner_ip_len_be = bpf_htons(inner_ip_len);
447+
if (bpf_xdp_store_bytes(ctx, l4_offset + 2, &inner_ip_len_be, 2) < 0)
448+
return false;
449+
// Read IHL and protocol from inner IPv4 header
450+
__u8 inner_ver_ihl;
451+
if (bpf_xdp_load_bytes(ctx, l4_offset, &inner_ver_ihl, 1) < 0)
452+
return false;
453+
__u16 inner_ihl = (inner_ver_ihl & 0x0F) * 4;
454+
__u8 inner_proto;
455+
if (bpf_xdp_load_bytes(ctx, l4_offset + 9, &inner_proto, 1) < 0)
456+
return false;
457+
__u16 inner_l4 = l4_offset + inner_ihl;
458+
if (inner_proto == IPPROTO_UDP && target_len > inner_l4) {
459+
__u16 inner_udp_len = target_len - inner_l4;
460+
__be16 inner_udp_len_be = bpf_htons(inner_udp_len);
461+
if (bpf_xdp_store_bytes(ctx, inner_l4 + 4, &inner_udp_len_be, 2) < 0)
462+
return false;
463+
}
464+
}
465+
} else if (proto == IPPROTO_IPV6) {
466+
// L3VPN over SRv6: inner IPv6 after SRH
467+
if (target_len > l4_offset + sizeof(struct ipv6hdr)) {
468+
__u16 inner_payload_len = target_len - l4_offset - sizeof(struct ipv6hdr);
469+
__be16 inner_payload_len_be = bpf_htons(inner_payload_len);
470+
if (bpf_xdp_store_bytes(ctx, l4_offset + 4, &inner_payload_len_be, 2) < 0)
471+
return false;
472+
}
290473
}
291474
}
292475

0 commit comments

Comments
 (0)