Skip to content

Commit 4fcc204

Browse files
committed
fix: nat v3 ipv6 prefix matching
1 parent 173ebfd commit 4fcc204

File tree

8 files changed

+1641
-18
lines changed

8 files changed

+1641
-18
lines changed

landscape-ebpf/src/bpf/land_nat4_v3.h

Lines changed: 281 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,296 @@
33

44
#include <vmlinux.h>
55

6-
#include "land_nat_v4.h"
6+
#include "landscape_log.h"
7+
#include "land_nat_common.h"
8+
#include "nat/nat_maps.h"
9+
#include "land_wan_ip.h"
710
#include "nat/nat_v3_maps.h"
811

12+
/* NAT source-port allocation ranges, one pair per L4 protocol.
 * `volatile const` globals land in .rodata; this is the libbpf convention
 * for constants that userspace may override before program load — the
 * values below are only defaults. (For ICMP the "port" is the echo id.) */
volatile const u16 tcp_range_start = 32768;
volatile const u16 tcp_range_end = 65535;

volatile const u16 udp_range_start = 32768;
volatile const u16 udp_range_end = 65535;

volatile const u16 icmp_range_start = 32768;
volatile const u16 icmp_range_end = 65535;
20+
21+
/* Offset of the embedded (quoted) IP header inside an ICMP error message:
 * it begins immediately after the outer ICMP header at l4_off. */
static __always_inline int icmpx_err_l3_offset(int l4_off) {
    int inner_l3_off = l4_off + (int)sizeof(struct icmphdr);
    return inner_l3_off;
}
24+
25+
/* Incrementally patch a plain (non-pseudo-header) checksum field in the skb
 * via bpf_l3_csum_replace; on helper failure, log the error and drop.
 * NOTE: expands to `return TC_ACT_SHOT;` on error — only usable inside a
 * function whose int return is a TC verdict (or whose callers treat
 * TC_ACT_SHOT as failure). */
#define L3_CSUM_REPLACE_OR_SHOT(skb_ptr, csum_offset, old_val, new_val, size) \
    do { \
        int _ret = bpf_l3_csum_replace(skb_ptr, csum_offset, old_val, new_val, size); \
        if (_ret) { \
            bpf_printk("l3_csum_replace err: %d", _ret); \
            return TC_ACT_SHOT; \
        } \
    } while (0)
33+
34+
/* Incrementally patch an L4 checksum field via bpf_l4_csum_replace.
 * `len_plus_flags` is the size in the low bits OR'ed with helper flags
 * (BPF_F_PSEUDO_HDR, BPF_F_MARK_MANGLED_0, ...).
 * NOTE: expands to `return TC_ACT_SHOT;` on error — same caller
 * constraint as L3_CSUM_REPLACE_OR_SHOT. */
#define L4_CSUM_REPLACE_OR_SHOT(skb_ptr, csum_offset, old_val, new_val, len_plus_flags) \
    do { \
        int _ret = bpf_l4_csum_replace(skb_ptr, csum_offset, old_val, new_val, len_plus_flags); \
        if (_ret) { \
            bpf_printk("l4_csum_replace err: %d", _ret); \
            return TC_ACT_SHOT; \
        } \
    } while (0)
42+
43+
static __always_inline int ipv4_update_csum_inner_macro(struct __sk_buff *skb, u32 l4_csum_off,
44+
__be32 from_addr, __be16 from_port,
45+
__be32 to_addr, __be16 to_port,
46+
bool l4_pseudo, bool l4_mangled_0) {
47+
u16 csum;
48+
if (l4_mangled_0) {
49+
READ_SKB_U16(skb, l4_csum_off, csum);
50+
}
51+
52+
if (!l4_mangled_0 || csum != 0) {
53+
L3_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, from_port, to_port, 2);
54+
55+
if (l4_pseudo) {
56+
L3_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, from_addr, to_addr, 4);
57+
}
58+
}
59+
}
60+
61+
/* Repair all checksums touched when NAT rewrites the packet embedded in an
 * ICMP error message:
 *   1. the quoted inner IP header checksum (err_ip_check_off),
 *   2. the quoted inner L4 checksum (err_l4_csum_off), if readable,
 *   3. the outer ICMP checksum (icmp_csum_off), which covers the payload,
 *      so every inner byte change must be folded into it as a delta.
 *
 * Returns 0 on success, 1 when a required field cannot be read; the
 * *_OR_SHOT macros return TC_ACT_SHOT directly on helper failure.
 */
static __always_inline int ipv4_update_csum_icmp_err_macro(struct __sk_buff *skb, u32 icmp_csum_off,
                                                           u32 err_ip_check_off,
                                                           u32 err_l4_csum_off, __be32 from_addr,
                                                           __be16 from_port, __be32 to_addr,
                                                           __be16 to_port, bool err_l4_pseudo,
                                                           bool l4_mangled_0) {
    u16 prev_csum;
    u16 curr_csum;
    u16 *tmp_ptr;

    /* Snapshot the inner IP checksum before patching it ... */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_ip_check_off, sizeof(*tmp_ptr))) {
        return 1;
    }
    prev_csum = *tmp_ptr;

    L3_CSUM_REPLACE_OR_SHOT(skb, err_ip_check_off, from_addr, to_addr, 4);

    /* ... then re-read it and fold the before/after delta into the outer
     * ICMP checksum (the inner header is part of the ICMP payload). */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_ip_check_off, sizeof(*tmp_ptr))) {
        return 1;
    }
    curr_csum = *tmp_ptr;
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, prev_csum, curr_csum, 2);

    /* The inner L4 checksum may be truncated away in the quoted packet —
     * only patch it if it is actually present/readable. */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_l4_csum_off, sizeof(*tmp_ptr)) == 0) {
        prev_csum = *tmp_ptr;
        /* NOTE(review): the return value (TC_ACT_SHOT on csum-replace
         * failure) is silently discarded here — confirm this is intended. */
        ipv4_update_csum_inner_macro(skb, err_l4_csum_off, from_addr, from_port, to_addr, to_port,
                                     err_l4_pseudo, l4_mangled_0);

        if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_l4_csum_off, sizeof(*tmp_ptr))) {
            return 1;
        }
        curr_csum = *tmp_ptr;
        L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, prev_csum, curr_csum, 2);
    }

    /* Finally fold the rewritten address/port bytes themselves into the
     * outer ICMP checksum. */
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, from_addr, to_addr, 4);
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, from_port, to_port, 2);

    return 0;
}
101+
102+
/* Rewrite the IPv4 address (and, when present, the L4 port) of a packet
 * according to a NAT action, fixing every affected checksum in place.
 *
 * is_icmpx_error    - the packet is an ICMP error; rewrite happens inside
 *                     the quoted original packet as well.
 * current_l3_offset - byte offset of the outer IPv4 header in the skb.
 * l4_off            - offset of the L4 header; 0 means "L3-only rewrite".
 * err_l4_off        - offset of the L4 header of the quoted inner packet.
 * is_modify_source  - SNAT (rewrite saddr/source) vs DNAT (daddr/dest).
 *
 * Returns 0 on success, a helper error code, 1 for an unsupported
 * protocol, or TC_ACT_SHOT from the checksum macros. */
static __always_inline int modify_headers_v4(struct __sk_buff *skb, bool is_icmpx_error, u8 nexthdr,
                                             u32 current_l3_offset, int l4_off, int err_l4_off,
                                             bool is_modify_source,
                                             const struct nat_action_v4 *action) {
#define BPF_LOG_TOPIC "modify_headers_v4"
    int ret;
    int l4_to_port_off;
    int l4_to_check_off;
    bool l4_check_pseudo;    /* checksum covers a pseudo-header (TCP/UDP) */
    bool l4_check_mangle_0;  /* zero checksum means "absent" (UDP)        */

    int ip_offset =
        is_modify_source ? offsetof(struct iphdr, saddr) : offsetof(struct iphdr, daddr);

    /* Store the new address, then patch the IP header checksum with the
     * old->new delta. */
    ret = bpf_skb_store_bytes(skb, current_l3_offset + ip_offset, &action->to_addr.addr,
                              sizeof(action->to_addr.addr), 0);
    if (ret) return ret;

    L3_CSUM_REPLACE_OR_SHOT(skb, current_l3_offset + offsetof(struct iphdr, check),
                            action->from_addr.addr, action->to_addr.addr, 4);

    /* No L4 header to touch (e.g. fragment) — done. */
    if (l4_off == 0) return 0;

    /* Per-protocol field offsets and checksum semantics. */
    switch (nexthdr) {
        case IPPROTO_TCP:
            l4_to_port_off =
                is_modify_source ? offsetof(struct tcphdr, source) : offsetof(struct tcphdr, dest);
            l4_to_check_off = offsetof(struct tcphdr, check);
            l4_check_pseudo = true;
            l4_check_mangle_0 = false;
            break;
        case IPPROTO_UDP:
            l4_to_port_off =
                is_modify_source ? offsetof(struct udphdr, source) : offsetof(struct udphdr, dest);
            l4_to_check_off = offsetof(struct udphdr, check);
            l4_check_pseudo = true;
            l4_check_mangle_0 = true;
            break;
        case IPPROTO_ICMP:
            /* ICMP has no ports; the echo id plays that role for NAT. */
            l4_to_port_off = offsetof(struct icmphdr, un.echo.id);
            l4_to_check_off = offsetof(struct icmphdr, checksum);
            l4_check_pseudo = false;
            l4_check_mangle_0 = false;
            break;
        default:
            return 1;
    }

    if (is_icmpx_error) {
        /* The quoted packet travels in the opposite direction, so the
         * port to rewrite is mirrored. tcphdr offsets are reused for UDP:
         * source/dest sit at the same offsets (0 and 2) in both headers. */
        if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
            l4_to_port_off =
                is_modify_source ? offsetof(struct tcphdr, dest) : offsetof(struct tcphdr, source);
        }

        /* Address in the inner header is mirrored for the same reason. */
        int icmpx_error_offset =
            is_modify_source ? offsetof(struct iphdr, daddr) : offsetof(struct iphdr, saddr);

        ret = bpf_skb_store_bytes(skb, icmpx_err_l3_offset(l4_off) + icmpx_error_offset,
                                  &action->to_addr.addr, sizeof(action->to_addr.addr), 0);
        if (ret) return ret;

        ret = bpf_write_port(skb, err_l4_off + l4_to_port_off, action->to_port);
        if (ret) return ret;

        /* Fix inner IP, inner L4, and outer ICMP checksums together. */
        if (ipv4_update_csum_icmp_err_macro(
                skb, l4_off + offsetof(struct icmphdr, checksum),
                icmpx_err_l3_offset(l4_off) + offsetof(struct iphdr, check),
                err_l4_off + l4_to_check_off, action->from_addr.addr, action->from_port,
                action->to_addr.addr, action->to_port, l4_check_pseudo, l4_check_mangle_0))
            return TC_ACT_SHOT;

    } else {
        /* Ordinary packet: rewrite the port, then patch the L4 checksum
         * for the port delta and (for pseudo-header checksums) the
         * address delta. */
        ret = bpf_write_port(skb, l4_off + l4_to_port_off, action->to_port);
        if (ret) return ret;

        u32 l4_csum_off = l4_off + l4_to_check_off;
        u32 flags_mangled = l4_check_mangle_0 ? BPF_F_MARK_MANGLED_0 : 0;

        L4_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, action->from_port, action->to_port,
                                2 | flags_mangled);

        if (l4_check_pseudo) {
            L4_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, action->from_addr.addr, action->to_addr.addr,
                                    4 | BPF_F_PSEUDO_HDR | flags_mangled);
        }
    }

    return 0;
#undef BPF_LOG_TOPIC
}
192+
193+
/* Atomically add this packet's length and count to the connection's
 * per-direction traffic counters. */
static __always_inline void nat_metric_accumulate(struct __sk_buff *skb, bool ingress,
                                                  struct nat_timer_value_v4 *value) {
    u64 pkt_len = skb->len;
    if (!ingress) {
        __sync_fetch_and_add(&value->egress_packets, 1);
        __sync_fetch_and_add(&value->egress_bytes, pkt_len);
    } else {
        __sync_fetch_and_add(&value->ingress_packets, 1);
        __sync_fetch_and_add(&value->ingress_bytes, pkt_len);
    }
}
204+
205+
static __always_inline int nat_metric_try_report_v4(struct nat_timer_key_v4 *timer_key,
206+
struct nat_timer_value_v4 *timer_value,
207+
u8 status) {
208+
#define BPF_LOG_TOPIC "nat_metric_try_report_v4"
209+
210+
struct nat_conn_metric_event *event;
211+
event = bpf_ringbuf_reserve(&nat_conn_metric_events, sizeof(struct nat_conn_metric_event), 0);
212+
if (event == NULL) {
213+
return -1;
214+
}
215+
216+
event->src_addr.ip = timer_value->client_addr.addr;
217+
event->dst_addr.ip = timer_key->pair_ip.src_addr.addr;
218+
event->src_port = timer_value->client_port;
219+
event->dst_port = timer_key->pair_ip.src_port;
220+
event->l4_proto = timer_key->l4proto;
221+
event->l3_proto = LANDSCAPE_IPV4_TYPE;
222+
event->flow_id = timer_value->flow_id;
223+
event->trace_id = 0;
224+
event->time = bpf_ktime_get_tai_ns();
225+
event->create_time = timer_value->create_time;
226+
event->ingress_bytes = timer_value->ingress_bytes;
227+
event->ingress_packets = timer_value->ingress_packets;
228+
event->egress_bytes = timer_value->egress_bytes;
229+
event->egress_packets = timer_value->egress_packets;
230+
event->cpu_id = timer_value->cpu_id;
231+
event->ifindex = timer_value->ifindex;
232+
event->status = status;
233+
event->gress = timer_value->gress;
234+
bpf_ringbuf_submit(event, 0);
235+
236+
return 0;
237+
#undef BPF_LOG_TOPIC
238+
}
239+
240+
/* Compare-and-swap a conntrack status word from curr_state to next_state.
 * Returns false when another CPU changed the word first. */
static __always_inline bool ct_change_state(u64 *status_in_value, u64 curr_state, u64 next_state) {
    bool swapped = __sync_bool_compare_and_swap(status_in_value, curr_state, next_state);
    return swapped;
}
243+
244+
/* Drive the per-direction conntrack state machine from the packet type and
 * (re)arm the metric-report timer.
 *
 * Each direction (client/server) tracks its own status word; a CAS failure
 * on a state change means a concurrent update won, and the packet is
 * dropped (TC_ACT_SHOT). On success the shared `status` word is flipped to
 * TIMER_ACTIVE and, if it was not already active, the report timer is
 * started.
 *
 * Fix: the function-local NEW_STATE helper macro was never #undef'd,
 * leaking it into the rest of the translation unit — it is now undefined
 * after its last use, matching the BPF_LOG_TOPIC define/undef convention
 * used throughout this file. */
static __always_inline int ct_state_transition(u8 pkt_type, u8 gress,
                                               struct nat_timer_value_v4 *ct_timer_value) {
#define BPF_LOG_TOPIC "ct_state_transition"
    u64 curr_state, *modify_status = NULL;
    if (gress == NAT_MAPPING_INGRESS) {
        curr_state = ct_timer_value->server_status;
        modify_status = &ct_timer_value->server_status;
    } else {
        curr_state = ct_timer_value->client_status;
        modify_status = &ct_timer_value->client_status;
    }

/* CAS to the new state; bail with SHOT if another CPU raced us. */
#define NEW_STATE(__state) \
    if (!ct_change_state(modify_status, curr_state, (__state))) { \
        return TC_ACT_SHOT; \
    }

    if (pkt_type == PKT_CONNLESS_V2) {
        NEW_STATE(CT_LESS_EST);
    }

    if (pkt_type == PKT_TCP_RST_V2) {
        NEW_STATE(CT_INIT);
    }

    if (pkt_type == PKT_TCP_SYN_V2) {
        NEW_STATE(CT_SYN);
    }

    if (pkt_type == PKT_TCP_FIN_V2) {
        NEW_STATE(CT_FIN);
    }
#undef NEW_STATE

    /* Mark the entry active; only the transition *into* TIMER_ACTIVE arms
     * the periodic report timer.
     * NOTE(review): bpf_timer_start's return value is ignored — confirm
     * failure here is acceptable. */
    u64 prev_state = __sync_lock_test_and_set(&ct_timer_value->status, TIMER_ACTIVE);
    if (prev_state != TIMER_ACTIVE) {
        if (ct_timer_value->client_port == TEST_PORT) {
            bpf_log_info("flush status to TIMER_ACTIVE: 20");
        }
        bpf_timer_start(&ct_timer_value->timer, REPORT_INTERVAL, 0);
    }

    return TC_ACT_OK;
#undef BPF_LOG_TOPIC
}
288+
9289
/* The 64-bit conntrack status word packs an 8-bit state into the top bits
 * (shift 56) and a reference count into the remaining 56 bits. */
#define NAT4_V3_STATE_SHIFT 56
#define NAT4_V3_REF_MASK ((1ULL << NAT4_V3_STATE_SHIFT) - 1)
#define NAT4_V3_STATE_ACTIVE 1
#define NAT4_V3_STATE_CLOSED 2
#define TIMER_RELEASE_PENDING_QUEUE 41ULL
/* Timer-callback step outcomes for the v3 conntrack timer. */
#define NAT4_V3_TIMER_STEP_DELETE_CT 1U
#define NAT4_V3_TIMER_STEP_RESTART 2U
18296

19297
struct nat4_lookup_result_v3 {
20298
struct nat_mapping_value_v4 *egress;
@@ -264,11 +542,6 @@ nat4_v3_timer_base(struct nat_timer_value_v4_v3 *value) {
264542
return (struct nat_timer_value_v4 *)value;
265543
}
266544

267-
static __always_inline const struct nat_timer_value_v4 *
268-
nat4_v3_timer_base_const(const struct nat_timer_value_v4_v3 *value) {
269-
return (const struct nat_timer_value_v4 *)value;
270-
}
271-
272545
static __always_inline u32 nat4_v3_handle_timer_step(struct nat_timer_key_v4 *key,
273546
struct nat_timer_value_v4_v3 *value,
274547
bool force_queue_push_fail,

0 commit comments

Comments
 (0)