|
3 | 3 |
|
4 | 4 | #include <vmlinux.h> |
5 | 5 |
|
6 | | -#include "land_nat_v4.h" |
| 6 | +#include "landscape_log.h" |
| 7 | +#include "land_nat_common.h" |
| 8 | +#include "nat/nat_maps.h" |
| 9 | +#include "land_wan_ip.h" |
7 | 10 | #include "nat/nat_v3_maps.h" |
8 | 11 |
|
/* Per-protocol NAT port-allocation ranges.  Declared `volatile const` so they
 * live in the BPF .rodata section and can be overridden by the userspace
 * loader before the program is loaded; the defaults cover the conventional
 * ephemeral port range 32768-65535. */
volatile const u16 tcp_range_start = 32768;
volatile const u16 tcp_range_end = 65535;

volatile const u16 udp_range_start = 32768;
volatile const u16 udp_range_end = 65535;

volatile const u16 icmp_range_start = 32768;
volatile const u16 icmp_range_end = 65535;
| 21 | +static __always_inline int icmpx_err_l3_offset(int l4_off) { |
| 22 | + return l4_off + sizeof(struct icmphdr); |
| 23 | +} |
| 24 | + |
/* Apply bpf_l3_csum_replace() at csum_offset, swapping old_val -> new_val
 * (size is 2 or 4 bytes).  WARNING: on failure this expands to
 * `return TC_ACT_SHOT`, so it may only be used inside a function whose int
 * return value is a TC action code. */
#define L3_CSUM_REPLACE_OR_SHOT(skb_ptr, csum_offset, old_val, new_val, size) \
    do { \
        int _ret = bpf_l3_csum_replace(skb_ptr, csum_offset, old_val, new_val, size); \
        if (_ret) { \
            bpf_printk("l3_csum_replace err: %d", _ret); \
            return TC_ACT_SHOT; \
        } \
    } while (0)
| 33 | + |
/* Apply bpf_l4_csum_replace() at csum_offset, swapping old_val -> new_val.
 * len_plus_flags carries the value size in the low bits optionally OR-ed with
 * BPF_F_PSEUDO_HDR / BPF_F_MARK_MANGLED_0.  WARNING: on failure this expands
 * to `return TC_ACT_SHOT`, so it may only be used inside a function whose int
 * return value is a TC action code. */
#define L4_CSUM_REPLACE_OR_SHOT(skb_ptr, csum_offset, old_val, new_val, len_plus_flags) \
    do { \
        int _ret = bpf_l4_csum_replace(skb_ptr, csum_offset, old_val, new_val, len_plus_flags); \
        if (_ret) { \
            bpf_printk("l4_csum_replace err: %d", _ret); \
            return TC_ACT_SHOT; \
        } \
    } while (0)
| 42 | + |
| 43 | +static __always_inline int ipv4_update_csum_inner_macro(struct __sk_buff *skb, u32 l4_csum_off, |
| 44 | + __be32 from_addr, __be16 from_port, |
| 45 | + __be32 to_addr, __be16 to_port, |
| 46 | + bool l4_pseudo, bool l4_mangled_0) { |
| 47 | + u16 csum; |
| 48 | + if (l4_mangled_0) { |
| 49 | + READ_SKB_U16(skb, l4_csum_off, csum); |
| 50 | + } |
| 51 | + |
| 52 | + if (!l4_mangled_0 || csum != 0) { |
| 53 | + L3_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, from_port, to_port, 2); |
| 54 | + |
| 55 | + if (l4_pseudo) { |
| 56 | + L3_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, from_addr, to_addr, 4); |
| 57 | + } |
| 58 | + } |
| 59 | +} |
| 60 | + |
/* Rewrite checksums for an ICMP error packet whose embedded (original) packet
 * had an address/port translated.  Four deltas are applied, in order:
 *   1. fix the embedded IP header checksum (addr change),
 *   2. fold that checksum's before/after delta into the outer ICMP checksum,
 *   3. fix the embedded L4 checksum (port + optional pseudo-header addr),
 *   4. fold that delta into the outer ICMP checksum as well,
 * and finally account for the rewritten address/port bytes themselves, which
 * are part of the ICMP payload the outer checksum covers.
 *
 * Returns 0 on success, 1 when a bounds-checked read fails; the *_OR_SHOT
 * macros may also return TC_ACT_SHOT directly.
 * NOTE(review): each checksum field is re-read after mutation because the
 * helper may have moved skb data; pointers are revalidated every time. */
static __always_inline int ipv4_update_csum_icmp_err_macro(struct __sk_buff *skb, u32 icmp_csum_off,
                                                           u32 err_ip_check_off,
                                                           u32 err_l4_csum_off, __be32 from_addr,
                                                           __be16 from_port, __be32 to_addr,
                                                           __be16 to_port, bool err_l4_pseudo,
                                                           bool l4_mangled_0) {
    u16 prev_csum;
    u16 curr_csum;
    u16 *tmp_ptr;

    /* Snapshot the embedded IP header checksum before rewriting it. */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_ip_check_off, sizeof(*tmp_ptr))) {
        return 1;
    }
    prev_csum = *tmp_ptr;

    L3_CSUM_REPLACE_OR_SHOT(skb, err_ip_check_off, from_addr, to_addr, 4);

    /* Fold the inner-IP-checksum delta into the outer ICMP checksum. */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_ip_check_off, sizeof(*tmp_ptr))) {
        return 1;
    }
    curr_csum = *tmp_ptr;
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, prev_csum, curr_csum, 2);

    /* The embedded L4 checksum may be truncated away; only touch it when the
     * read validates. */
    if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_l4_csum_off, sizeof(*tmp_ptr)) == 0) {
        prev_csum = *tmp_ptr;
        ipv4_update_csum_inner_macro(skb, err_l4_csum_off, from_addr, from_port, to_addr, to_port,
                                     err_l4_pseudo, l4_mangled_0);

        if (VALIDATE_READ_DATA(skb, &tmp_ptr, err_l4_csum_off, sizeof(*tmp_ptr))) {
            return 1;
        }
        curr_csum = *tmp_ptr;
        L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, prev_csum, curr_csum, 2);
    }

    /* The rewritten addr/port bytes live inside the ICMP payload, so their
     * change must also be reflected in the outer ICMP checksum. */
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, from_addr, to_addr, 4);
    L4_CSUM_REPLACE_OR_SHOT(skb, icmp_csum_off, from_port, to_port, 2);

    return 0;
}
| 101 | + |
/* Rewrite the IPv4 and L4 headers of a packet according to a NAT action.
 *
 * @is_icmpx_error     packet is an ICMP error carrying an embedded packet;
 *                     the embedded headers are rewritten instead of the outer L4
 * @nexthdr            L4 protocol (of the embedded packet when is_icmpx_error)
 * @current_l3_offset  skb offset of the outer IPv4 header
 * @l4_off             skb offset of the outer L4 header (0 = IP-only rewrite)
 * @err_l4_off         skb offset of the embedded L4 header (ICMP error case)
 * @is_modify_source   SNAT (rewrite source) vs DNAT (rewrite destination)
 *
 * Returns 0 on success, a nonzero bpf helper error or 1 (unknown protocol),
 * or TC_ACT_SHOT via the checksum macros. */
static __always_inline int modify_headers_v4(struct __sk_buff *skb, bool is_icmpx_error, u8 nexthdr,
                                             u32 current_l3_offset, int l4_off, int err_l4_off,
                                             bool is_modify_source,
                                             const struct nat_action_v4 *action) {
#define BPF_LOG_TOPIC "modify_headers_v4"
    int ret;
    int l4_to_port_off;
    int l4_to_check_off;
    bool l4_check_pseudo;    // L4 checksum includes the IP pseudo-header
    bool l4_check_mangle_0;  // zero checksum means "none" (UDP semantics)

    /* Outer IP header: store the new address and patch the IP checksum. */
    int ip_offset =
        is_modify_source ? offsetof(struct iphdr, saddr) : offsetof(struct iphdr, daddr);

    ret = bpf_skb_store_bytes(skb, current_l3_offset + ip_offset, &action->to_addr.addr,
                              sizeof(action->to_addr.addr), 0);
    if (ret) return ret;

    L3_CSUM_REPLACE_OR_SHOT(skb, current_l3_offset + offsetof(struct iphdr, check),
                            action->from_addr.addr, action->to_addr.addr, 4);

    /* l4_off == 0 signals an address-only rewrite (no L4 header to touch). */
    if (l4_off == 0) return 0;

    switch (nexthdr) {
        case IPPROTO_TCP:
            l4_to_port_off =
                is_modify_source ? offsetof(struct tcphdr, source) : offsetof(struct tcphdr, dest);
            l4_to_check_off = offsetof(struct tcphdr, check);
            l4_check_pseudo = true;
            l4_check_mangle_0 = false;
            break;
        case IPPROTO_UDP:
            l4_to_port_off =
                is_modify_source ? offsetof(struct udphdr, source) : offsetof(struct udphdr, dest);
            l4_to_check_off = offsetof(struct udphdr, check);
            l4_check_pseudo = true;
            l4_check_mangle_0 = true;
            break;
        case IPPROTO_ICMP:
            /* For ICMP query messages the echo id plays the role of a port. */
            l4_to_port_off = offsetof(struct icmphdr, un.echo.id);
            l4_to_check_off = offsetof(struct icmphdr, checksum);
            l4_check_pseudo = false;
            l4_check_mangle_0 = false;
            break;
        default:
            return 1;
    }

    if (is_icmpx_error) {
        /* The embedded packet travels in the opposite direction, so the port
         * to rewrite is mirrored.  tcphdr source/dest offsets are reused for
         * UDP as well — they coincide with udphdr's (0 and 2). */
        if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
            l4_to_port_off =
                is_modify_source ? offsetof(struct tcphdr, dest) : offsetof(struct tcphdr, source);
        }

        /* Mirrored for the same reason: embedded daddr pairs with outer saddr. */
        int icmpx_error_offset =
            is_modify_source ? offsetof(struct iphdr, daddr) : offsetof(struct iphdr, saddr);

        ret = bpf_skb_store_bytes(skb, icmpx_err_l3_offset(l4_off) + icmpx_error_offset,
                                  &action->to_addr.addr, sizeof(action->to_addr.addr), 0);
        if (ret) return ret;

        ret = bpf_write_port(skb, err_l4_off + l4_to_port_off, action->to_port);
        if (ret) return ret;

        if (ipv4_update_csum_icmp_err_macro(
                skb, l4_off + offsetof(struct icmphdr, checksum),
                icmpx_err_l3_offset(l4_off) + offsetof(struct iphdr, check),
                err_l4_off + l4_to_check_off, action->from_addr.addr, action->from_port,
                action->to_addr.addr, action->to_port, l4_check_pseudo, l4_check_mangle_0))
            return TC_ACT_SHOT;

    } else {
        /* Plain (non-error) packet: rewrite the port and patch the L4 csum. */
        ret = bpf_write_port(skb, l4_off + l4_to_port_off, action->to_port);
        if (ret) return ret;

        u32 l4_csum_off = l4_off + l4_to_check_off;
        u32 flags_mangled = l4_check_mangle_0 ? BPF_F_MARK_MANGLED_0 : 0;

        L4_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, action->from_port, action->to_port,
                                2 | flags_mangled);

        if (l4_check_pseudo) {
            L4_CSUM_REPLACE_OR_SHOT(skb, l4_csum_off, action->from_addr.addr, action->to_addr.addr,
                                    4 | BPF_F_PSEUDO_HDR | flags_mangled);
        }
    }

    return 0;
#undef BPF_LOG_TOPIC
}
| 192 | + |
| 193 | +static __always_inline void nat_metric_accumulate(struct __sk_buff *skb, bool ingress, |
| 194 | + struct nat_timer_value_v4 *value) { |
| 195 | + u64 bytes = skb->len; |
| 196 | + if (ingress) { |
| 197 | + __sync_fetch_and_add(&value->ingress_bytes, bytes); |
| 198 | + __sync_fetch_and_add(&value->ingress_packets, 1); |
| 199 | + } else { |
| 200 | + __sync_fetch_and_add(&value->egress_bytes, bytes); |
| 201 | + __sync_fetch_and_add(&value->egress_packets, 1); |
| 202 | + } |
| 203 | +} |
| 204 | + |
/* Publish a connection-metric snapshot to userspace through the
 * nat_conn_metric_events ring buffer.
 *
 * @status  lifecycle status stamped on the event (e.g. active vs closed —
 *          exact enum semantics defined by the event consumer)
 *
 * Returns 0 on success, -1 when the ring buffer has no space (the report is
 * simply dropped; counters remain in the timer value for a later attempt). */
static __always_inline int nat_metric_try_report_v4(struct nat_timer_key_v4 *timer_key,
                                                    struct nat_timer_value_v4 *timer_value,
                                                    u8 status) {
#define BPF_LOG_TOPIC "nat_metric_try_report_v4"

    struct nat_conn_metric_event *event;
    event = bpf_ringbuf_reserve(&nat_conn_metric_events, sizeof(struct nat_conn_metric_event), 0);
    if (event == NULL) {
        return -1;
    }

    /* Copy the flow 5-tuple plus bookkeeping into the event. */
    event->src_addr.ip = timer_value->client_addr.addr;
    event->dst_addr.ip = timer_key->pair_ip.src_addr.addr;
    event->src_port = timer_value->client_port;
    event->dst_port = timer_key->pair_ip.src_port;
    event->l4_proto = timer_key->l4proto;
    event->l3_proto = LANDSCAPE_IPV4_TYPE;
    event->flow_id = timer_value->flow_id;
    event->trace_id = 0;
    /* TAI clock so userspace can correlate with wall-clock time. */
    event->time = bpf_ktime_get_tai_ns();
    event->create_time = timer_value->create_time;
    event->ingress_bytes = timer_value->ingress_bytes;
    event->ingress_packets = timer_value->ingress_packets;
    event->egress_bytes = timer_value->egress_bytes;
    event->egress_packets = timer_value->egress_packets;
    event->cpu_id = timer_value->cpu_id;
    event->ifindex = timer_value->ifindex;
    event->status = status;
    event->gress = timer_value->gress;
    bpf_ringbuf_submit(event, 0);

    return 0;
#undef BPF_LOG_TOPIC
}
| 239 | + |
/* Atomically advance a conntrack status word from curr_state to next_state.
 * Returns false if another CPU changed the state first (CAS lost). */
static __always_inline bool ct_change_state(u64 *status_in_value, u64 curr_state, u64 next_state) {
    return __sync_bool_compare_and_swap(status_in_value, curr_state, next_state);
}
| 243 | + |
| 244 | +static __always_inline int ct_state_transition(u8 pkt_type, u8 gress, |
| 245 | + struct nat_timer_value_v4 *ct_timer_value) { |
| 246 | +#define BPF_LOG_TOPIC "ct_state_transition" |
| 247 | + u64 curr_state, *modify_status = NULL; |
| 248 | + if (gress == NAT_MAPPING_INGRESS) { |
| 249 | + curr_state = ct_timer_value->server_status; |
| 250 | + modify_status = &ct_timer_value->server_status; |
| 251 | + } else { |
| 252 | + curr_state = ct_timer_value->client_status; |
| 253 | + modify_status = &ct_timer_value->client_status; |
| 254 | + } |
| 255 | + |
| 256 | +#define NEW_STATE(__state) \ |
| 257 | + if (!ct_change_state(modify_status, curr_state, (__state))) { \ |
| 258 | + return TC_ACT_SHOT; \ |
| 259 | + } |
| 260 | + |
| 261 | + if (pkt_type == PKT_CONNLESS_V2) { |
| 262 | + NEW_STATE(CT_LESS_EST); |
| 263 | + } |
| 264 | + |
| 265 | + if (pkt_type == PKT_TCP_RST_V2) { |
| 266 | + NEW_STATE(CT_INIT); |
| 267 | + } |
| 268 | + |
| 269 | + if (pkt_type == PKT_TCP_SYN_V2) { |
| 270 | + NEW_STATE(CT_SYN); |
| 271 | + } |
| 272 | + |
| 273 | + if (pkt_type == PKT_TCP_FIN_V2) { |
| 274 | + NEW_STATE(CT_FIN); |
| 275 | + } |
| 276 | + |
| 277 | + u64 prev_state = __sync_lock_test_and_set(&ct_timer_value->status, TIMER_ACTIVE); |
| 278 | + if (prev_state != TIMER_ACTIVE) { |
| 279 | + if (ct_timer_value->client_port == TEST_PORT) { |
| 280 | + bpf_log_info("flush status to TIMER_ACTIVE: 20"); |
| 281 | + } |
| 282 | + bpf_timer_start(&ct_timer_value->timer, REPORT_INTERVAL, 0); |
| 283 | + } |
| 284 | + |
| 285 | + return TC_ACT_OK; |
| 286 | +#undef BPF_LOG_TOPIC |
| 287 | +} |
| 288 | + |
/* Packed state/refcount word layout: the top 8 bits (shift 56) hold the
 * entry state, the low 56 bits hold the reference count. */
#define NAT4_V3_STATE_SHIFT 56
#define NAT4_V3_REF_MASK ((1ULL << NAT4_V3_STATE_SHIFT) - 1)
#define NAT4_V3_STATE_ACTIVE 1
#define NAT4_V3_STATE_CLOSED 2
/* Sentinel status used while an entry waits in the release queue. */
#define TIMER_RELEASE_PENDING_QUEUE 41ULL
/* Timer callback step codes: tear down the CT entry vs. re-arm the timer. */
#define NAT4_V3_TIMER_STEP_DELETE_CT 1U
#define NAT4_V3_TIMER_STEP_RESTART 2U
|
19 | 297 | struct nat4_lookup_result_v3 { |
20 | 298 | struct nat_mapping_value_v4 *egress; |
@@ -264,11 +542,6 @@ nat4_v3_timer_base(struct nat_timer_value_v4_v3 *value) { |
264 | 542 | return (struct nat_timer_value_v4 *)value; |
265 | 543 | } |
266 | 544 |
|
267 | | -static __always_inline const struct nat_timer_value_v4 * |
268 | | -nat4_v3_timer_base_const(const struct nat_timer_value_v4_v3 *value) { |
269 | | - return (const struct nat_timer_value_v4 *)value; |
270 | | -} |
271 | | - |
272 | 545 | static __always_inline u32 nat4_v3_handle_timer_step(struct nat_timer_key_v4 *key, |
273 | 546 | struct nat_timer_value_v4_v3 *value, |
274 | 547 | bool force_queue_push_fail, |
|
0 commit comments