/*
 * Patch summary (documentation only; text ahead of the first file header
 * is ignored by patch tools).
 *
 * Files touched: include/dp_nat.h, src/dp_nat.c, src/nodes/snat_node.c.
 *
 * dp_allocate_network_snat_port()
 *   - Third parameter changes from `uint32_t vni` to `struct dp_port *port`;
 *     the VNI for the portmap key is now read from port->iface.vni.
 *   - Returns the allocated NAT port (portoverload_tbl_key.nat_port) on
 *     success, a failed (negative) code otherwise.
 *   - On success it now calls DP_STATS_NAT_INC_USED_PORT_CNT(port) itself;
 *     the two callers in snat_node.c drop their own calls to that macro.
 *   - An l3_type that is neither IPv4 nor IPv6 now yields DP_ERROR instead
 *     of DP_GRPC_ERR_BAD_IPVER.
 *   - The body is split into three static helpers:
 *       dp_use_existing_portmap_entry(): looks up the portmap entry for the
 *         (source ip, source port, vni) key and copies its nat_port into the
 *         port-overload key. Returns -ENOENT when there is no portmap entry,
 *         -EEXIST when the port-overload key is already present, otherwise
 *         adds the port-overload key and increments flow_cnt.
 *       dp_find_new_port(): starting at an offset derived from the hash of
 *         the portmap key, probes ports in [min_port, max_port) until
 *         rte_hash_lookup() reports -ENOENT for the port-overload key.
 *         When every probe finds an entry it emits a rate-limited warning
 *         (separate message for an IPv6 source) and returns -ENOENT; the
 *         replaced code returned DP_ERROR here.
 *       dp_create_new_portmap_entry(): adds the port-overload key, allocates
 *         a netnat_portmap_data with flow_cnt = 1 and stores it under the
 *         portmap key; on failure it removes the port-overload key again
 *         (and frees the allocation if it was made).
 *   - The replaced code searched for a fresh port when the port-overload
 *     lookup for an existing portmap entry succeeded; the new code returns
 *     -EEXIST in that case.
 *
 * dp_remove_network_snat_port()
 *   - Logs when deleting the port-overload key fails with something other
 *     than -ENOENT.
 *   - A failed portmap lookup is logged; only codes other than -ENOENT
 *     abort, -ENOENT continues with the rest of the function.
 *   - NOTE(review): the lookup-failure branch logs via DPS_LOG_ERR even for
 *     -ENOENT, which its own comment treats as "already deleted" - confirm
 *     that this log level is intended.
 *
 * src/nodes/snat_node.c
 *   - dp_process_ipv4_snat(): when dp_add_flow() fails and nat_ip != 0, the
 *     port is released with dp_remove_network_snat_port(cntrack).
 *   - dp_process_ipv6_nat64(): when dp_add_flow() fails the port is released
 *     the same way; the explicit DP_STATS_NAT_DEC_USED_PORT_CNT(port) after
 *     dp_remove_network_snat_port() is dropped.
 *   - NOTE(review): presumably dp_remove_network_snat_port() decrements the
 *     used-port counter itself; that part of the function is not in this
 *     patch - confirm.
 *
 * NOTE(review): this patch was recovered from a copy whose line structure
 * had been collapsed. Line breaks follow the +/- markers; blank context
 * lines and indentation depth were re-inserted to match the @@ hunk counts
 * and are a best-effort reconstruction - verify against the target tree
 * before applying.
 */
diff --git a/include/dp_nat.h b/include/dp_nat.h
index 44cf6893e..bc67fc48a 100644
--- a/include/dp_nat.h
+++ b/include/dp_nat.h
@@ -104,7 +104,7 @@ int dp_add_neighnat_entry(uint32_t nat_ip, uint32_t vni, uint16_t min_port, uin
 
 int dp_del_neighnat_entry(uint32_t nat_ip, uint32_t vni, uint16_t min_port, uint16_t max_port);
 
-int dp_allocate_network_snat_port(struct snat_data *snat_data, struct dp_flow *df, uint32_t vni);
+int dp_allocate_network_snat_port(struct snat_data *snat_data, struct dp_flow *df, struct dp_port *port);
 const union dp_ipv6 *dp_lookup_neighnat_underlay_ip(struct dp_flow *df);
 int dp_remove_network_snat_port(const struct flow_value *cntrack);
 
diff --git a/src/dp_nat.c b/src/dp_nat.c
index e79999432..1b96a0695 100644
--- a/src/dp_nat.c
+++ b/src/dp_nat.c
@@ -597,36 +597,142 @@ const union dp_ipv6 *dp_lookup_neighnat_underlay_ip(struct dp_flow *df)
 	return NULL;
 }
 
-int dp_allocate_network_snat_port(struct snat_data *snat_data, struct dp_flow *df, uint32_t vni)
+static __rte_always_inline
+int dp_find_new_port(struct snat_data *snat_data,
+					 const struct netnat_portmap_key *portmap_key,
+					 struct netnat_portoverload_tbl_key *portoverload_tbl_key)
+{
+	uint32_t iface_src_info_hash;
+	uint16_t min_port = snat_data->nat_port_range[0];
+	uint16_t max_port = snat_data->nat_port_range[1];
+	uint16_t tmp_port;
+	uint64_t timestamp;
+	int ret;
+
+	iface_src_info_hash = (uint32_t)rte_hash_hash(ipv4_netnat_portmap_tbl, portmap_key);
+
+	for (uint16_t p = 0; p < max_port - min_port; ++p) {
+		tmp_port = min_port + (uint16_t)((iface_src_info_hash + p) % (uint32_t)(max_port - min_port));
+		portoverload_tbl_key->nat_port = tmp_port;
+		ret = rte_hash_lookup(ipv4_netnat_portoverload_tbl, portoverload_tbl_key);
+		if (ret == -ENOENT) {
+			return DP_OK;
+		} else if (DP_FAILED(ret)) {
+			DPS_LOG_ERR("Cannot lookup ipv4 port overload key", DP_LOG_RET(ret));
+			return ret;
+		}
+		// on success continue the search (this port is already in use)
+	}
+
+	// No free port found
+	// This is normal once the port range gets saturated, but still helpful in logs.
+	// Therefore the log must be present, just rate-limited (per interface).
+	timestamp = rte_rdtsc();
+	if (timestamp > snat_data->log_timestamp + dp_nat_full_log_delay) {
+		snat_data->log_timestamp = timestamp;
+		if (portmap_key->src_ip.is_v6) {
+			DPS_LOG_WARNING("NAT64 portmap range is full", DP_LOG_IPV4(snat_data->nat_ip), DP_LOG_VNI(portmap_key->vni),
+							DP_LOG_SRC_IPV6(portmap_key->src_ip.ipv6), DP_LOG_SRC_PORT(portmap_key->iface_src_port));
+		} else {
+			DPS_LOG_WARNING("NAT portmap range is full", DP_LOG_IPV4(snat_data->nat_ip), DP_LOG_VNI(portmap_key->vni),
+							DP_LOG_SRC_IPV4(portmap_key->src_ip.ipv4), DP_LOG_SRC_PORT(portmap_key->iface_src_port));
+		}
+	}
+
+	return -ENOENT;
+}
+
+static int dp_create_new_portmap_entry(const struct netnat_portmap_key *portmap_key,
+									   const struct netnat_portoverload_tbl_key *portoverload_key)
+{
+	struct netnat_portmap_data *portmap_data;
+	int ret;
+
+	ret = rte_hash_add_key(ipv4_netnat_portoverload_tbl, portoverload_key);
+	if (DP_FAILED(ret)) {
+		DPS_LOG_ERR("Failed to add ipv4 network nat port overload key", DP_LOG_RET(ret));
+		return ret;
+	}
+
+	portmap_data = rte_zmalloc("netnat_portmap_val", sizeof(struct netnat_portmap_data), RTE_CACHE_LINE_SIZE);
+	if (!portmap_data) {
+		rte_hash_del_key(ipv4_netnat_portoverload_tbl, portoverload_key);
+		DPS_LOG_ERR("Failed to allocate ipv4 network nat portmap data");
+		return DP_ERROR;
+	}
+	portmap_data->nat_ip = portoverload_key->nat_ip;
+	portmap_data->nat_port = portoverload_key->nat_port;
+	portmap_data->flow_cnt = 1;
+
+	ret = rte_hash_add_key_data(ipv4_netnat_portmap_tbl, portmap_key, portmap_data);
+	if (DP_FAILED(ret)) {
+		rte_hash_del_key(ipv4_netnat_portoverload_tbl, portoverload_key);
+		rte_free(portmap_data);
+		DPS_LOG_ERR("Failed to add ipv4 network nat portmap data", DP_LOG_RET(ret));
+		return ret;
+	}
+
+	return DP_OK;
+}
+
+static int dp_use_existing_portmap_entry(const struct netnat_portmap_key *portmap_key,
+										 struct netnat_portoverload_tbl_key *portoverload_key)
+{
+	struct netnat_portmap_data *portmap_data;
+	int ret;
+
+	ret = rte_hash_lookup_data(ipv4_netnat_portmap_tbl, portmap_key, (void **)&portmap_data);
+	if (DP_FAILED(ret)) {
+		if (ret != -ENOENT)
+			DPS_LOG_ERR("Cannot lookup ipv4 portmap key", DP_LOG_RET(ret));
+		return ret;
+	}
+
+	portoverload_key->nat_port = portmap_data->nat_port;
+
+	ret = rte_hash_lookup(ipv4_netnat_portoverload_tbl, portoverload_key);
+	if (DP_SUCCESS(ret)) {
+		// we already have this one
+		// allocating port for the same flow should never happen (conntrack should prevent this)
+		return -EEXIST;
+	} else if (ret != -ENOENT) {
+		DPS_LOG_ERR("Cannot lookup ipv4 port overload key for an existing nat port", DP_LOG_RET(ret));
+		return ret;
+	}
+
+	// ENOENT: nat_port is the same, but the protocol is different -> just create a portoverload entry
+	ret = rte_hash_add_key(ipv4_netnat_portoverload_tbl, portoverload_key);
+	if (DP_FAILED(ret)) {
+		DPS_LOG_ERR("Failed to add ipv4 network nat port overload key", DP_LOG_RET(ret));
+		return ret;
+	}
+
+	portmap_data->flow_cnt++;
+
+	return DP_OK;
+}
+
+int dp_allocate_network_snat_port(struct snat_data *snat_data, struct dp_flow *df, struct dp_port *port)
 {
 	struct netnat_portoverload_tbl_key portoverload_tbl_key;
 	struct netnat_portmap_key portmap_key;
-	struct netnat_portmap_data *portmap_data;
-	uint16_t min_port, max_port, allocated_port = 0, tmp_port;
-	uint32_t iface_src_info_hash;
 	int ret;
-	bool need_to_find_new_port = true;
-	uint32_t iface_src_ip = ntohl(df->src.src_addr);
-	uint16_t iface_src_port;
-	uint64_t timestamp;
 
 	if (df->l3_type == RTE_ETHER_TYPE_IPV4) {
-		dp_set_ipaddr4(&portmap_key.src_ip, iface_src_ip);
+		dp_set_ipaddr4(&portmap_key.src_ip, ntohl(df->src.src_addr));
 		portoverload_tbl_key.dst_ip = ntohl(df->dst.dst_addr);
 	} else if (df->l3_type == RTE_ETHER_TYPE_IPV6) {
 		dp_set_ipaddr6(&portmap_key.src_ip, &df->src.src_addr6);
 		portoverload_tbl_key.dst_ip = ntohl(dp_get_ipv6_nat64(&df->dst.dst_addr6));
-	} else {
-		return DP_GRPC_ERR_BAD_IPVER;
-	}
+	} else
+		return DP_ERROR;
 
 	if (df->l4_type == IPPROTO_ICMP || df->l4_type == IPPROTO_ICMPV6)
-		iface_src_port = ntohs(df->l4_info.icmp_field.icmp_identifier);
+		portmap_key.iface_src_port = ntohs(df->l4_info.icmp_field.icmp_identifier);
 	else
-		iface_src_port = ntohs(df->l4_info.trans_port.src_port);
+		portmap_key.iface_src_port = ntohs(df->l4_info.trans_port.src_port);
 
-	portmap_key.iface_src_port = iface_src_port;
-	portmap_key.vni = vni;
+	portmap_key.vni = port->iface.vni;
 
 	portoverload_tbl_key.nat_ip = snat_data->nat_ip;
 	portoverload_tbl_key.l4_type = df->l4_type;
@@ -635,88 +741,23 @@ int dp_allocate_network_snat_port(struct snat_data *snat_data, struct dp_flow *d
 	else
 		portoverload_tbl_key.dst_port = ntohs(df->l4_info.trans_port.dst_port);
 
-	ret = rte_hash_lookup_data(ipv4_netnat_portmap_tbl, &portmap_key, (void **)&portmap_data);
-	if (ret != -ENOENT) {
-		if (DP_FAILED(ret)) {
-			DPS_LOG_ERR("Cannot lookup ipv4 portmap key", DP_LOG_RET(ret));
+	ret = dp_use_existing_portmap_entry(&portmap_key, &portoverload_tbl_key);
+	if (DP_FAILED(ret)) {
+		if (ret != -ENOENT)
 			return ret;
-		}
-
-		portoverload_tbl_key.nat_port = portmap_data->nat_port;
 
-		ret = rte_hash_lookup(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-		if (likely(ret == -ENOENT)) {
-			portmap_data->flow_cnt++;
-			allocated_port = portmap_data->nat_port;
-			need_to_find_new_port = false;
-		} else if (DP_FAILED(ret)) {
-			DPS_LOG_ERR("Cannot lookup ipv4 port overload key for an existing nat port", DP_LOG_RET(ret));
+		// ENOENT: need to create a new entry
+		ret = dp_find_new_port(snat_data, &portmap_key, &portoverload_tbl_key);
+		if (DP_FAILED(ret))
 			return ret;
-		}
-	}
-
-	if (need_to_find_new_port) {
-		min_port = snat_data->nat_port_range[0];
-		max_port = snat_data->nat_port_range[1];
-
-		iface_src_info_hash = (uint32_t)rte_hash_hash(ipv4_netnat_portmap_tbl, &portmap_key);
-
-		for (uint16_t p = 0; p < max_port - min_port; p++) {
-			tmp_port = min_port + (uint16_t)((iface_src_info_hash + p) % (uint32_t)(max_port - min_port));
-			portoverload_tbl_key.nat_port = tmp_port;
-			ret = rte_hash_lookup(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-			if (ret == -ENOENT) {
-				allocated_port = tmp_port;
-				break;
-			} else if (DP_FAILED(ret)) {
-				DPS_LOG_ERR("Cannot lookup ipv4 port overload key", DP_LOG_RET(ret));
-				return ret;
-			}
-		}
-
-		if (!allocated_port) {
-			// This is normal once the port range gets saturated, but still helpful in logs.
-			// Therefore the log must be present, just rate-limited (per interface).
-			timestamp = rte_rdtsc();
-			if (timestamp > snat_data->log_timestamp + dp_nat_full_log_delay) {
-				snat_data->log_timestamp = timestamp;
-				DPS_LOG_WARNING("NAT portmap range is full",
-								DP_LOG_IPV4(snat_data->nat_ip),
-								DP_LOG_VNI(vni), DP_LOG_SRC_IPV4(iface_src_ip),
-								DP_LOG_SRC_PORT(iface_src_port));
-			}
-			return DP_ERROR;
-		}
-
-	}
-
-	ret = rte_hash_add_key(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-	if (DP_FAILED(ret)) {
-		DPS_LOG_ERR("Failed to add ipv4 network nat port overload key", DP_LOG_RET(ret));
-		return ret;
-	}
-
-	if (need_to_find_new_port) {
-		portmap_data = rte_zmalloc("netnat_portmap_val", sizeof(struct netnat_portmap_data), RTE_CACHE_LINE_SIZE);
-		if (!portmap_data) {
-			rte_hash_del_key(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-			DPS_LOG_ERR("Failed to allocate ipv4 network nat portmap data");
-			return DP_ERROR;
-		}
-		portmap_data->nat_ip = snat_data->nat_ip;
-		portmap_data->nat_port = allocated_port;
-		portmap_data->flow_cnt = 1;
 
-		ret = rte_hash_add_key_data(ipv4_netnat_portmap_tbl, &portmap_key, portmap_data);
-		if (DP_FAILED(ret)) {
-			rte_hash_del_key(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-			rte_free(portmap_data);
-			DPS_LOG_ERR("Failed to add ipv4 network nat portmap data", DP_LOG_RET(ret));
+		ret = dp_create_new_portmap_entry(&portmap_key, &portoverload_tbl_key);
+		if (DP_FAILED(ret))
 			return ret;
-		}
 	}
 
-	return allocated_port;
+	DP_STATS_NAT_INC_USED_PORT_CNT(port);
+	return portoverload_tbl_key.nat_port;
 }
 
 int dp_remove_network_snat_port(const struct flow_value *cntrack)
@@ -746,8 +787,10 @@ int dp_remove_network_snat_port(const struct flow_value *cntrack)
 
 	// forcefully delete, if it was never there, it's fine
 	ret = rte_hash_del_key(ipv4_netnat_portoverload_tbl, &portoverload_tbl_key);
-	if (DP_FAILED(ret) && ret != -ENOENT)
+	if (DP_FAILED(ret) && ret != -ENOENT) {
+		DPS_LOG_ERR("Cannot delete portoverload key", DP_LOG_RET(ret));
 		return ret;
+	}
 
 	dp_copy_ipaddr(&portmap_key.src_ip, &flow_key_org->l3_src);
 	portmap_key.iface_src_port = flow_key_org->src.port_src;
@@ -759,18 +802,22 @@ int dp_remove_network_snat_port(const struct flow_value *cntrack)
 	portmap_key.iface_src_port = flow_key_org->src.port_src;
 
 	ret = rte_hash_lookup_data(ipv4_netnat_portmap_tbl, &portmap_key, (void **)&portmap_data);
-	if (DP_FAILED(ret))
-		return ret == -ENOENT ? DP_OK : ret;
-
-	portmap_data->flow_cnt--;
-	if (portmap_data->flow_cnt == 0) {
-		ret = rte_hash_del_key(ipv4_netnat_portmap_tbl, &portmap_key);
-		if (DP_FAILED(ret)) {
-			portmap_data->flow_cnt++;
-			DPS_LOG_ERR("Cannot delete portmap key", DP_LOG_RET(ret));
-			return DP_ERROR;
+	if (DP_SUCCESS(ret)) {
+		portmap_data->flow_cnt--;
+		if (portmap_data->flow_cnt == 0) {
+			ret = rte_hash_del_key(ipv4_netnat_portmap_tbl, &portmap_key);
+			if (DP_FAILED(ret)) {
+				portmap_data->flow_cnt++;
+				DPS_LOG_ERR("Cannot delete portmap key", DP_LOG_RET(ret));
+				return DP_ERROR;
+			}
+			rte_free(portmap_data);
 		}
-		rte_free(portmap_data);
+	} else {
+		DPS_LOG_ERR("Cannot lookup portmap key", DP_LOG_RET(ret));
+		if (ret != -ENOENT)
+			return ret;
+		// otherwise already deleted, finish
 	}
 
 	created_port = dp_get_port_by_id(cntrack->created_port_id);
diff --git a/src/nodes/snat_node.c b/src/nodes/snat_node.c
index f08b3d9ef..ad4374721 100644
--- a/src/nodes/snat_node.c
+++ b/src/nodes/snat_node.c
@@ -35,14 +35,12 @@ static __rte_always_inline int dp_process_ipv4_snat(struct rte_mbuf *m, struct d
 		cntrack->nf_info.nat_type = DP_FLOW_NAT_TYPE_VIP;
 	}
 	if (snat_data->nat_ip != 0) {
-		ret = dp_allocate_network_snat_port(snat_data, df, port->iface.vni);
+		ret = dp_allocate_network_snat_port(snat_data, df, port);
 		if (DP_FAILED(ret))
 			return DP_ERROR;
 		nat_port = (uint16_t)ret;
 		ipv4_hdr->src_addr = htonl(snat_data->nat_ip);
 
-		DP_STATS_NAT_INC_USED_PORT_CNT(port);
-
 		if (df->l4_type == IPPROTO_ICMP) {
 			dp_change_icmp_identifier(m, nat_port);
 			cntrack->offload_state.orig = DP_FLOW_OFFLOADED;
@@ -69,8 +67,11 @@ static __rte_always_inline int dp_process_ipv4_snat(struct rte_mbuf *m, struct d
 	if (snat_data->nat_ip != 0)
 		cntrack->flow_key[DP_FLOW_DIR_REPLY].port_dst = df->nat_port;
 
-	if (DP_FAILED(dp_add_flow(&cntrack->flow_key[DP_FLOW_DIR_REPLY], cntrack)))
+	if (DP_FAILED(dp_add_flow(&cntrack->flow_key[DP_FLOW_DIR_REPLY], cntrack))) {
+		if (snat_data->nat_ip != 0)
+			dp_remove_network_snat_port(cntrack);
 		return DP_ERROR;
+	}
 
 	dp_ref_inc(&cntrack->ref_count);
 	return DP_OK;
@@ -88,19 +89,16 @@ static __rte_always_inline int dp_process_ipv6_nat64(struct rte_mbuf *m, struct
 	snat64_data.nat_ip = port->iface.nat_ip;
 	snat64_data.nat_port_range[0] = port->iface.nat_port_range[0];
 	snat64_data.nat_port_range[1] = port->iface.nat_port_range[1];
-	ret = dp_allocate_network_snat_port(&snat64_data, df, port->iface.vni);
+	ret = dp_allocate_network_snat_port(&snat64_data, df, port);
 	if (DP_FAILED(ret))
 		return DP_ERROR;
 
 	nat_port = (uint16_t)ret;
-	DP_STATS_NAT_INC_USED_PORT_CNT(port);
-
 	df->nat_port = nat_port;
 	df->nat_type = DP_NAT_64_CHG_SRC_IP;
 	df->nat_addr = snat64_data.nat_ip;
 	if (DP_FAILED(dp_nat_chg_ipv6_to_ipv4_hdr(df, m, snat64_data.nat_ip, &dest_ip4))) {
 		dp_remove_network_snat_port(cntrack);
-		DP_STATS_NAT_DEC_USED_PORT_CNT(port);
 		return DP_ERROR;
 	}
 
@@ -133,8 +131,10 @@ static __rte_always_inline int dp_process_ipv6_nat64(struct rte_mbuf *m, struct
 	cntrack->flow_key[DP_FLOW_DIR_REPLY].port_dst = df->nat_port;
 	cntrack->flow_key[DP_FLOW_DIR_REPLY].proto = df->l4_type;
 
-	if (DP_FAILED(dp_add_flow(&cntrack->flow_key[DP_FLOW_DIR_REPLY], cntrack)))
+	if (DP_FAILED(dp_add_flow(&cntrack->flow_key[DP_FLOW_DIR_REPLY], cntrack))) {
+		dp_remove_network_snat_port(cntrack);
 		return DP_ERROR;
+	}
 
 	dp_ref_inc(&cntrack->ref_count);
 	return DP_OK;