diff --git a/dpd-client/tests/integration_tests/mcast.rs b/dpd-client/tests/integration_tests/mcast.rs index 70e6f58..8b2085f 100644 --- a/dpd-client/tests/integration_tests/mcast.rs +++ b/dpd-client/tests/integration_tests/mcast.rs @@ -53,6 +53,30 @@ impl ToIpAddr for types::UnderlayMulticastIpv6 { } } +/// Count table entries matching a specific IP address in any key field. +fn count_entries_for_ip(entries: &[types::TableEntry], ip: &str) -> usize { + entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains(ip))) + .count() +} + +/// Check if any table entry for the given IP has a `forward_vlan` action with +/// the specified VLAN ID. +fn has_vlan_action_for_ip( + entries: &[types::TableEntry], + ip: &str, + vlan: u16, +) -> bool { + entries.iter().any(|e| { + e.keys.values().any(|v| v.contains(ip)) + && e.action_args + .get("vlan_id") + .map(|v| v == &vlan.to_string()) + .unwrap_or(false) + }) +} + async fn check_counter_incremented( switch: &Switch, counter_name: &str, @@ -542,6 +566,66 @@ async fn test_group_creation_with_validation() -> TestResult { _ => panic!("Expected ErrorResponse for invalid group ID"), } + // Test with reserved VLAN ID 0 (also invalid) + let external_vlan0 = types::MulticastGroupCreateExternalEntry { + group_ip: IpAddr::V4(MULTICAST_TEST_IPV4), + tag: Some(TEST_TAG.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { + vlan_id: Some(0), // Invalid: VLAN 0 is reserved + }, + sources: None, + }; + + let res = switch + .client + .multicast_group_create_external(&external_vlan0) + .await + .expect_err("Should fail with reserved VLAN ID 0"); + + match res { + Error::ErrorResponse(inner) => { + assert_eq!( + inner.status(), + 400, + "Expected 400 Bad Request status code for VLAN 0" + ); + } + _ => panic!("Expected ErrorResponse for reserved VLAN ID 0"), + } + + // Test with reserved VLAN ID 1 (also invalid) + let 
external_vlan1 = types::MulticastGroupCreateExternalEntry { + group_ip: IpAddr::V4(MULTICAST_TEST_IPV4), + tag: Some(TEST_TAG.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { + vlan_id: Some(1), // Invalid: VLAN 1 is reserved + }, + sources: None, + }; + + let res = switch + .client + .multicast_group_create_external(&external_vlan1) + .await + .expect_err("Should fail with reserved VLAN ID 1"); + + match res { + Error::ErrorResponse(inner) => { + assert_eq!( + inner.status(), + 400, + "Expected 400 Bad Request status code for VLAN 1" + ); + } + _ => panic!("Expected ErrorResponse for reserved VLAN ID 1"), + } + // Test with valid parameters // IPv4 groups are always external let external_valid = types::MulticastGroupCreateExternalEntry { @@ -712,8 +796,52 @@ async fn test_vlan_propagation_to_internal() -> TestResult { "ff04::200".parse::().unwrap() ); - // Verify the admin-scoped group now has access to the VLAN via NAT target reference - // Check the bitmap table to see if VLAN 42 is properly set (this is where VLAN matters for P4) + // Verify IPv4 route table has ONE entry for the VLAN group. + // Route tables use simple dst_addr matching with forward_vlan(vid) action. + // VLAN isolation (preventing translation) is handled by NAT ingress tables. 
+ let route_table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await + .expect("Should dump IPv4 route table") + .into_inner(); + let group_entries: Vec<_> = route_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.1.2.3"))) + .collect(); + assert_eq!( + group_entries.len(), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + // Verify the action is forward_vlan with VLAN 42 + assert!( + group_entries[0] + .action_args + .get("vlan_id") + .map(|v| v == "42") + .unwrap_or(false), + "Route entry should have forward_vlan(42) action" + ); + + // Verify NAT ingress table has TWO entries for VLAN isolation: + // 1. Untagged match -> forward (for decapsulated Geneve) + // 2. Tagged match with VLAN 42 -> forward (for already-tagged) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await + .expect("Should dump NAT ingress table") + .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, "224.1.2.3"), + 2, + "NAT table should have 2 entries for VLAN isolation (untagged + tagged)" + ); + + // Verify the admin-scoped group now has access to the VLAN via NAT target + // reference let bitmap_table = switch .client .table_dump("pipe.Egress.mcast_egress.tbl_decap_ports") @@ -721,7 +849,8 @@ async fn test_vlan_propagation_to_internal() -> TestResult { .expect("Should clean up internal group") .into_inner(); - // Verify the admin-scoped group's bitmap entry has VLAN 42 from external group propagation + // Verify the admin-scoped group's bitmap entry has VLAN 42 from external + // group propagation assert!( bitmap_table .entries @@ -730,7 +859,8 @@ async fn test_vlan_propagation_to_internal() -> TestResult { "Admin-scoped group bitmap should have VLAN 42 from external group" ); - // Delete external group first since it references the internal group via NAT target + // Delete external group first since it references the internal group via 
+ // NAT target cleanup_test_group(switch, created_external.group_ip, TEST_TAG) .await .unwrap(); @@ -789,6 +919,42 @@ async fn test_group_api_lifecycle() { ); assert_eq!(created.external_forwarding.vlan_id, Some(vlan_id)); + // Route table: 1 entry (dst_addr only, VLAN via action) + let route_table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await + .expect("Should dump route table") + .into_inner(); + let route_entries: Vec<_> = route_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.0.1.0"))) + .collect(); + assert_eq!( + route_entries.len(), + 1, + "Route table should have 1 entry per group (dst_addr only)" + ); + + // NAT table: 2 entries for VLAN groups (untagged + tagged match keys) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await + .expect("Should dump NAT table") + .into_inner(); + let nat_entries: Vec<_> = nat_table + .entries + .iter() + .filter(|e| e.keys.values().any(|v| v.contains("224.0.1.0"))) + .collect(); + assert_eq!( + nat_entries.len(), + 2, + "NAT table should have 2 entries for VLAN group (untagged + tagged)" + ); + // Get all groups and verify our group is included let groups = switch .client @@ -3890,10 +4056,6 @@ async fn test_multicast_reset_all_tables() -> TestResult { Ok(()) } -/* - * Commented out untl https://github.com/oxidecomputer/dendrite/issues/107 is - * fixed - * #[tokio::test] #[ignore] async fn test_multicast_vlan_translation_not_possible() -> TestResult { @@ -3970,7 +4132,6 @@ async fn test_multicast_vlan_translation_not_possible() -> TestResult { .unwrap(); cleanup_test_group(switch, internal_multicast_ip, TEST_TAG).await } -*/ #[tokio::test] #[ignore] @@ -7048,6 +7209,229 @@ async fn test_underlay_delete_recreate_recovery_flow() -> TestResult { Ok(()) } +/// Tests the full VLAN lifecycle for multicast groups, verifying route table +/// entries are correctly managed through all VLAN transitions. 
+/// +/// Route tables use dst_addr only matching with action-based VLAN tagging: +/// - 1 entry per group: forward (no VLAN) or forward_vlan(vid) +/// +/// NAT tables use VLAN-aware matching for isolation: +/// - 2 entries for VLAN groups: untagged + tagged match keys +#[tokio::test] +#[ignore] +async fn test_vlan_lifecycle_route_entries() -> TestResult { + let switch = &*get_switch().await; + let tag = "vlan_lifecycle_test"; + + // Setup: create internal admin-scoped group for NAT target + let internal_ip = IpAddr::V6(MULTICAST_NAT_IP); + let egress_port = PhysPort(28); + create_test_multicast_group( + switch, + internal_ip, + Some(tag), + &[(egress_port, types::Direction::Underlay)], + types::InternalForwarding { nat_target: None }, + types::ExternalForwarding { vlan_id: None }, + None, + ) + .await; + + let group_ip = IpAddr::V4(MULTICAST_TEST_IPV4); + let nat_target = create_nat_target_ipv4(); + let test_ip = "224.0.1.0"; + + // Case: Create with NO VLAN - should have 1 route entry + let create_no_vlan = types::MulticastGroupCreateExternalEntry { + group_ip, + tag: Some(tag.to_string()), + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: None }, + sources: None, + }; + + switch + .client + .multicast_group_create_external(&create_no_vlan) + .await + .expect("Should create group without VLAN"); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? 
+ .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Group without VLAN should have 1 route entry" + ); + + // Case: Update to ADD VLAN 10 + // Route: 1 entry with forward_vlan(10) action + // NAT: 2 entries (untagged + tagged) for VLAN isolation + let update_add_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: Some(10) }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_add_vlan, + ) + .await + .expect("Should update group to add VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, Some(10)); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + assert!( + has_vlan_action_for_ip(&table.entries, test_ip, 10), + "Route entry should have forward_vlan(10) action" + ); + + // Verify NAT table has 2 entries for VLAN isolation + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? 
+ .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 2, + "NAT table should have 2 entries for VLAN group (untagged + tagged)" + ); + + // Case: Update to CHANGE VLAN 10 -> 20 + // Route: same 1 entry, action changes to forward_vlan(20) + // NAT: 2 entries with new VLAN, no stale entries + let update_change_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: Some(20) }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_change_vlan, + ) + .await + .expect("Should update group to change VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, Some(20)); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + assert!( + has_vlan_action_for_ip(&table.entries, test_ip, 20), + "Route entry should have forward_vlan(20) action" + ); + assert!( + !has_vlan_action_for_ip(&table.entries, test_ip, 10), + "Should NOT have stale forward_vlan(10) action" + ); + + // NAT table should still have 2 entries + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? 
+ .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 2, + "NAT table should have 2 entries for VLAN group" + ); + + // Case: Update to REMOVE VLAN + // Route: 1 entry with forward action (no VLAN) + // NAT: 1 entry (untagged only) + let update_remove_vlan = types::MulticastGroupUpdateExternalEntry { + internal_forwarding: types::InternalForwarding { + nat_target: Some(nat_target.clone()), + }, + external_forwarding: types::ExternalForwarding { vlan_id: None }, + sources: None, + }; + + let updated = switch + .client + .multicast_group_update_external( + &group_ip, + &make_tag(tag), + &update_remove_vlan, + ) + .await + .expect("Should update group to remove VLAN"); + assert_eq!(updated.into_inner().external_forwarding.vlan_id, None); + + let table = switch + .client + .table_dump("pipe.Ingress.l3_router.MulticastRouter4.tbl") + .await? + .into_inner(); + + assert_eq!( + count_entries_for_ip(&table.entries, test_ip), + 1, + "Route table uses dst_addr only - 1 entry per group" + ); + + assert!( + !has_vlan_action_for_ip(&table.entries, test_ip, 20), + "Should NOT have forward_vlan action after VLAN removal" + ); + + // NAT table should have 1 entry now (no VLAN = untagged only) + let nat_table = switch + .client + .table_dump("pipe.Ingress.nat_ingress.ingress_ipv4_mcast") + .await? + .into_inner(); + assert_eq!( + count_entries_for_ip(&nat_table.entries, test_ip), + 1, + "NAT table should have 1 entry after VLAN removal" + ); + + // Cleanup + switch + .client + .multicast_reset_by_tag(&make_tag(tag)) + .await + .expect("Should cleanup by tag"); + + Ok(()) +} + /// Tests that update operations validate tags. /// /// When updating a multicast group, the provided tag must match the existing diff --git a/dpd/p4/sidecar.p4 b/dpd/p4/sidecar.p4 index 5286f8a..a22f386 100644 --- a/dpd/p4/sidecar.p4 +++ b/dpd/p4/sidecar.p4 @@ -612,8 +612,16 @@ control NatIngress ( } // Separate table for IPv4 multicast packets that need to be encapsulated. 
+ // Groups without VLAN match untagged only. Groups with VLAN match both + // untagged (for decapsulated Geneve from underlay) and correctly tagged. + // Packets with wrong VLAN miss and are not NAT encapsulated. + // When hdr.vlan.isValid()==false, vlan_id matches as 0. table ingress_ipv4_mcast { - key = { hdr.ipv4.dst_addr : exact; } + key = { + hdr.ipv4.dst_addr : exact; + hdr.vlan.isValid() : exact; + hdr.vlan.vlan_id : exact; + } actions = { mcast_forward_ipv4_to; } const size = IPV4_MULTICAST_TABLE_SIZE; counters = mcast_ipv4_ingress_ctr; @@ -630,8 +638,16 @@ control NatIngress ( } // Separate table for IPv6 multicast packets that need to be encapsulated. + // Groups without VLAN match untagged only. Groups with VLAN match both + // untagged (for decapsulated Geneve from underlay) and correctly tagged. + // Packets with wrong VLAN miss and are not NAT encapsulated. + // When hdr.vlan.isValid()==false, vlan_id matches as 0. table ingress_ipv6_mcast { - key = { hdr.ipv6.dst_addr : exact; } + key = { + hdr.ipv6.dst_addr : exact; + hdr.vlan.isValid() : exact; + hdr.vlan.vlan_id : exact; + } actions = { mcast_forward_ipv6_to; } const size = IPV6_MULTICAST_TABLE_SIZE; counters = mcast_ipv6_ingress_ctr; @@ -1273,9 +1289,7 @@ control MulticastRouter4( } table tbl { - key = { - hdr.ipv4.dst_addr : exact; - } + key = { hdr.ipv4.dst_addr : exact; } actions = { forward; forward_vlan; unreachable; } default_action = unreachable; const size = IPV4_MULTICAST_TABLE_SIZE; @@ -1284,16 +1298,18 @@ control MulticastRouter4( apply { // If the packet came in with a VLAN tag, we need to invalidate - // the VLAN header before we do the lookup. The VLAN header - // will be re-attached if set in the forward_vlan action. + // the VLAN header before we do the lookup. The VLAN header + // will be re-attached if set in the forward_vlan action (or + // untagged for groups without VLAN). This prevents unintended + // VLAN translation. 
if (hdr.vlan.isValid()) { hdr.ethernet.ether_type = hdr.vlan.ether_type; hdr.vlan.setInvalid(); } if (!tbl.apply().hit) { - icmp_error(ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE); - meta.drop_reason = DROP_IPV6_UNROUTEABLE; + icmp_error(ICMP_DEST_UNREACH, ICMP_DST_UNREACH_NET); + meta.drop_reason = DROP_IPV4_UNROUTEABLE; // Dont set meta.dropped because we want an error packet // to go out. } else if (hdr.ipv4.ttl == 1 && !meta.service_routed) { @@ -1411,9 +1427,7 @@ control MulticastRouter6 ( } table tbl { - key = { - hdr.ipv6.dst_addr : exact; - } + key = { hdr.ipv6.dst_addr : exact; } actions = { forward; forward_vlan; unreachable; } default_action = unreachable; const size = IPV6_MULTICAST_TABLE_SIZE; @@ -1422,8 +1436,10 @@ control MulticastRouter6 ( apply { // If the packet came in with a VLAN tag, we need to invalidate - // the VLAN header before we do the lookup. The VLAN header - // will be re-attached if set in the forward_vlan action. + // the VLAN header before we do the lookup. The VLAN header + // will be re-attached if set in the forward_vlan action (or + // untagged for groups without VLAN). This prevents unintended + // VLAN translation. if (hdr.vlan.isValid()) { hdr.ethernet.ether_type = hdr.vlan.ether_type; hdr.vlan.setInvalid(); @@ -2251,12 +2267,18 @@ control Egress( if (is_link_local_ipv6_mcast) { link_local_mcast_ctr.count(eg_intr_md.egress_port); - } else if (hdr.geneve.isValid()) { - external_mcast_ctr.count(eg_intr_md.egress_port); } else if (hdr.geneve.isValid() && hdr.geneve_opts.oxg_mcast.isValid() && - hdr.geneve_opts.oxg_mcast.mcast_tag == MULTICAST_TAG_UNDERLAY) { + hdr.geneve_opts.oxg_mcast.mcast_tag != MULTICAST_TAG_EXTERNAL) { + // Encapsulated multicast going to sleds. Includes both + // MULTICAST_TAG_UNDERLAY and MULTICAST_TAG_UNDERLAY_EXTERNAL + // packets that were not decapped for this egress port. underlay_mcast_ctr.count(eg_intr_md.egress_port); + } else { + // Decapped external multicast going to front panel ports. 
+ // Either originally tagged EXTERNAL, or UNDERLAY_EXTERNAL + // packets that were decapped based on port bitmap. + external_mcast_ctr.count(eg_intr_md.egress_port); } } else { // non-multicast packets should bypass the egress diff --git a/dpd/src/counters.rs b/dpd/src/counters.rs index dfad527..d10f34a 100644 --- a/dpd/src/counters.rs +++ b/dpd/src/counters.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/ // -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company /// This module contains the support for reading the indirect counters defined /// by the p4 program. While direct counters are attached to an existing table, @@ -284,6 +284,7 @@ enum DropReason { GeneveOptionsTooLong, GeneveOptionMalformed, GeneveOptionUnknown, + VlanMismatch, } impl TryFrom for DropReason { @@ -317,6 +318,7 @@ impl TryFrom for DropReason { 23 => Ok(DropReason::GeneveOptionsTooLong), 24 => Ok(DropReason::GeneveOptionMalformed), 25 => Ok(DropReason::GeneveOptionUnknown), + 26 => Ok(DropReason::VlanMismatch), x => Err(format!("Unrecognized drop reason: {x}")), } } @@ -343,8 +345,8 @@ fn reason_label(ctr: u8) -> Result, String> { DropReason::Ipv4TtlExceeded => "ipv4_ttl_exceeded".to_string(), DropReason::Ipv6TtlInvalid => "ipv6_ttl_invalid".to_string(), DropReason::Ipv6TtlExceeded => "ipv6_ttl_exceeded".to_string(), - DropReason::Ipv4Unrouteable => "ipv6_unrouteable".to_string(), - DropReason::Ipv6Unrouteable => "ipv4_unrouteable".to_string(), + DropReason::Ipv4Unrouteable => "ipv4_unrouteable".to_string(), + DropReason::Ipv6Unrouteable => "ipv6_unrouteable".to_string(), DropReason::NatIngressMiss => "nat_ingress_miss".to_string(), DropReason::MulticastNoGroup => "multicast_no_group".to_string(), DropReason::MulticastInvalidMac => "multicast_invalid_mac".to_string(), @@ -362,6 +364,7 @@ fn reason_label(ctr: u8) -> Result, String> { 
"geneve_option_malformed".to_string() } DropReason::GeneveOptionUnknown => "geneve_option_unknown".to_string(), + DropReason::VlanMismatch => "vlan_mismatch".to_string(), }; Ok(Some(label)) } diff --git a/dpd/src/mcast/mod.rs b/dpd/src/mcast/mod.rs index eaf7f5c..e2f88a2 100644 --- a/dpd/src/mcast/mod.rs +++ b/dpd/src/mcast/mod.rs @@ -354,6 +354,7 @@ pub(crate) fn add_group_external( scoped_underlay_id.id(), nat_target, group_info.sources.as_deref(), + group_info.external_forwarding.vlan_id, ); // Configure external tables and handle VLAN propagation @@ -664,8 +665,12 @@ pub(crate) fn modify_group_external( // Create rollback context for external group update let group_entry_for_rollback = group_entry.clone(); - let rollback_ctx = - GroupUpdateRollbackContext::new(s, group_ip, &group_entry_for_rollback); + let rollback_ctx = GroupUpdateRollbackContext::new( + s, + group_ip, + &group_entry_for_rollback, + new_group_info.external_forwarding.vlan_id, + ); // Pre-compute normalized sources for rollback purposes let normalized_sources = normalize_sources(new_group_info.sources.clone()); @@ -701,10 +706,9 @@ pub(crate) fn modify_group_external( updated_group.int_fwding.nat_target = Some(nat_target); let old_vlan_id = updated_group.ext_fwding.vlan_id; - updated_group.ext_fwding.vlan_id = new_group_info - .external_forwarding - .vlan_id - .or(updated_group.ext_fwding.vlan_id); + // VLAN is assigned directly -> Some(x) sets VLAN, None removes VLAN + updated_group.ext_fwding.vlan_id = + new_group_info.external_forwarding.vlan_id; updated_group.sources = normalize_sources( new_group_info.sources.clone().or(updated_group.sources), ); @@ -797,9 +801,11 @@ pub(crate) fn modify_group_internal( s, group_ip.into(), &group_entry_for_rollback, + // Internal groups don't have VLANs, so `attempted_vlan_id` is None. 
+ None, ); - // Internal groups don't update sources - always `None` + // Internal groups don't update sources, so always `None` debug_assert!( group_entry.sources.is_none(), "Internal groups should not have sources" @@ -1190,7 +1196,7 @@ pub(super) fn sources_contain_any(sources: &[IpSrc]) -> bool { /// This ensures uniqueness across the group's lifecycle and prevents /// tag collision when group IPs are reused after deletion. fn generate_default_tag(group_ip: IpAddr) -> String { - format!("{}:{}", Uuid::new_v4(), group_ip) + format!("{}:{group_ip}", Uuid::new_v4()) } /// Multiple representations map to "allow any source" in P4: @@ -1365,12 +1371,17 @@ fn configure_external_tables( add_source_filters(s, group_ip, group_info.sources.as_deref())?; // Add NAT entry + let vlan_id = group_info.external_forwarding.vlan_id; match group_ip { IpAddr::V4(ipv4) => { - table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat_target)?; + table::mcast::mcast_nat::add_ipv4_entry( + s, ipv4, nat_target, vlan_id, + )?; } IpAddr::V6(ipv6) => { - table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat_target)?; + table::mcast::mcast_nat::add_ipv6_entry( + s, ipv6, nat_target, vlan_id, + )?; } } @@ -1700,46 +1711,48 @@ fn update_external_tables( add_source_filters(s, group_ip, new_sources.as_deref())?; } + let old_vlan_id = group_entry.ext_fwding.vlan_id; + let new_vlan_id = new_group_info.external_forwarding.vlan_id; + // Update NAT target - external groups always have NAT targets - if Some( - new_group_info - .internal_forwarding - .nat_target - .ok_or_else(|| { - DpdError::Invalid( - "external groups must have NAT target".to_string(), - ) - })?, - ) != group_entry.int_fwding.nat_target + // Also handles VLAN changes since VLAN is part of the NAT match key + let new_nat_target = new_group_info + .internal_forwarding + .nat_target + .ok_or_else(|| { + DpdError::Invalid( + "external groups must have NAT target".to_string(), + ) + })?; + + if Some(new_nat_target) != 
group_entry.int_fwding.nat_target + || old_vlan_id != new_vlan_id { update_nat_tables( s, group_ip, - Some(new_group_info.internal_forwarding.nat_target.ok_or_else( - || { - DpdError::Invalid( - "external groups must have NAT target".to_string(), - ) - }, - )?), + Some(new_nat_target), group_entry.int_fwding.nat_target, + old_vlan_id, + new_vlan_id, )?; } - // Update VLAN if it changed - if new_group_info.external_forwarding.vlan_id - != group_entry.ext_fwding.vlan_id - { + // Update route table VLAN if it changed + // Route tables use simple dst_addr matching but select forward vs forward_vlan action + if old_vlan_id != new_vlan_id { match group_ip { IpAddr::V4(ipv4) => table::mcast::mcast_route::update_ipv4_entry( s, ipv4, - new_group_info.external_forwarding.vlan_id, + old_vlan_id, + new_vlan_id, ), IpAddr::V6(ipv6) => table::mcast::mcast_route::update_ipv6_entry( s, ipv6, - new_group_info.external_forwarding.vlan_id, + old_vlan_id, + new_vlan_id, ), }?; } @@ -1830,7 +1843,11 @@ fn delete_group_tables( // (which have NAT targets). if group.int_fwding.nat_target.is_some() { remove_ipv4_source_filters(s, ipv4, group.sources.as_deref())?; - table::mcast::mcast_nat::del_ipv4_entry(s, ipv4)?; + table::mcast::mcast_nat::del_ipv4_entry( + s, + ipv4, + group.ext_fwding.vlan_id, + )?; } delete_group_bitmap_entries(s, group)?; @@ -1844,7 +1861,11 @@ fn delete_group_tables( // NAT targets). Internal groups don't have source filtering. 
if group.int_fwding.nat_target.is_some() { remove_ipv6_source_filters(s, ipv6, group.sources.as_deref())?; - table::mcast::mcast_nat::del_ipv6_entry(s, ipv6)?; + table::mcast::mcast_nat::del_ipv6_entry( + s, + ipv6, + group.ext_fwding.vlan_id, + )?; } delete_group_bitmap_entries(s, group)?; @@ -1882,31 +1903,47 @@ fn update_nat_tables( group_ip: IpAddr, new_nat_target: Option, old_nat_target: Option, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { match (group_ip, new_nat_target, old_nat_target) { - (IpAddr::V4(ipv4), Some(nat), Some(_)) => { - // NAT to NAT - update existing entry - table::mcast::mcast_nat::update_ipv4_entry(s, ipv4, nat) + (IpAddr::V4(ipv4), Some(new_nat), Some(old_nat)) => { + // NAT to NAT - update existing entry (handles VLAN changes internally) + table::mcast::mcast_nat::update_ipv4_entry( + s, + ipv4, + new_nat, + old_nat, + old_vlan_id, + new_vlan_id, + ) } - (IpAddr::V6(ipv6), Some(nat), Some(_)) => { - // NAT to NAT - update existing entry - table::mcast::mcast_nat::update_ipv6_entry(s, ipv6, nat) + (IpAddr::V6(ipv6), Some(new_nat), Some(old_nat)) => { + // NAT to NAT - update existing entry (handles VLAN changes internally) + table::mcast::mcast_nat::update_ipv6_entry( + s, + ipv6, + new_nat, + old_nat, + old_vlan_id, + new_vlan_id, + ) } (IpAddr::V4(ipv4), Some(nat), None) => { // No NAT to NAT - add new entry - table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat) + table::mcast::mcast_nat::add_ipv4_entry(s, ipv4, nat, new_vlan_id) } (IpAddr::V6(ipv6), Some(nat), None) => { // No NAT to NAT - add new entry - table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat) + table::mcast::mcast_nat::add_ipv6_entry(s, ipv6, nat, new_vlan_id) } (IpAddr::V4(ipv4), None, Some(_)) => { // NAT to no NAT - delete entry - table::mcast::mcast_nat::del_ipv4_entry(s, ipv4) + table::mcast::mcast_nat::del_ipv4_entry(s, ipv4, old_vlan_id) } (IpAddr::V6(ipv6), None, Some(_)) => { // NAT to no NAT - delete entry - 
table::mcast::mcast_nat::del_ipv6_entry(s, ipv6) + table::mcast::mcast_nat::del_ipv6_entry(s, ipv6, old_vlan_id) } _ => Ok(()), // No change (None → None) } @@ -1953,33 +1990,49 @@ fn update_internal_group_bitmap_tables( /// Only updates the external bitmap entry since that's the only bitmap /// entry created during group configuration. The underlay replication /// is handled separately via the ASIC's multicast group primitives. +/// +/// # Arguments +/// +/// * `s` - Switch instance for table operations. +/// * `group_ip` - IP address of the multicast group. +/// * `external_group_id` - ID of the external multicast group for bitmap updates. +/// * `members` - Group members used to recreate port bitmap. +/// * `current_vlan_id` - VLAN currently in the table (may be the attempted new VLAN). +/// * `target_vlan_id` - VLAN to restore to (the original VLAN). fn update_fwding_tables( s: &Switch, group_ip: IpAddr, external_group_id: MulticastGroupId, members: &[MulticastGroupMember], - vlan_id: Option, + current_vlan_id: Option, + target_vlan_id: Option, ) -> DpdResult<()> { match group_ip { - IpAddr::V4(ipv4) => { - table::mcast::mcast_route::update_ipv4_entry(s, ipv4, vlan_id) - } - IpAddr::V6(ipv6) => { - table::mcast::mcast_route::update_ipv6_entry(s, ipv6, vlan_id) - .and_then(|_| { - // Update external bitmap for external members - // (only external bitmap entries exist; underlay replication - // uses ASIC multicast groups directly) - let external_port_bitmap = - create_port_bitmap(members, Direction::External); - table::mcast::mcast_egress::update_bitmap_entry( - s, - external_group_id, - &external_port_bitmap, - vlan_id, - ) - }) - } + IpAddr::V4(ipv4) => table::mcast::mcast_route::update_ipv4_entry( + s, + ipv4, + current_vlan_id, + target_vlan_id, + ), + IpAddr::V6(ipv6) => table::mcast::mcast_route::update_ipv6_entry( + s, + ipv6, + current_vlan_id, + target_vlan_id, + ) + .and_then(|_| { + // Update external bitmap for external members + // (only external 
bitmap entries exist, underlay replication + // uses ASIC multicast groups directly) + let external_port_bitmap = + create_port_bitmap(members, Direction::External); + table::mcast::mcast_egress::update_bitmap_entry( + s, + external_group_id, + &external_port_bitmap, + target_vlan_id, + ) + }), } } diff --git a/dpd/src/mcast/rollback.rs b/dpd/src/mcast/rollback.rs index 37f727f..ac63827 100644 --- a/dpd/src/mcast/rollback.rs +++ b/dpd/src/mcast/rollback.rs @@ -57,12 +57,12 @@ trait RollbackOps { ) -> DpdResult<()> { self.log_rollback_error( "remove new source filters", - &format!("for group {}", self.group_ip()), + &format!("for group {group_ip}", group_ip = self.group_ip()), remove_source_filters(self.switch(), self.group_ip(), new_sources), ); self.log_rollback_error( "restore original source filters", - &format!("for group {}", self.group_ip()), + &format!("for group {group_ip}", group_ip = self.group_ip()), add_source_filters(self.switch(), self.group_ip(), orig_sources), ); Ok(()) @@ -128,6 +128,7 @@ pub(crate) struct GroupCreateRollbackContext<'a> { underlay_id: MulticastGroupId, nat_target: Option, sources: Option<&'a [IpSrc]>, + vlan_id: Option, } impl RollbackOps for GroupCreateRollbackContext<'_> { @@ -253,6 +254,7 @@ impl<'a> GroupCreateRollbackContext<'a> { underlay_id: MulticastGroupId, nat_target: NatTarget, sources: Option<&'a [IpSrc]>, + vlan_id: Option, ) -> Self { Self { switch, @@ -261,6 +263,7 @@ impl<'a> GroupCreateRollbackContext<'a> { underlay_id, nat_target: Some(nat_target), sources, + vlan_id, } } @@ -278,6 +281,7 @@ impl<'a> GroupCreateRollbackContext<'a> { underlay_id, nat_target: None, sources: None, + vlan_id: None, } } @@ -343,16 +347,18 @@ impl<'a> GroupCreateRollbackContext<'a> { self.log_rollback_error( "remove external multicast group", &format!( - "for IP {} with ID {}", - self.group_ip, self.external_id + "for IP {group_ip} with ID {external_id}", + group_ip = self.group_ip, + external_id = self.external_id ), 
self.switch.asic_hdl.mc_group_destroy(self.external_id), ); self.log_rollback_error( "remove underlay multicast group", &format!( - "for IP {} with ID {}", - self.group_ip, self.underlay_id + "for IP {group_ip} with ID {underlay_id}", + group_ip = self.group_ip, + underlay_id = self.underlay_id ), self.switch.asic_hdl.mc_group_destroy(self.underlay_id), ); @@ -417,6 +423,7 @@ impl<'a> GroupCreateRollbackContext<'a> { table::mcast::mcast_nat::del_ipv4_entry( self.switch, ipv4, + self.vlan_id, ), ); } @@ -434,7 +441,10 @@ impl<'a> GroupCreateRollbackContext<'a> { // (bitmap entries are only created for internal groups with both group types) self.log_rollback_error( "delete IPv6 egress bitmap entry", - &format!("for external group {}", self.external_id), + &format!( + "for external group {external_id}", + external_id = self.external_id + ), table::mcast::mcast_egress::del_bitmap_entry( self.switch, self.external_id, @@ -507,6 +517,7 @@ impl<'a> GroupCreateRollbackContext<'a> { table::mcast::mcast_nat::del_ipv6_entry( self.switch, ipv6, + self.vlan_id, ), ); } @@ -529,6 +540,10 @@ pub(crate) struct GroupUpdateRollbackContext<'a> { switch: &'a Switch, group_ip: IpAddr, original_group: &'a MulticastGroup, + /// The VLAN that the update is attempting to set. This may differ from + /// `original_group.ext_fwding.vlan_id` when a VLAN change is in progress. + /// Used during rollback to delete entries that may have been added. + attempted_vlan_id: Option, } impl RollbackOps for GroupUpdateRollbackContext<'_> { @@ -559,7 +574,7 @@ impl RollbackOps for GroupUpdateRollbackContext<'_> { return Ok(()); } - // Internal group - perform actual port rollback + // Internal group -> perform actual port rollback debug!( self.switch.log, "rolling back multicast group update"; @@ -707,15 +722,25 @@ impl RollbackOps for GroupUpdateRollbackContext<'_> { impl<'a> GroupUpdateRollbackContext<'a> { /// Create rollback context for group update operations. 
+ /// + /// # Arguments + /// + /// * `switch` - Switch instance for table operations. + /// * `group_ip` - IP address of the multicast group being updated. + /// * `original_group` - Reference to the group's state before the update. + /// * `attempted_vlan_id` - The VLAN that the update is attempting to set. + /// Used during rollback to delete entries that may have been added. pub(crate) fn new( switch: &'a Switch, group_ip: IpAddr, original_group: &'a MulticastGroup, + attempted_vlan_id: Option, ) -> Self { Self { switch, group_ip, original_group, + attempted_vlan_id, } } @@ -744,7 +769,7 @@ impl<'a> GroupUpdateRollbackContext<'a> { if let Some(replication_info) = replication_info { self.log_rollback_error( "restore replication settings", - &format!("for group {}", self.group_ip), + &format!("for group {group_ip}", group_ip = self.group_ip), update_replication_tables( self.switch, self.group_ip, @@ -755,54 +780,57 @@ impl<'a> GroupUpdateRollbackContext<'a> { ); } - // Restore NAT settings - match (self.group_ip, nat_target) { - (IpAddr::V4(ipv4), Some(nat)) => { - self.log_rollback_error( - "restore IPv4 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::update_ipv4_entry( - self.switch, - ipv4, - nat, - ), - ); - } - (IpAddr::V6(ipv6), Some(nat)) => { - self.log_rollback_error( - "restore IPv6 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::update_ipv6_entry( - self.switch, - ipv6, - nat, - ), - ); - } - (IpAddr::V4(ipv4), None) => { - self.log_rollback_error( - "remove IPv4 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::del_ipv4_entry(self.switch, ipv4), - ); - } - (IpAddr::V6(ipv6), None) => { - self.log_rollback_error( - "remove IPv6 NAT settings", - &format!("for group {}", self.group_ip), - table::mcast::mcast_nat::del_ipv6_entry(self.switch, ipv6), - ); + // Restore NAT settings for external groups (internal groups have no + // NAT entries). 
Both new_tgt and old_tgt are the same original NAT + // target since we're restoring to the original state. + if let Some(nat) = nat_target { + match self.group_ip { + IpAddr::V4(ipv4) => { + self.log_rollback_error( + "restore IPv4 NAT settings", + &format!( + "for group {group_ip}", + group_ip = self.group_ip + ), + table::mcast::mcast_nat::update_ipv4_entry( + self.switch, + ipv4, + nat, + nat, + self.attempted_vlan_id, + vlan_id, + ), + ); + } + IpAddr::V6(ipv6) => { + self.log_rollback_error( + "restore IPv6 NAT settings", + &format!( + "for group {group_ip}", + group_ip = self.group_ip + ), + table::mcast::mcast_nat::update_ipv6_entry( + self.switch, + ipv6, + nat, + nat, + self.attempted_vlan_id, + vlan_id, + ), + ); + } } } self.log_rollback_error( "restore VLAN settings", - &format!("for group {}", self.group_ip), + &format!("for group {group_ip}", group_ip = self.group_ip), update_fwding_tables( self.switch, self.group_ip, external_group_id, &prev_members, + self.attempted_vlan_id, vlan_id, ), ); diff --git a/dpd/src/mcast/validate.rs b/dpd/src/mcast/validate.rs index 0dbdd4c..f5d94d6 100644 --- a/dpd/src/mcast/validate.rs +++ b/dpd/src/mcast/validate.rs @@ -52,16 +52,16 @@ pub(crate) fn validate_multicast_address( pub(crate) fn validate_nat_target(nat_target: NatTarget) -> DpdResult<()> { if !nat_target.inner_mac.is_multicast() { return Err(DpdError::Invalid(format!( - "NAT target inner MAC address {} is not a multicast MAC address", - nat_target.inner_mac + "NAT target inner MAC address {inner_mac} is not a multicast MAC address", + inner_mac = nat_target.inner_mac ))); } if !UNDERLAY_MULTICAST_SUBNET.contains(nat_target.internal_ip) { return Err(DpdError::Invalid(format!( - "NAT target internal IP address {} is not in the reserved \ + "NAT target internal IP address {internal_ip} is not in the reserved \ underlay multicast subnet (ff04::/64)", - nat_target.internal_ip + internal_ip = nat_target.internal_ip ))); } diff --git 
a/dpd/src/table/mcast/mcast_nat.rs b/dpd/src/table/mcast/mcast_nat.rs index 36fe0fa..280edd8 100644 --- a/dpd/src/table/mcast/mcast_nat.rs +++ b/dpd/src/table/mcast/mcast_nat.rs @@ -10,8 +10,7 @@ use std::net::{Ipv4Addr, Ipv6Addr}; use crate::{Switch, table::*}; -use super::{Ipv4MatchKey, Ipv6MatchKey}; - +use super::{Ipv4VlanMatchKey, Ipv6VlanMatchKey}; use aal::ActionParse; use aal_macros::*; use common::network::{MacAddr, NatTarget}; @@ -44,64 +43,246 @@ enum Ipv6Action { }, } -/// Add a NAT entry for IPv4 multicast traffic, keyed on `ip`. +/// Add NAT entries for IPv4 multicast traffic. +/// +/// For groups with a VLAN, two entries are added: +/// 1. Untagged ingress match -> forward (for decapsulated Geneve packets) +/// 2. Correctly tagged ingress match -> forward (for already-tagged packets) +/// +/// This allows both packet types to match, while packets with the wrong VLAN +/// will miss both entries and not be NAT encapsulated. pub(crate) fn add_ipv4_entry( s: &Switch, ip: Ipv4Addr, tgt: NatTarget, + vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); let action_key = Ipv4Action::Forward { target: tgt.internal_ip, inner_mac: tgt.inner_mac, vni: tgt.vni.as_u32(), }; - debug!( - s.log, - "add ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_add(TableType::NatIngressIpv4Mcast, &match_key, &action_key) + match vlan_id { + None => { + // Untagged only + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", match_key, action_key + ); + s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key, + &action_key, + ) + } + Some(vid) => { + common::network::validate_vlan(vid)?; + + // Untagged entry + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + )?; 
+ + // Tagged entry + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + if let Err(e) = s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + &action_key, + ) { + // Rollback untagged entry + debug!(s.log, "rollback: removing untagged entry"); + let _ = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + ); + return Err(e); + } + Ok(()) + } + } } -/// Update a NAT entry for IPv4 multicast traffic. +/// Update NAT entries for IPv4 multicast traffic. +/// +/// When VLAN changes, old entries are deleted and new ones added because +/// the VLAN is part of the match key and cannot be updated in place. +/// +/// # Arguments +/// +/// * `new_tgt` - NAT target for the new entries. +/// * `old_tgt` - NAT target for restoring entries on failure. Required when +/// VLAN changes since entries must be deleted and re-added. pub(crate) fn update_ipv4_entry( s: &Switch, ip: Ipv4Addr, - tgt: NatTarget, + new_tgt: NatTarget, + old_tgt: NatTarget, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); - let action_key = Ipv4Action::Forward { - target: tgt.internal_ip, - inner_mac: tgt.inner_mac, - vni: tgt.vni.as_u32(), - }; - - debug!( - s.log, - "update ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_update( - TableType::NatIngressIpv4Mcast, - &match_key, - &action_key, - ) + if old_vlan_id == new_vlan_id { + let action_key = Ipv4Action::Forward { + target: new_tgt.internal_ip, + inner_mac: new_tgt.inner_mac, + vni: new_tgt.vni.as_u32(), + }; + + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + )?; + + if let Some(vid) = old_vlan_id { + let 
match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + &action_key, + )?; + } + return Ok(()); + } + + del_ipv4_entry_with_tgt(s, ip, old_tgt, old_vlan_id)?; + if let Err(e) = add_ipv4_entry(s, ip, new_tgt, new_vlan_id) { + // Restore deleted entries with old target + debug!(s.log, "add failed, restoring old NAT entries for {ip}"); + let _ = add_ipv4_entry(s, ip, old_tgt, old_vlan_id); + return Err(e); + } + Ok(()) } -/// Delete a NAT entry for IPv4 multicast traffic, keyed on `ip`. -pub(crate) fn del_ipv4_entry(s: &Switch, ip: Ipv4Addr) -> DpdResult<()> { - let match_key = Ipv4MatchKey::new(ip); - - debug!(s.log, "delete ingress mcast entry {}", match_key); +/// Delete NAT entries for IPv4 multicast traffic. +/// +/// Deletes both entries for VLAN groups (see `add_ipv4_entry` for details). +/// This version does not support rollback on partial failure. 
+pub(crate) fn del_ipv4_entry( + s: &Switch, + ip: Ipv4Addr, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) + } + Some(vid) => { + // Untagged + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + )?; + + // Tagged + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + ) { + // Can't rollback without original action + debug!(s.log, "rollback not possible for untagged entry"); + return Err(e); + } + Ok(()) + } + } +} - s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) +/// Delete NAT entries for IPv4 multicast traffic with rollback support. +/// +/// Deletes both entries for VLAN groups. If the tagged entry deletion fails +/// after the untagged entry was deleted, attempts to restore the untagged +/// entry using the provided NAT target. 
+pub(crate) fn del_ipv4_entry_with_tgt( + s: &Switch, + ip: Ipv4Addr, + tgt: NatTarget, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv4Mcast, &match_key) + } + Some(vid) => { + // Delete untagged first + let match_key_untagged = Ipv4VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + )?; + + // Delete tagged + let match_key_tagged = Ipv4VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv4Mcast, + &match_key_tagged, + ) { + // Rollback: restore the untagged entry + debug!( + s.log, + "tagged delete failed, restoring untagged entry for {ip}" + ); + let action_key = Ipv4Action::Forward { + target: tgt.internal_ip, + inner_mac: tgt.inner_mac, + vni: tgt.vni.as_u32(), + }; + let _ = s.table_entry_add( + TableType::NatIngressIpv4Mcast, + &match_key_untagged, + &action_key, + ); + return Err(e); + } + Ok(()) + } + } } /// Dump the IPv4 NAT table's contents. pub(crate) fn ipv4_table_dump(s: &Switch) -> DpdResult { - s.table_dump::(TableType::NatIngressIpv4Mcast) + s.table_dump::(TableType::NatIngressIpv4Mcast) } /// Fetch the IPv4 NAT table's counters. @@ -109,7 +290,10 @@ pub(crate) fn ipv4_counter_fetch( s: &Switch, force_sync: bool, ) -> DpdResult> { - s.counter_fetch::(force_sync, TableType::NatIngressIpv4Mcast) + s.counter_fetch::( + force_sync, + TableType::NatIngressIpv4Mcast, + ) } /// Reset the Ipv4 NAT table. @@ -117,64 +301,246 @@ pub(crate) fn reset_ipv4(s: &Switch) -> DpdResult<()> { s.table_clear(TableType::NatIngressIpv4Mcast) } -/// Add a NAT entry for IPv6 multicast traffic, keyed on `ip`. +/// Add NAT entries for IPv6 multicast traffic. 
+/// +/// For groups with a VLAN, two entries are added: +/// 1. Untagged ingress match -> forward (for decapsulated Geneve packets) +/// 2. Correctly tagged ingress match -> forward (for already-tagged packets) +/// +/// This allows both packet types to match, while packets with the wrong VLAN +/// will miss both entries and not be NAT encapsulated. pub(crate) fn add_ipv6_entry( s: &Switch, ip: Ipv6Addr, tgt: NatTarget, + vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); let action_key = Ipv6Action::Forward { target: tgt.internal_ip, inner_mac: tgt.inner_mac, vni: tgt.vni.as_u32(), }; - debug!( - s.log, - "add ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_add(TableType::NatIngressIpv6Mcast, &match_key, &action_key) + match vlan_id { + None => { + // Untagged only + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", match_key, action_key + ); + s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key, + &action_key, + ) + } + Some(vid) => { + common::network::validate_vlan(vid)?; + + // Untagged entry + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + )?; + + // Tagged entry + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "add ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + if let Err(e) = s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + &action_key, + ) { + // Rollback untagged entry + debug!(s.log, "rollback: removing untagged entry"); + let _ = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + ); + return Err(e); + } + Ok(()) + } + } } -/// Update a NAT entry for IPv6 multicast traffic. 
+/// Update NAT entries for IPv6 multicast traffic. +/// +/// When VLAN changes, old entries are deleted and new ones added because +/// the VLAN is part of the match key and cannot be updated in place. +/// +/// # Arguments +/// +/// * `new_tgt` - NAT target for the new entries. +/// * `old_tgt` - NAT target for restoring entries on failure. Required when +/// VLAN changes since entries must be deleted and re-added. pub(crate) fn update_ipv6_entry( s: &Switch, ip: Ipv6Addr, - tgt: NatTarget, + new_tgt: NatTarget, + old_tgt: NatTarget, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); - let action_key = Ipv6Action::Forward { - target: tgt.internal_ip, - inner_mac: tgt.inner_mac, - vni: tgt.vni.as_u32(), - }; - - debug!( - s.log, - "update ingress mcast entry {} -> {:?}", match_key, action_key - ); - - s.table_entry_update( - TableType::NatIngressIpv6Mcast, - &match_key, - &action_key, - ) + if old_vlan_id == new_vlan_id { + let action_key = Ipv6Action::Forward { + target: new_tgt.internal_ip, + inner_mac: new_tgt.inner_mac, + vni: new_tgt.vni.as_u32(), + }; + + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_untagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + )?; + + if let Some(vid) = old_vlan_id { + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!( + s.log, + "update ingress mcast entry {} -> {:?}", + match_key_tagged, + action_key + ); + s.table_entry_update( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + &action_key, + )?; + } + return Ok(()); + } + + del_ipv6_entry_with_tgt(s, ip, old_tgt, old_vlan_id)?; + if let Err(e) = add_ipv6_entry(s, ip, new_tgt, new_vlan_id) { + // Restore deleted entries with old target + debug!(s.log, "add failed, restoring old NAT entries for {ip}"); + let _ = add_ipv6_entry(s, ip, old_tgt, 
old_vlan_id); + return Err(e); + } + Ok(()) } -/// Delete a NAT entry for IPv6 multicast traffic, keyed on `ip`. -pub(crate) fn del_ipv6_entry(s: &Switch, ip: Ipv6Addr) -> DpdResult<()> { - let match_key = Ipv6MatchKey::new(ip); - - debug!(s.log, "delete ingress mcast entry {}", match_key); +/// Delete NAT entries for IPv6 multicast traffic. +/// +/// Deletes both entries for VLAN groups (see `add_ipv6_entry` for details). +/// This version does not support rollback on partial failure. +pub(crate) fn del_ipv6_entry( + s: &Switch, + ip: Ipv6Addr, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) + } + Some(vid) => { + // Untagged + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + )?; + + // Tagged + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + ) { + // Can't rollback without original action + debug!(s.log, "rollback not possible for untagged entry"); + return Err(e); + } + Ok(()) + } + } +} - s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) +/// Delete NAT entries for IPv6 multicast traffic with rollback support. +/// +/// Deletes both entries for VLAN groups. If the tagged entry deletion fails +/// after the untagged entry was deleted, attempts to restore the untagged +/// entry using the provided NAT target. 
+pub(crate) fn del_ipv6_entry_with_tgt( + s: &Switch, + ip: Ipv6Addr, + tgt: NatTarget, + vlan_id: Option, +) -> DpdResult<()> { + match vlan_id { + None => { + let match_key = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key); + s.table_entry_del(TableType::NatIngressIpv6Mcast, &match_key) + } + Some(vid) => { + // Delete untagged first + let match_key_untagged = Ipv6VlanMatchKey::new(ip, None); + debug!(s.log, "delete ingress mcast entry {}", match_key_untagged); + s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + )?; + + // Delete tagged + let match_key_tagged = Ipv6VlanMatchKey::new(ip, Some(vid)); + debug!(s.log, "delete ingress mcast entry {}", match_key_tagged); + if let Err(e) = s.table_entry_del( + TableType::NatIngressIpv6Mcast, + &match_key_tagged, + ) { + // Rollback: restore the untagged entry + debug!( + s.log, + "tagged delete failed, restoring untagged entry for {ip}" + ); + let action_key = Ipv6Action::Forward { + target: tgt.internal_ip, + inner_mac: tgt.inner_mac, + vni: tgt.vni.as_u32(), + }; + let _ = s.table_entry_add( + TableType::NatIngressIpv6Mcast, + &match_key_untagged, + &action_key, + ); + return Err(e); + } + Ok(()) + } + } } /// Dump the IPv6 NAT table's contents. pub(crate) fn ipv6_table_dump(s: &Switch) -> DpdResult { - s.table_dump::(TableType::NatIngressIpv6Mcast) + s.table_dump::(TableType::NatIngressIpv6Mcast) } /// Fetch the IPv6 NAT table's counters. @@ -182,7 +548,10 @@ pub(crate) fn ipv6_counter_fetch( s: &Switch, force_sync: bool, ) -> DpdResult> { - s.counter_fetch::(force_sync, TableType::NatIngressIpv6Mcast) + s.counter_fetch::( + force_sync, + TableType::NatIngressIpv6Mcast, + ) } /// Reset the Ipv6 NAT table. 
diff --git a/dpd/src/table/mcast/mcast_route.rs b/dpd/src/table/mcast/mcast_route.rs index bd097e7..292b612 100644 --- a/dpd/src/table/mcast/mcast_route.rs +++ b/dpd/src/table/mcast/mcast_route.rs @@ -5,18 +5,24 @@ // Copyright 2026 Oxide Computer Company //! Table operations for multicast routing entries (on Ingress to the switch). +//! +//! Route tables match only on destination address and select the egress action: +//! - `forward`: Forward without VLAN modification +//! - `forward_vlan(vid)`: Add VLAN tag on egress +//! +//! VLAN-based access control (preventing VLAN translation) is handled by NAT +//! ingress tables before encapsulation, not by route tables. use std::net::{Ipv4Addr, Ipv6Addr}; -use crate::{Switch, table::*}; - -use super::{Ipv4MatchKey, Ipv6MatchKey}; - use aal::ActionParse; use aal_macros::*; use omicron_common::address::UNDERLAY_MULTICAST_SUBNET; use slog::debug; +use super::{Ipv4MatchKey, Ipv6MatchKey}; +use crate::{Switch, table::*}; + /// IPv4 Table for multicast routing entries. pub(crate) const IPV4_TABLE_NAME: &str = "pipe.Ingress.l3_router.MulticastRouter4.tbl"; @@ -40,61 +46,63 @@ enum Ipv6Action { ForwardVLAN { vlan_id: u16 }, } -/// Add an IPv4 multicast route entry to the routing table, keyed on -/// `route`, with an optional `vlan_id`. +/// Add an IPv4 multicast route entry. 
+/// +/// The action is selected based on VLAN configuration: +/// - No VLAN: `forward` (no VLAN modification on egress) +/// - With VLAN: `forward_vlan(vid)` (add VLAN tag on egress) pub(crate) fn add_ipv4_entry( s: &Switch, route: Ipv4Addr, vlan_id: Option, ) -> DpdResult<()> { let match_key = Ipv4MatchKey::new(route); - - let action_data = match vlan_id { + let action = match vlan_id { None => Ipv4Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv4Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv4Action::ForwardVLAN { vlan_id: vid } } }; - debug!( - s.log, - "add multicast route entry {} -> {:?}", route, action_data - ); - - s.table_entry_add(TableType::RouteIpv4Mcast, &match_key, &action_data) + debug!(s.log, "add multicast route entry {match_key} -> {action:?}"); + s.table_entry_add(TableType::RouteIpv4Mcast, &match_key, &action) } -/// Update an IPv4 multicast route entry in the routing table. +/// Update an IPv4 multicast route entry. +/// +/// Updates the action when VLAN configuration changes. Since the match key +/// is just the destination address, this can be done in place. 
pub(crate) fn update_ipv4_entry( s: &Switch, route: Ipv4Addr, - vlan_id: Option, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { + if old_vlan_id == new_vlan_id { + return Ok(()); + } + let match_key = Ipv4MatchKey::new(route); - let action_data = match vlan_id { + let action = match new_vlan_id { None => Ipv4Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv4Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv4Action::ForwardVLAN { vlan_id: vid } } }; debug!( s.log, - "update multicast route entry {} -> {:?}", route, action_data + "update multicast route entry {match_key} -> {action:?}" ); - - s.table_entry_update(TableType::RouteIpv4Mcast, &match_key, &action_data) + s.table_entry_update(TableType::RouteIpv4Mcast, &match_key, &action) } -/// Delete an IPv4 multicast route entry from table, keyed on -/// `route`. +/// Delete an IPv4 multicast route entry. pub(crate) fn del_ipv4_entry(s: &Switch, route: Ipv4Addr) -> DpdResult<()> { let match_key = Ipv4MatchKey::new(route); - - debug!(s.log, "delete multicast route entry {}", match_key); - + debug!(s.log, "delete multicast route entry {match_key}"); s.table_entry_del(TableType::RouteIpv4Mcast, &match_key) } @@ -116,8 +124,15 @@ pub(crate) fn reset_ipv4(s: &Switch) -> DpdResult<()> { s.table_clear(TableType::RouteIpv4Mcast) } -/// Add an IPv6 multicast route entry to the routing table, keyed on -/// `route`, with an optional `vlan_id`. +/// Add an IPv6 multicast route entry. +/// +/// The action is selected based on VLAN configuration: +/// - No VLAN: `forward` (no VLAN modification on egress) +/// - With VLAN: `forward_vlan(vid)` (add VLAN tag on egress) +/// +/// Reserved underlay multicast subnet (ff04::/64) is internal to the rack +/// and always uses Forward without VLAN tagging, regardless of the vlan_id +/// parameter. 
pub(crate) fn add_ipv6_entry( s: &Switch, route: Ipv6Addr, @@ -126,68 +141,64 @@ pub(crate) fn add_ipv6_entry( let match_key = Ipv6MatchKey::new(route); // Reserved underlay multicast subnet (ff04::/64) is internal to the rack - // and doesn't require VLAN tagging. Other admin-local addresses - // (e.g., ff04:0:0:1::/64) may be used by customer external groups and - // can receive VLAN tagging. - let action_data: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { + // and doesn't require VLAN tagging. + let action: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { Ipv6Action::Forward } else { match vlan_id { None => Ipv6Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv6Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv6Action::ForwardVLAN { vlan_id: vid } } } }; - debug!( - s.log, - "add multicast route entry {} -> {:?}", route, action_data - ); - - s.table_entry_add(TableType::RouteIpv6Mcast, &match_key, &action_data) + debug!(s.log, "add multicast route entry {match_key} -> {action:?}"); + s.table_entry_add(TableType::RouteIpv6Mcast, &match_key, &action) } -/// Update an IPv6 multicast route entry in the routing table. +/// Update an IPv6 multicast route entry. +/// +/// Updates the action when VLAN configuration changes. Since the match key +/// is just the destination address, this can be done in place. pub(crate) fn update_ipv6_entry( s: &Switch, route: Ipv6Addr, - vlan_id: Option, + old_vlan_id: Option, + new_vlan_id: Option, ) -> DpdResult<()> { + if old_vlan_id == new_vlan_id { + return Ok(()); + } + let match_key = Ipv6MatchKey::new(route); // Reserved underlay multicast subnet (ff04::/64) is internal to the rack - // and doesn't require VLAN tagging. Other admin-local addresses - // (e.g., ff04:0:0:1::/64) may be used by customer external groups and - // can receive VLAN tagging. 
- let action_data: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { + // and doesn't require VLAN tagging. + let action: Ipv6Action = if UNDERLAY_MULTICAST_SUBNET.contains(route) { Ipv6Action::Forward } else { - match vlan_id { + match new_vlan_id { None => Ipv6Action::Forward, - Some(vlan_id) => { - common::network::validate_vlan(vlan_id)?; - Ipv6Action::ForwardVLAN { vlan_id } + Some(vid) => { + common::network::validate_vlan(vid)?; + Ipv6Action::ForwardVLAN { vlan_id: vid } } } }; debug!( s.log, - "update multicast route entry {} -> {:?}", route, action_data + "update multicast route entry {match_key} -> {action:?}" ); - - s.table_entry_update(TableType::RouteIpv6Mcast, &match_key, &action_data) + s.table_entry_update(TableType::RouteIpv6Mcast, &match_key, &action) } -/// Delete an IPv6 multicast entry from routing table, keyed on -/// `route`. +/// Delete an IPv6 multicast route entry. pub(crate) fn del_ipv6_entry(s: &Switch, route: Ipv6Addr) -> DpdResult<()> { let match_key = Ipv6MatchKey::new(route); - - debug!(s.log, "delete multicast route entry {}", match_key); - + debug!(s.log, "delete multicast route entry {match_key}"); s.table_entry_del(TableType::RouteIpv6Mcast, &match_key) } diff --git a/dpd/src/table/mcast/mod.rs b/dpd/src/table/mcast/mod.rs index ee3ff5b..a3b3f24 100644 --- a/dpd/src/table/mcast/mod.rs +++ b/dpd/src/table/mcast/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/ // -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Multicast table operations. @@ -55,3 +55,85 @@ impl fmt::Display for Ipv6MatchKey { write!(f, "{}", self.dst_addr) } } + +/// VLAN-aware match key for NAT ingress tables. +/// +/// Matches on destination address, VLAN header validity, and VLAN ID to prevent +/// VLAN translation. 
For groups with a VLAN, two entries are installed (untagged
+/// and tagged); packets with a mismatched VLAN miss both and are dropped rather
+/// than being translated to another customer's VLAN.
+#[derive(MatchParse, Hash)]
+pub(super) struct Ipv4VlanMatchKey {
+    dst_addr: Ipv4Addr,
+    #[match_xlate(name = "$valid")]
+    vlan_valid: bool,
+    vlan_id: u16,
+}
+
+impl Ipv4VlanMatchKey {
+    pub(super) fn new(dst_addr: Ipv4Addr, vlan_id: Option<u16>) -> Self {
+        match vlan_id {
+            Some(id) => Self {
+                dst_addr,
+                vlan_valid: true,
+                vlan_id: id,
+            },
+            None => Self {
+                dst_addr,
+                vlan_valid: false,
+                vlan_id: 0,
+            },
+        }
+    }
+}
+
+impl fmt::Display for Ipv4VlanMatchKey {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.vlan_valid {
+            write!(f, "{} vlan={}", self.dst_addr, self.vlan_id)
+        } else {
+            write!(f, "{} untagged", self.dst_addr)
+        }
+    }
+}
+
+/// VLAN-aware match key for NAT ingress tables.
+///
+/// Matches on destination address, VLAN header validity, and VLAN ID to prevent
+/// VLAN translation. For groups with a VLAN, two entries are installed (untagged
+/// and tagged); packets with a mismatched VLAN miss both and are dropped rather
+/// than being translated to another customer's VLAN.
+#[derive(MatchParse, Hash)]
+pub(super) struct Ipv6VlanMatchKey {
+    dst_addr: Ipv6Addr,
+    #[match_xlate(name = "$valid")]
+    vlan_valid: bool,
+    vlan_id: u16,
+}
+
+impl Ipv6VlanMatchKey {
+    pub(super) fn new(dst_addr: Ipv6Addr, vlan_id: Option<u16>) -> Self {
+        match vlan_id {
+            Some(id) => Self {
+                dst_addr,
+                vlan_valid: true,
+                vlan_id: id,
+            },
+            None => Self {
+                dst_addr,
+                vlan_valid: false,
+                vlan_id: 0,
+            },
+        }
+    }
+}
+
+impl fmt::Display for Ipv6VlanMatchKey {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.vlan_valid {
+            write!(f, "{} vlan={}", self.dst_addr, self.vlan_id)
+        } else {
+            write!(f, "{} untagged", self.dst_addr)
+        }
+    }
+}