diff --git a/.github/buildomat/jobs/opte-api.sh b/.github/buildomat/jobs/opte-api.sh index 52e6c43c..8b2fd764 100755 --- a/.github/buildomat/jobs/opte-api.sh +++ b/.github/buildomat/jobs/opte-api.sh @@ -28,7 +28,7 @@ header "analyze std" ptime -m cargo clippy --all-targets header "analyze no_std" -ptime -m cargo clippy --no-default-features --all-targets +ptime -m cargo clippy --no-default-features --all-targets -- --deny warnings header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/opte-ioctl.sh b/.github/buildomat/jobs/opte-ioctl.sh index b0363aaa..6340ebab 100755 --- a/.github/buildomat/jobs/opte-ioctl.sh +++ b/.github/buildomat/jobs/opte-ioctl.sh @@ -22,4 +22,4 @@ header "check style" ptime -m cargo +$NIGHTLY fmt -- --check header "analyze" -ptime -m cargo clippy --all-targets +ptime -m cargo clippy --all-targets -- --deny warnings diff --git a/.github/buildomat/jobs/opte.sh b/.github/buildomat/jobs/opte.sh index 7885d01b..82734005 100755 --- a/.github/buildomat/jobs/opte.sh +++ b/.github/buildomat/jobs/opte.sh @@ -31,10 +31,10 @@ RUSTDOCFLAGS="-D warnings" ptime -m \ cargo +$NIGHTLY doc --no-default-features --features=api,std,engine,kernel header "analyze std + api" -ptime -m cargo clippy --all-targets +ptime -m cargo clippy --all-targets -- --deny warnings header "analyze no_std + engine + kernel" -ptime -m cargo +$NIGHTLY clippy --no-default-features --features engine,kernel +ptime -m cargo +$NIGHTLY clippy --no-default-features --features engine,kernel -- --deny warnings header "test" ptime -m cargo test diff --git a/.github/buildomat/jobs/opteadm.sh b/.github/buildomat/jobs/opteadm.sh index f533d38c..1bfc523a 100755 --- a/.github/buildomat/jobs/opteadm.sh +++ b/.github/buildomat/jobs/opteadm.sh @@ -31,7 +31,7 @@ header "check style" ptime -m cargo +$NIGHTLY fmt -- --check header "analyze" -ptime -m cargo clippy --all-targets +ptime -m cargo clippy --all-targets -- --deny warnings header "debug build" ptime -m cargo build diff --git 
a/.github/buildomat/jobs/oxide-vpc.sh b/.github/buildomat/jobs/oxide-vpc.sh index bde40131..9e9638a4 100755 --- a/.github/buildomat/jobs/oxide-vpc.sh +++ b/.github/buildomat/jobs/oxide-vpc.sh @@ -31,7 +31,7 @@ RUSTDOCFLAGS="-D warnings" ptime -m \ cargo +$NIGHTLY doc --no-default-features --features=api,std,engine,kernel header "analyze std + api + usdt" -ptime -m cargo clippy --features usdt --all-targets +ptime -m cargo clippy --features usdt --all-targets -- --deny warnings header "analyze no_std + engine + kernel" ptime -m cargo +$NIGHTLY clippy --no-default-features --features engine,kernel diff --git a/.github/buildomat/jobs/xde.sh b/.github/buildomat/jobs/xde.sh index 82baf11c..7f34b1d9 100755 --- a/.github/buildomat/jobs/xde.sh +++ b/.github/buildomat/jobs/xde.sh @@ -113,7 +113,7 @@ sha256sum $REL_TGT/xde_link.so > $REL_TGT/xde_link.so.sha256 header "build xde integration tests" pushd xde-tests cargo +$NIGHTLY fmt -- --check -cargo clippy --all-targets +cargo clippy --all-targets -- --deny warnings cargo build --test loopback loopback_test=$( cargo build -q --test loopback --message-format=json |\ diff --git a/bench/src/packet.rs b/bench/src/packet.rs index b555f946..0788ca60 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use opte::ddi::mblk::MsgBlk; use opte::engine::Direction; @@ -26,6 +26,7 @@ use opte_test_utils::icmp::gen_icmp_echo; use opte_test_utils::icmp::gen_icmpv6_echo; use opte_test_utils::icmp::generate_ndisc; use opte_test_utils::*; +use std::collections::BTreeMap; pub type TestCase = (MsgBlk, Direction); @@ -91,6 +92,8 @@ impl BenchPacket for UlpProcess { ephemeral_ip: Some("10.60.1.20".parse().unwrap()), floating_ips: vec![], }, + attached_subnets: BTreeMap::default(), + transit_ips: BTreeMap::default(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -104,6 +107,8 @@ impl BenchPacket for UlpProcess { ephemeral_ip: Some("2001:db8::2".parse().unwrap()), floating_ips: vec![], }, + attached_subnets: BTreeMap::default(), + transit_ips: BTreeMap::default(), }, }; @@ -269,18 +274,13 @@ impl BenchPacketInstance for UlpProcessInstance { let out_pkt = match self.direction { Direction::Out => inner_pkt, Direction::In => { - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; let guest_phys = TestIpPhys { ip: self.cfg.phys_ip, mac: self.cfg.guest_mac, vni: self.cfg.vni, }; - encap_external(inner_pkt, bsvc_phys, guest_phys) + encap_external(inner_pkt, *BSVC_PHYS, guest_phys) } }; diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index fb4334db..a177dbb7 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use anyhow::Context; use clap::Args; @@ -70,6 +70,7 @@ use oxide_vpc::print::print_mcast_fwd; use oxide_vpc::print::print_mcast_subs; use oxide_vpc::print::print_v2b; use oxide_vpc::print::print_v2p; +use std::collections::BTreeMap; use std::io; use std::io::Write; use std::str::FromStr; @@ -403,6 +404,36 @@ enum Command { #[arg(long = "dir")] direction: Option, }, + + /// Give a guest ownership of a given CIDR block. + /// + /// This is equivalent to a bidirectional `AllowCidr`, with an exemption + /// from NAT if the subnet is marked as `external`. + /// + /// Repeated calls on any given `prefix` will update its configuration. + AttachSubnet { + /// The OPTE port to configure. + #[arg(short)] + port: String, + + /// The subnet to attach. + prefix: IpCidr, + + /// Marks the subnet as a block of external IPs for which in/outbound + /// NAT should not be performed. + #[arg(long, short)] + external: bool, + }, + + /// Rescind a guest's ownership of a given CIDR block. + DetachSubnet { + /// The OPTE port to configure. + #[arg(short)] + port: String, + + /// The subnet to detach. 
+ prefix: IpCidr, + }, } #[derive(Debug, Parser)] @@ -805,6 +836,8 @@ fn main() -> anyhow::Result<()> { private_ip, gateway_ip, external_ips, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }) } IpAddr::Ip6(private_ip) => { @@ -823,6 +856,8 @@ fn main() -> anyhow::Result<()> { private_ip, gateway_ip, external_ips, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }) } }; @@ -833,9 +868,10 @@ fn main() -> anyhow::Result<()> { gateway_mac, vni: vpc_vni, phys_ip: src_underlay_addr, + dhcp: dhcp.into(), }; - hdl.create_xde(&name, cfg, dhcp.into(), passthrough)?; + hdl.create_xde(&name, cfg, passthrough)?; } Command::DeleteXde { name } => { @@ -1054,6 +1090,14 @@ fn main() -> anyhow::Result<()> { })?; } } + + Command::AttachSubnet { port, prefix, external } => { + hdl.attach_subnet(&port, prefix, external)?; + } + + Command::DetachSubnet { port, prefix } => { + hdl.detach_subnet(&port, prefix)?; + } } Ok(()) diff --git a/crates/opte-api/src/cmd.rs b/crates/opte-api/src/cmd.rs index d69a0a8a..8238511e 100644 --- a/crates/opte-api/src/cmd.rs +++ b/crates/opte-api/src/cmd.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use super::API_VERSION; use super::RuleId; @@ -25,40 +25,95 @@ pub const XDE_IOC_OPTE_CMD: i32 = XDE_IOC as i32 | 0x01; #[derive(Clone, Copy, Debug)] #[repr(C)] pub enum OpteCmd { - ListPorts = 1, // list all ports - AddFwRule = 20, // add firewall rule - RemFwRule = 21, // remove firewall rule - SetFwRules = 22, // set/replace all firewall rules at once - DumpTcpFlows = 30, // dump TCP flows - DumpLayer = 31, // dump the specified Layer - DumpUft = 32, // dump the Unified Flow Table - ListLayers = 33, // list the layers on a given port - ClearUft = 40, // clear the UFT - ClearLft = 41, // clear the given Layer's Flow Table - SetVirt2Phys = 50, // set a v2p mapping - DumpVirt2Phys = 51, // dump the v2p mappings - SetVirt2Boundary = 52, // set a v2b mapping - ClearVirt2Boundary = 53, // clear a v2b mapping - DumpVirt2Boundary = 54, // dump the v2b mappings - ClearVirt2Phys = 55, // clear a v2p mapping - AddRouterEntry = 60, // add a router entry for IP dest - DelRouterEntry = 61, // remove a router entry for IP dest - CreateXde = 70, // create a new xde device - DeleteXde = 71, // delete an xde device - SetXdeUnderlay = 72, // set xde underlay devices - ClearXdeUnderlay = 73, // clear xde underlay devices - SetExternalIps = 80, // set xde external IPs for a port - AllowCidr = 90, // allow ip block through gateway tx/rx - RemoveCidr = 91, // deny ip block through gateway tx/rx - SetMcastForwarding = 100, // set multicast forwarding entries - ClearMcastForwarding = 101, // clear multicast forwarding entries - DumpMcastForwarding = 102, // dump multicast forwarding table - McastSubscribe = 103, // subscribe a port to a multicast group - McastUnsubscribe = 104, // unsubscribe a port from a multicast group - SetMcast2Phys = 105, // set M2P mapping (group -> underlay mcast) - ClearMcast2Phys = 106, // clear M2P mapping - DumpMcastSubscriptions = 107, // dump multicast subscription table - 
McastUnsubscribeAll = 108, // unsubscribe all ports from a multicast group + /// List all ports. + ListPorts = 1, + + /// Add a firewall rule. + AddFwRule = 20, + /// Remove a firewall rule. + RemFwRule = 21, + /// Set/replace all firewall rules at once. + SetFwRules = 22, + + /// Read out TCP flows and statistics. + DumpTcpFlows = 30, + /// Read out installed rules and hit counters in a given layer. + DumpLayer = 31, + /// Read out UFT (fastpath) flow entries and their associated counters. + DumpUft = 32, + /// List the layers on a given port. + ListLayers = 33, + + /// Clear the UFT (fastpath) for a port. + ClearUft = 40, + /// Clear a layer's flow table. + ClearLft = 41, + + /// Set a V2P mapping. + SetVirt2Phys = 50, + /// Read out all V2P mappings. + DumpVirt2Phys = 51, + /// Set a V2B mapping. + SetVirt2Boundary = 52, + /// Remove a V2B mapping. + ClearVirt2Boundary = 53, + /// Read out all V2B mappings. + DumpVirt2Boundary = 54, + /// Remove a V2P mapping. + ClearVirt2Phys = 55, + + /// Add a router entry for an IP destination CIDR. + AddRouterEntry = 60, + /// Remove a router entry for an IP destination CIDR. + DelRouterEntry = 61, + + /// Create a new XDE device. + /// + /// Requires that `SetXdeUnderlay` has been successfully called. + CreateXde = 70, + /// Delete an XDE device. + DeleteXde = 71, + /// Set the physical devices which XDE should transmit over. + SetXdeUnderlay = 72, + /// Unbind the underlay devices. + /// + /// Requires that no XDE ports exist. + ClearXdeUnderlay = 73, + + /// Set all external IP config for a port. + SetExternalIps = 80, + + /// Add a transit IP CIDR to this port's allow list. + /// + /// NOOPs if the given CIDR is an attached subnet. + AllowCidr = 90, + /// Remove a transit IP CIDR from this port's allow list. + /// + /// NOOPs if the given CIDR is an attached subnet. + RemoveCidr = 91, + /// Add or set the config of an attached subnet. + AttachSubnet = 92, + /// Remove an attached subnet. 
+ DetachSubnet = 93, + + /// Set multicast forwarding entries. + SetMcastForwarding = 100, + /// Clear multicast forwarding entries. + ClearMcastForwarding = 101, + /// Read out the multicast forwarding table. + DumpMcastForwarding = 102, + /// Subscribe a port to a multicast group. + McastSubscribe = 103, + /// Unsubscribe a port from a multicast group. + McastUnsubscribe = 104, + /// Set an M2P mapping (group -> underlay mcast). + SetMcast2Phys = 105, + /// Remove an M2P mapping. + ClearMcast2Phys = 106, + /// Read out the table of multicast subscriptions. + DumpMcastSubscriptions = 107, + /// Unsubscribe all ports from a multicast group. + McastUnsubscribeAll = 108, } impl TryFrom for OpteCmd { diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 558a6e41..99fb077e 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company #![no_std] #![deny(unreachable_patterns)] @@ -51,7 +51,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 38; +pub const API_VERSION: u64 = 39; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0; diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index 510fc9a1..1c4147d6 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use opte::api::API_VERSION; use opte::api::ClearLftReq; @@ -27,6 +27,8 @@ use opte::api::XDE_IOC_OPTE_CMD; use oxide_vpc::api::AddFwRuleReq; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::AllowCidrReq; +use oxide_vpc::api::AttachSubnetReq; +use oxide_vpc::api::AttachedSubnetConfig; use oxide_vpc::api::ClearMcast2PhysReq; use oxide_vpc::api::ClearMcastForwardingReq; use oxide_vpc::api::ClearVirt2BoundaryReq; @@ -35,7 +37,8 @@ use oxide_vpc::api::CreateXdeReq; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DeleteXdeReq; -use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::DetachSubnetReq; +use oxide_vpc::api::DetachSubnetResp; use oxide_vpc::api::DumpMcastForwardingResp; use oxide_vpc::api::DumpMcastSubscriptionsResp; use oxide_vpc::api::DumpVirt2BoundaryResp; @@ -123,7 +126,6 @@ impl OpteHdl { &self, name: &str, cfg: VpcCfg, - dhcp: DhcpCfg, passthrough: bool, ) -> Result { use libnet::link; @@ -136,7 +138,7 @@ impl OpteHdl { let xde_devname = name.into(); let cmd = OpteCmd::CreateXde; - let req = CreateXdeReq { xde_devname, linkid, cfg, dhcp, passthrough }; + let req = CreateXdeReq { xde_devname, linkid, cfg, passthrough }; let res = run_cmd_ioctl(self.device.as_raw_fd(), cmd, Some(&req)); @@ -395,6 +397,37 @@ impl OpteHdl { ) } + pub fn attach_subnet( + &self, + port_name: &str, + cidr: IpCidr, + is_external: bool, + ) -> Result { + let cmd = OpteCmd::AttachSubnet; + run_cmd_ioctl( + self.device.as_raw_fd(), + cmd, + Some(&AttachSubnetReq { + cidr, + port_name: port_name.into(), + cfg: AttachedSubnetConfig { is_external }, + }), + ) + } + + pub fn detach_subnet( + &self, + port_name: &str, + cidr: IpCidr, + ) -> Result { + let cmd = OpteCmd::DetachSubnet; + run_cmd_ioctl( + self.device.as_raw_fd(), + cmd, + Some(&DetachSubnetReq { cidr, port_name: port_name.into() }), + ) + } + /// Return the TCP flows. 
pub fn dump_tcp_flows( &self, diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index bb128b44..e3c4da18 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Common routines for integration tests. @@ -93,8 +93,10 @@ pub use oxide_vpc::engine::overlay::VpcMappings; pub use oxide_vpc::engine::router; pub use port_state::*; pub use smoltcp::wire::IpProtocol; +use std::collections::BTreeMap; pub use std::num::NonZeroU32; pub use std::sync::Arc; +use std::sync::LazyLock; /// Expects that a packet result is modified, and applies that modification. #[macro_export] @@ -179,6 +181,8 @@ pub fn g1_cfg() -> VpcCfg { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -192,6 +196,8 @@ pub fn g1_cfg() -> VpcCfg { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; g1_cfg2(ip_cfg) @@ -207,6 +213,7 @@ pub fn g1_cfg2(ip_cfg: IpCfg) -> VpcCfg { phys_ip: Ipv6Addr::from([ 0xFD00, 0x0000, 0x00F7, 0x0101, 0x0000, 0x0000, 0x0000, 0x0001, ]), + dhcp: base_dhcp_config(), } } @@ -224,6 +231,8 @@ pub fn g2_cfg() -> VpcCfg { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -237,6 +246,8 @@ pub fn g2_cfg() -> VpcCfg { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; VpcCfg { @@ -248,6 +259,7 @@ pub fn g2_cfg() -> VpcCfg { phys_ip: Ipv6Addr::from([ 0xFD00, 0x0000, 0x00F7, 0x0116, 0x0000, 0x0000, 0x0000, 0x0001, ]), + dhcp: 
base_dhcp_config(), } } @@ -268,10 +280,8 @@ fn oxide_net_builder( let snat_limit = NonZeroU32::new(8096).unwrap(); let one_limit = NonZeroU32::new(1).unwrap(); - let dhcp = base_dhcp_config(); - firewall::setup(&mut pb, fw_limit).expect("failed to add firewall layer"); - gateway::setup(&pb, cfg, vpc_map, fw_limit, &dhcp) + gateway::setup(&pb, cfg, vpc_map, fw_limit) .expect("failed to setup gateway layer"); router::setup(&pb, cfg, one_limit).expect("failed to add router layer"); nat::setup(&mut pb, cfg, snat_limit).expect("failed to add nat layer"); @@ -355,14 +365,14 @@ pub fn oxide_net_setup2( v2b.set( "0.0.0.0/0".parse().unwrap(), vec![TunnelEndpoint { - ip: "fd00:9900::1".parse().unwrap(), + ip: BS_IP_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }], ); v2b.set( "::/0".parse().unwrap(), vec![TunnelEndpoint { - ip: "fd00:9900::1".parse().unwrap(), + ip: BS_IP_ADDR, vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), }], ); @@ -987,6 +997,12 @@ pub struct TestIpPhys { pub vni: Vni, } +pub static BSVC_PHYS: LazyLock = LazyLock::new(|| TestIpPhys { + ip: BS_IP_ADDR, + mac: BS_MAC_ADDR, + vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), +}); + /// Encapsulate a guest packet, marking that it has arrived from beyond /// the rack. #[must_use] diff --git a/lib/opte/src/dynamic.rs b/lib/opte/src/dynamic.rs index 16584419..b3e8e72a 100644 --- a/lib/opte/src/dynamic.rs +++ b/lib/opte/src/dynamic.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! A KRwLock-based wrapper for dynamically updateable resources (e.g., config), //! and for memoizing the outputs generated from those resources. @@ -49,6 +49,22 @@ impl Dynamic { _ = self.0.epoch.fetch_add(1, Ordering::Relaxed); } + /// Conditionally update `self`, holding exclusive access on the inner + /// value. 
+ /// + /// `f(...)` should return `Some(val)` if an update should be applied. + /// Returns `true` if `f(...)` returned `Some`. + pub fn update(&self, f: impl FnOnce(&T) -> Option) -> bool { + let mut inner = self.0.inner.write(); + if let Some(new_val) = f(&inner) { + *inner = new_val.into(); + _ = self.0.epoch.fetch_add(1, Ordering::Relaxed); + true + } else { + false + } + } + pub fn load(&self) -> Snapshot { let value_locked = self.0.inner.read(); let value = Arc::clone(&*value_locked); diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index d6413e27..643da97e 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! A layer in a port. @@ -841,7 +841,7 @@ impl Layer { Some(ActionDescEntry::Desc(desc)) => { self.stats.vals.in_lft_hit += 1; let flow_before = *pkt.flow(); - let ht = desc.gen_ht(Direction::In); + let ht = desc.gen_ht(Direction::In, ameta); pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1032,7 +1032,7 @@ impl Layer { }; let flow_before = *pkt.flow(); - let ht_in = desc.gen_ht(In); + let ht_in = desc.gen_ht(In, ameta); pkt.hdr_transform(&ht_in)?; xforms.hdr.push(ht_in); ht_probe( @@ -1127,7 +1127,7 @@ impl Layer { Some(ActionDescEntry::Desc(desc)) => { self.stats.vals.out_lft_hit += 1; let flow_before = *pkt.flow(); - let ht = desc.gen_ht(Direction::Out); + let ht = desc.gen_ht(Direction::Out, ameta); pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1320,7 +1320,7 @@ impl Layer { }; let flow_before = *pkt.flow(); - let ht_out = desc.gen_ht(Out); + let ht_out = desc.gen_ht(Out, ameta); pkt.hdr_transform(&ht_out)?; xforms.hdr.push(ht_out); ht_probe( diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index e8f7e190..99e26267 100644 --- 
a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! 1:1 NAT. @@ -22,15 +22,19 @@ use super::packet::Packet; use super::parse::Ulp; use super::parse::UlpRepr; use super::port::meta::ActionMeta; +use super::port::meta::ActionMetaValue; use super::predicate::DataPredicate; use super::predicate::Predicate; use super::rule; use super::rule::ActionDesc; use super::rule::AllowOrDeny; use super::rule::HdrTransform; +use super::rule::MetaAction; use super::rule::StatefulAction; use crate::engine::snat::ConcreteIpAddr; +use alloc::borrow::Cow; use alloc::boxed::Box; +use alloc::string::String; use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; @@ -106,7 +110,7 @@ impl StatefulAction for OutboundNat { &self, flow_id: &InnerFlowId, _pkt: &Packet, - _meta: &mut ActionMeta, + _meta: &ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are // to use them equally. @@ -169,7 +173,7 @@ impl StatefulAction for InboundNat { &self, flow_id: &InnerFlowId, _pkt: &Packet, - _meta: &mut ActionMeta, + _meta: &ActionMeta, ) -> rule::GenDescResult { // We rely on the attached predicates to filter out IPs which are *not* // registered to this port. 
@@ -199,11 +203,13 @@ pub struct NatDesc { pub const NAT_NAME: &str = "NAT"; impl ActionDesc for NatDesc { - fn gen_ht(&self, dir: Direction) -> HdrTransform { + fn gen_ht(&self, dir: Direction, meta: &mut ActionMeta) -> HdrTransform { match dir { Direction::Out => { let ip = IpMod::new_src(self.external_ip); + meta.insert_typed(&ExternalIpTag); + HdrTransform { name: NAT_NAME.to_string(), inner_ip: HeaderAction::Modify(ip), @@ -385,6 +391,51 @@ impl fmt::Display for IcmpV6Nat { } } +/// Mark matching packets as being sent outbound from an external IP. +#[derive(Debug)] +pub struct ExternalIpTagger; + +impl fmt::Display for ExternalIpTagger { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "ExternalIpTagger") + } +} + +impl MetaAction for ExternalIpTagger { + fn implicit_preds(&self) -> (Vec, Vec) { + (vec![], vec![]) + } + + fn mod_meta( + &self, + _flow_id: &InnerFlowId, + meta: &mut ActionMeta, + ) -> rule::ModMetaResult { + meta.insert_typed(&ExternalIpTag); + rule::ModMetaResult::Ok(AllowOrDeny::Allow(())) + } +} + +/// A unit-valued tag marking outbound packets using an external IP. +#[derive(Debug)] +pub struct ExternalIpTag; + +impl ActionMetaValue for ExternalIpTag { + const KEY: &'static str = "external-ip-applied"; + + fn as_meta(&self) -> Cow<'static, str> { + Cow::Borrowed(Self::KEY) + } + + fn from_meta(s: &str) -> Result { + if s == Self::KEY { + Ok(Self) + } else { + Err("malformed ExternalIpTag value".into()) + } + } +} + #[cfg(test)] mod test { use super::*; @@ -459,7 +510,7 @@ mod test { // Verify descriptor generation. 
// ================================================================ let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match nat.gen_desc(&flow_out, &pkt, &mut ameta) { + let desc = match nat.gen_desc(&flow_out, &pkt, &ameta) { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; @@ -467,7 +518,7 @@ mod test { // ================================================================ // Verify outbound header transformation // ================================================================ - let out_ht = desc.gen_ht(Direction::Out); + let out_ht = desc.gen_ht(Direction::Out, &mut ameta); let pmo = pkt.meta_mut(); out_ht.run(pmo).unwrap(); @@ -524,7 +575,7 @@ mod test { .to_full_meta(); let pmi = pkt.meta_mut(); - let in_ht = desc.gen_ht(Direction::In); + let in_ht = desc.gen_ht(Direction::In, &mut ameta); in_ht.run(pmi).unwrap(); let ether_meta = pmi.inner_ether(); diff --git a/lib/opte/src/engine/port/meta.rs b/lib/opte/src/engine/port/meta.rs index 968b6986..b6f92413 100644 --- a/lib/opte/src/engine/port/meta.rs +++ b/lib/opte/src/engine/port/meta.rs @@ -2,11 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use alloc::borrow::Cow; use alloc::collections::BTreeMap; use alloc::string::String; +use core::marker::PhantomData; /// A value meant to be used in the [`ActionMeta`] map. /// @@ -34,7 +35,7 @@ pub trait ActionMetaValue: Sized { fn as_meta(&self) -> Cow<'static, str>; /// Attempt to create a value assuming that `s` was created - /// with [`Self::as_meta()`]. + /// with [`ActionMetaValue::as_meta`]. fn from_meta(s: &str) -> Result; } @@ -77,6 +78,16 @@ impl ActionMeta { self.inner.insert(key, val) } + /// Insert a value with a type-determined key into the map, + /// replacing any existing key-value pair. 
Return the value + /// being replaced, or `None`. + pub fn insert_typed( + &mut self, + val: &impl ActionMetaValue, + ) -> Option> { + self.insert(val.key(), val.as_meta()) + } + /// Remove the key-value pair with the specified key. Return /// the value, or `None` if no such entry exists. pub fn remove(&mut self, key: &str) -> Option> { @@ -89,23 +100,57 @@ impl ActionMeta { self.inner.get(key).map(|v| &**v) } + /// Parse the value stored at the well-known key for `T`, returning an + /// [`ActionMetaError`] if no such entry exists or it fails to parse. + pub fn get_typed( + &self, + ) -> Result> { + let raw_val = + self.get(T::KEY).ok_or(ActionMetaError::NotFound(PhantomData))?; + + T::from_meta(raw_val) + .map_err(|err| ActionMetaError::ParseFailed { raw_val, err }) + } + /// Records whether this packet's destination can be reached using only /// internal/private paths. /// /// The dataplane may use this to choose a larger (jumbo-frame) MSS for /// TCP segmentation, or rely on other aspects of its internal network. pub fn set_internal_target(&mut self, val: bool) { - _ = self - .insert(InternalTarget::KEY.into(), InternalTarget(val).as_meta()); + _ = self.insert_typed(&InternalTarget(val)); } /// Returns whether this packet's destination can be reached using only /// internal/private paths. pub fn is_internal_target(&self) -> bool { - self.get(InternalTarget::KEY) - .and_then(|v| InternalTarget::from_meta(v).ok()) - .unwrap_or_default() - .0 + self.get_typed::().unwrap_or_default().0 + } +} + +/// Failure modes when reading a target `impl ActionMetaValue` from [`ActionMeta`]. +#[derive(Debug)] +pub enum ActionMetaError<'a, T> { + /// No value was stored using the type's well-known key. + NotFound(PhantomData), + /// The stored value could not be deserialised into the requested type. 
+ ParseFailed { raw_val: &'a str, err: String }, +} + +impl<'a, T: core::fmt::Debug> core::error::Error for ActionMetaError<'a, T> {} + +impl<'a, T> core::fmt::Display for ActionMetaError<'a, T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + ActionMetaError::NotFound(_) => write!( + f, + "no {} metadata entry found", + core::any::type_name::() + ), + ActionMetaError::ParseFailed { raw_val, err } => { + write!(f, "failed to parse metadata entry '{raw_val}': {err}") + } + } } } diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 4527efe9..c23c44a3 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Predicates used for `Rule` matching. @@ -23,6 +23,7 @@ use super::ip::v6::Ipv6Ref; use super::ip::v6::v6_get_next_header; use super::packet::MblkPacketData; use super::port::meta::ActionMeta; +use super::port::meta::ActionMetaValue; use alloc::boxed::Box; use alloc::string::String; use alloc::string::ToString; @@ -610,6 +611,11 @@ impl Predicate { false } + + /// Create a `Predicate::Meta` matching a well-specified value. + pub fn from_action_meta(val: T) -> Self { + Self::Meta(val.key().into(), val.as_meta().into()) + } } #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 4247adb8..9cc4f9c0 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Rules and actions. 
@@ -174,8 +174,9 @@ where /// [`HdrTransform`] which implements the desired action. An /// ActionDesc is created by a [`StatefulAction`] implementation. pub trait ActionDesc: Send + Sync { - /// Generate the [`HdrTransform`] which implements this descriptor. - fn gen_ht(&self, dir: Direction) -> HdrTransform; + /// Generate the [`HdrTransform`] which implements this descriptor, and + /// apply any modifications to the [`ActionMeta`]. + fn gen_ht(&self, dir: Direction, meta: &mut ActionMeta) -> HdrTransform; /// Generate a body transformation. /// @@ -251,7 +252,7 @@ impl IdentityDesc { } impl ActionDesc for IdentityDesc { - fn gen_ht(&self, _dir: Direction) -> HdrTransform { + fn gen_ht(&self, _dir: Direction, _meta: &mut ActionMeta) -> HdrTransform { Default::default() } @@ -758,7 +759,7 @@ pub trait StatefulAction: Display + Send + Sync { &self, flow_id: &InnerFlowId, pkt: &Packet, - meta: &mut ActionMeta, + meta: &ActionMeta, ) -> GenDescResult; fn implicit_preds(&self) -> (Vec, Vec); diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index d3bbff22..1765abd7 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Types for working with IP Source NAT, both IPv4 and IPv6. 
@@ -33,6 +33,7 @@ use crate::api::L4Info; use crate::api::PortInfo; use crate::ddi::sync::KMutex; use crate::engine::icmp::QueryEcho; +use crate::engine::nat::ExternalIpTag; use alloc::collections::btree_map::BTreeMap; use alloc::string::ToString; use alloc::sync::Arc; @@ -91,7 +92,7 @@ type SNatAlloc = FiniteHandle>; mod private { use opte_api::Protocol; - pub trait Ip: Into { + pub trait Ip: Into + Send + Sync { const MESSAGE_PROTOCOL: Protocol; } @@ -218,7 +219,7 @@ impl From for GenDescError { } } -impl SNat { +impl SNat { pub fn new(addr: T) -> Self { SNat { priv_ip: addr, @@ -299,7 +300,7 @@ impl Display for SNat { } } -impl StatefulAction for SNat +impl StatefulAction for SNat where SNat: Display, { @@ -307,7 +308,7 @@ where &self, flow_id: &InnerFlowId, pkt: &Packet, - _meta: &mut ActionMeta, + _meta: &ActionMeta, ) -> GenDescResult { let proto = flow_id.protocol(); let priv_port = match flow_id.l4_info() { @@ -367,13 +368,15 @@ pub struct SNatDesc { pub const SNAT_NAME: &str = "SNAT"; -impl ActionDesc for SNatDesc { - fn gen_ht(&self, dir: Direction) -> HdrTransform { +impl ActionDesc for SNatDesc { + fn gen_ht(&self, dir: Direction, meta: &mut ActionMeta) -> HdrTransform { match dir { // Outbound traffic needs its source IP and source port Direction::Out => { let ip = IpMod::new_src(self.nat.entry.ip.into()); + meta.insert_typed(&ExternalIpTag); + HdrTransform { name: SNAT_NAME.to_string(), inner_ip: HeaderAction::Modify(ip), @@ -425,16 +428,18 @@ pub struct SNatIcmpEchoDesc { pub const SNAT_ICMP_ECHO_NAME: &str = "SNAT_ICMP_ECHO"; -impl ActionDesc for SNatIcmpEchoDesc { +impl ActionDesc for SNatIcmpEchoDesc { // SNAT needs to generate an additional transform for ICMP traffic in // order to treat the Echo Identifier as a psuedo ULP port. 
- fn gen_ht(&self, dir: Direction) -> HdrTransform { + fn gen_ht(&self, dir: Direction, meta: &mut ActionMeta) -> HdrTransform { match dir { // Outbound traffic needs its source IP rewritten, and its // 'source port' placed into the ICMP echo ID field. Direction::Out => { let ip = IpMod::new_src(self.nat.entry.ip.into()); + meta.insert_typed(&ExternalIpTag); + HdrTransform { name: SNAT_NAME.to_string(), inner_ip: HeaderAction::Modify(ip), @@ -560,7 +565,7 @@ mod test { // Verify descriptor generation. // ================================================================ let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match snat.gen_desc(&flow_out, &pkt, &mut action_meta) { + let desc = match snat.gen_desc(&flow_out, &pkt, &action_meta) { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; @@ -569,7 +574,7 @@ mod test { // ================================================================ // Verify outbound header transformation // ================================================================ - let out_ht = desc.gen_ht(Direction::Out); + let out_ht = desc.gen_ht(Direction::Out, &mut action_meta); out_ht.run(pkt.meta_mut()).unwrap(); let pmo = pkt.meta(); @@ -623,7 +628,7 @@ mod test { .to_full_meta(); pkt.compute_checksums(); - let in_ht = desc.gen_ht(Direction::In); + let in_ht = desc.gen_ht(Direction::In, &mut action_meta); in_ht.run(pkt.meta_mut()).unwrap(); let pmi = pkt.meta(); diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 8c67ec25..d8a49f17 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; @@ -130,6 +130,26 @@ pub struct BoundaryServices { pub mac: MacAddr, } +/// Configuration for a subnet completely owned by a NIC. +/// +/// When configured this port will allow all in/out traffic matching a CIDR to +/// be received/sent. +#[derive(Debug, Clone, Serialize, Deserialize, Default, Eq, PartialEq)] +pub struct AttachedSubnetConfig { + /// Denotes whether this attached subnet is an external IP block, + /// in which case OPTE will not apply NAT on matching traffic. + pub is_external: bool, +} + +/// Configuration for an exception to source/destination address filtering. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct TransitIpConfig { + /// Allow inbound traffic with a destination IP in the target CIDR. + pub allow_in: bool, + /// Allow outbound traffic with a source IP in the target CIDR. + pub allow_out: bool, +} + /// The IPv4 configuration of a VPC guest. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Ipv4Cfg { @@ -149,6 +169,13 @@ pub struct Ipv4Cfg { /// External IP assignments used for rack-external communication. pub external_ips: ExternalIpCfg, + + /// Subnets owned by this NIC. + pub attached_subnets: BTreeMap, + + /// Exceptions to source/destination address filtering without the guarantee + /// of ownership provided by `attached_subnets`. + pub transit_ips: BTreeMap, } /// The IPv6 configuration of a VPC guest. @@ -174,6 +201,13 @@ pub struct Ipv6Cfg { /// External IP assignments used for rack-external communication. pub external_ips: ExternalIpCfg, + + /// Subnets owned by this NIC. + pub attached_subnets: BTreeMap, + + /// Exceptions to source/destination address filtering without the guarantee + /// of ownership provided by `attached_subnets`. + pub transit_ips: BTreeMap, } /// Configuration of NAT assignments used by a VPC guest for external networking. 
@@ -264,6 +298,9 @@ pub struct VpcCfg { /// The host (sled) IPv6 address. All guests on the same sled are /// sourced to a single IPv6 address. pub phys_ip: Ipv6Addr, + + /// Configuration for DHCP responses created by OPTE. + pub dhcp: DhcpCfg, } impl VpcCfg { @@ -581,9 +618,6 @@ pub struct CreateXdeReq { /// details. pub cfg: VpcCfg, - /// Configuration for DHCP responses created by OPTE - pub dhcp: DhcpCfg, - /// This is a development tool for completely bypassing OPTE processing. /// /// XXX Pretty sure we aren't making much use of this anymore, and @@ -837,12 +871,14 @@ pub struct McastUnsubscribeAllReq { pub group: IpAddr, } +pub type InternetGatewayMap = BTreeMap>; + #[derive(Clone, Debug, Deserialize, Serialize)] pub struct SetExternalIpsReq { pub port_name: String, pub external_ips_v4: Option>, pub external_ips_v6: Option>, - pub inet_gw_map: Option>>, + pub inet_gw_map: Option, } #[derive(Debug, Deserialize, Serialize)] @@ -1291,6 +1327,31 @@ pub enum RemoveCidrResp { impl opte::api::cmd::CmdOk for RemoveCidrResp {} +/// Add an entry to the gateway allowing a port to send or receive +/// traffic on a CIDR other than its private IP. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct AttachSubnetReq { + pub port_name: String, + pub cidr: IpCidr, + pub cfg: AttachedSubnetConfig, +} + +/// Remove entries from the gateway allowing a port to send or receive +/// traffic on a specific CIDR other than its private IP. 
+#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct DetachSubnetReq { + pub port_name: String, + pub cidr: IpCidr, +} + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub enum DetachSubnetResp { + Ok(IpCidr), + NotFound, +} + +impl opte::api::cmd::CmdOk for DetachSubnetResp {} + #[cfg(test)] pub mod tests { use super::*; @@ -1392,6 +1453,8 @@ pub mod tests { floating_ips: vec![], }, vpc_subnet: "10.0.0.0/24".parse().unwrap(), + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { private_ip: "fd00::5".parse().unwrap(), @@ -1405,9 +1468,12 @@ pub mod tests { floating_ips: vec![], }, vpc_subnet: "fd00::/64".parse().unwrap(), + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }, vni: Vni::new(100u32).unwrap(), + dhcp: DhcpCfg::default(), } } } diff --git a/lib/oxide-vpc/src/cfg.rs b/lib/oxide-vpc/src/cfg.rs index d79767b4..e0377515 100644 --- a/lib/oxide-vpc/src/cfg.rs +++ b/lib/oxide-vpc/src/cfg.rs @@ -1,9 +1,18 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2026 Oxide Computer Company + //! Reconfigurable, internal configuration built from `oxide_vpc::api`. use crate::api; +use crate::api::AttachedSubnetConfig; use crate::api::ExternalIpCfg; #[cfg(any(feature = "test-help", test))] use crate::api::PhysNet; +use crate::api::TransitIpConfig; +use alloc::collections::BTreeMap; use opte::api::*; use opte::dynamic::Dynamic; @@ -25,6 +34,13 @@ pub struct Ipv4Cfg { /// External IP assignments used for rack-external communication. pub external_ips: Dynamic>, + + /// Subnets owned by this NIC. + pub attached_subnets: Dynamic>, + + /// Exceptions to source/destination address filtering without the guarantee + /// of ownership provided by `attached_subnets`. + pub transit_ips: Dynamic>, } /// The IPv6 configuration of a VPC guest. 
@@ -50,6 +66,13 @@ pub struct Ipv6Cfg { /// External IP assignments used for rack-external communication. pub external_ips: Dynamic>, + + /// Subnets owned by this NIC. + pub attached_subnets: Dynamic>, + + /// Exceptions to source/destination address filtering without the guarantee + /// of ownership provided by `attached_subnets`. + pub transit_ips: Dynamic>, } /// The IP configuration of a VPC guest. @@ -84,6 +107,9 @@ pub struct VpcCfg { /// The host (sled) IPv6 address. All guests on the same sled are /// sourced to a single IPv6 address. pub phys_ip: Ipv6Addr, + + /// Configuration for DHCP responses created by OPTE. + pub dhcp: DhcpCfg, } impl VpcCfg { @@ -184,6 +210,7 @@ impl From for VpcCfg { gateway_mac: value.gateway_mac, vni: value.vni, phys_ip: value.phys_ip, + dhcp: value.dhcp, } } } @@ -207,6 +234,8 @@ impl From for Ipv4Cfg { private_ip: value.private_ip, gateway_ip: value.gateway_ip, external_ips: value.external_ips.into(), + attached_subnets: value.attached_subnets.into(), + transit_ips: value.transit_ips.into(), } } } @@ -218,6 +247,8 @@ impl From for Ipv6Cfg { private_ip: value.private_ip, gateway_ip: value.gateway_ip, external_ips: value.external_ips.into(), + attached_subnets: value.attached_subnets.into(), + transit_ips: value.transit_ips.into(), } } } diff --git a/lib/oxide-vpc/src/engine/firewall.rs b/lib/oxide-vpc/src/engine/firewall.rs index ad0bbb05..78e16266 100644 --- a/lib/oxide-vpc/src/engine/firewall.rs +++ b/lib/oxide-vpc/src/engine/firewall.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The Oxide VPC firewall. //! 
@@ -18,9 +18,8 @@ use crate::api::Ports; pub use crate::api::ProtoFilter; use crate::api::RemFwRuleReq; use crate::api::SetFwRulesReq; -use crate::engine::overlay::ACTION_META_VNI; +use crate::engine::overlay::VniTag; use alloc::collections::BTreeSet; -use alloc::string::ToString; use alloc::vec::Vec; use core::num::NonZeroU32; use opte::api::Direction; @@ -236,10 +235,9 @@ impl Address { Predicate::InnerSrcIp6(vec![Ipv6AddrMatch::Prefix(ip6_sub)]), ), - (_, Address::Vni(vni)) => Some(Predicate::Meta( - ACTION_META_VNI.to_string(), - vni.to_string(), - )), + (_, Address::Vni(vni)) => { + Some(Predicate::from_action_meta(VniTag(vni))) + } } } } diff --git a/lib/oxide-vpc/src/engine/gateway/arp.rs b/lib/oxide-vpc/src/engine/gateway/arp.rs index d530ce16..c71b8e93 100644 --- a/lib/oxide-vpc/src/engine/gateway/arp.rs +++ b/lib/oxide-vpc/src/engine/gateway/arp.rs @@ -2,23 +2,21 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The ARP implementation of the Virtual Gateway. 
-use crate::cfg::VpcCfg; -use opte::api::Direction; +use super::BuildCtx; use opte::api::MacAddr; use opte::api::OpteError; use opte::engine::ether::ETHER_TYPE_ARP; -use opte::engine::layer::Layer; use opte::engine::predicate::EtherAddrMatch; use opte::engine::predicate::EtherTypeMatch; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; -pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { +pub(super) fn setup(ctx: &mut BuildCtx) -> Result<(), OpteError> { // ================================================================ // Outbound ARP Request for Gateway, from Guest // @@ -31,9 +29,11 @@ pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { Predicate::InnerEtherDst(vec![EtherAddrMatch::Exact( MacAddr::BROADCAST, )]), - Predicate::InnerEtherSrc(vec![EtherAddrMatch::Exact(cfg.guest_mac)]), + Predicate::InnerEtherSrc(vec![EtherAddrMatch::Exact( + ctx.cfg.guest_mac, + )]), ]); - layer.add_rule(Direction::Out, rule.finalize()); + ctx.out_rules.push(rule.finalize()); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index d10698e6..c834e101 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -2,31 +2,26 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The DHCP implementation of the Virtual Gateway. 
+use super::BuildCtx; use crate::cfg::Ipv4Cfg; -use crate::cfg::VpcCfg; use alloc::sync::Arc; -use opte::api::DhcpCfg; use opte::api::DhcpReplyType; -use opte::api::Direction; use opte::api::Ipv4Addr; use opte::api::Ipv4PrefixLen; use opte::api::OpteError; use opte::api::SubnetRouterPair; use opte::engine::dhcp::DhcpAction; use opte::engine::ip::v4::Ipv4Cidr; -use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; -pub fn setup( - layer: &mut Layer, - cfg: &VpcCfg, +pub(super) fn setup( + ctx: &mut BuildCtx, ip_cfg: &Ipv4Cfg, - dhcp_cfg: DhcpCfg, ) -> Result<(), OpteError> { // All guest interfaces live on a `/32`-network in the Oxide VPC; // restricting the L2 domain to two nodes: the guest NIC and the @@ -65,35 +60,36 @@ pub fn setup( ); let offer = Action::Hairpin(Arc::new(DhcpAction { - client_mac: cfg.guest_mac, + client_mac: ctx.cfg.guest_mac, client_ip: ip_cfg.private_ip, subnet_prefix_len: Ipv4PrefixLen::NETMASK_ALL, - gw_mac: cfg.gateway_mac, + gw_mac: ctx.cfg.gateway_mac, gw_ip: ip_cfg.gateway_ip, reply_type: DhcpReplyType::Offer, re1, re2: Some(re2), re3: None, - dhcp_cfg: dhcp_cfg.clone(), + dhcp_cfg: ctx.cfg.dhcp.clone(), })); let ack = Action::Hairpin(Arc::new(DhcpAction { - client_mac: cfg.guest_mac, + client_mac: ctx.cfg.guest_mac, client_ip: ip_cfg.private_ip, subnet_prefix_len: Ipv4PrefixLen::NETMASK_ALL, - gw_mac: cfg.gateway_mac, + gw_mac: ctx.cfg.gateway_mac, gw_ip: ip_cfg.gateway_ip, reply_type: DhcpReplyType::Ack, re1, re2: Some(re2), re3: None, - dhcp_cfg, + dhcp_cfg: ctx.cfg.dhcp.clone(), })); let discover_rule = Rule::new(1, offer); - layer.add_rule(Direction::Out, discover_rule.finalize()); - let request_rule = Rule::new(1, ack); - layer.add_rule(Direction::Out, request_rule.finalize()); + + ctx.out_rules.push(discover_rule.finalize()); + ctx.out_rules.push(request_rule.finalize()); + Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs index 
00bbec2a..273be2b3 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs @@ -2,28 +2,21 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The DHCPv6 implementation of the Virtual Gateway. -use crate::cfg::VpcCfg; +use super::BuildCtx; use alloc::sync::Arc; -use opte::api::DhcpCfg; -use opte::api::Direction; use opte::api::OpteError; use opte::engine::dhcpv6::AddressInfo; use opte::engine::dhcpv6::Dhcpv6Action; use opte::engine::dhcpv6::LeasedAddress; -use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; -pub fn setup( - layer: &mut Layer, - cfg: &VpcCfg, - dhcp_cfg: DhcpCfg, -) -> Result<(), OpteError> { - let ip_cfg = match cfg.ipv6_cfg() { +pub(super) fn setup(ctx: &mut BuildCtx) -> Result<(), OpteError> { + let ip_cfg = match ctx.cfg.ipv6_cfg() { None => return Ok(()), Some(ip_cfg) => ip_cfg, }; @@ -35,15 +28,14 @@ pub fn setup( renew: u32::MAX, }; let action = Dhcpv6Action { - client_mac: cfg.guest_mac, - server_mac: cfg.gateway_mac, + client_mac: ctx.cfg.guest_mac, + server_mac: ctx.cfg.gateway_mac, addrs, sntp_servers: vec![], - dhcp_cfg, + dhcp_cfg: ctx.cfg.dhcp.clone(), }; let server = Action::Hairpin(Arc::new(action)); - let rule = Rule::new(1, server); - layer.add_rule(Direction::Out, rule.finalize()); + ctx.out_rules.push(Rule::new(1, server).finalize()); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmp.rs b/lib/oxide-vpc/src/engine/gateway/icmp.rs index c4c48550..d9584332 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmp.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmp.rs @@ -2,23 +2,20 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The ICMP implementation of the Virtual Gateway. +use super::BuildCtx; use crate::cfg::Ipv4Cfg; -use crate::cfg::VpcCfg; use alloc::sync::Arc; -use opte::api::Direction; use opte::api::OpteError; use opte::engine::icmp::v4::IcmpEchoReply; -use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; -pub fn setup( - layer: &mut Layer, - cfg: &VpcCfg, +pub(super) fn setup( + ctx: &mut BuildCtx, ip_cfg: &Ipv4Cfg, ) -> Result<(), OpteError> { // ================================================================ @@ -27,12 +24,12 @@ pub fn setup( let reply = Action::Hairpin(Arc::new(IcmpEchoReply { // Map an Echo from guest (src) -> gateway (dst) to an Echo // Reply from gateway (dst) -> guest (src). - echo_src_mac: cfg.guest_mac, + echo_src_mac: ctx.cfg.guest_mac, echo_src_ip: ip_cfg.private_ip, - echo_dst_mac: cfg.gateway_mac, + echo_dst_mac: ctx.cfg.gateway_mac, echo_dst_ip: ip_cfg.gateway_ip, })); let rule = Rule::new(1, reply); - layer.add_rule(Direction::Out, rule.finalize()); + ctx.out_rules.push(rule.finalize()); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs index 0009acb1..6e2dbbb3 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs @@ -2,20 +2,18 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The ICMPv6 implementation of the Virtual Gateway. 
+use super::BuildCtx; use crate::cfg::Ipv6Cfg; -use crate::cfg::VpcCfg; use alloc::sync::Arc; -use opte::api::Direction; use opte::api::Ipv6Addr; use opte::api::OpteError; use opte::engine::icmp::v6::Icmpv6EchoReply; use opte::engine::icmp::v6::NeighborAdvertisement; use opte::engine::icmp::v6::RouterAdvertisement; -use opte::engine::layer::Layer; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; @@ -34,34 +32,33 @@ use smoltcp::wire::Icmpv6Message; // - Respond to NDP Neighbor Solicitations from the guest to the gateway. This // includes solicitations unicast to the gateway, and also delivered to the // solicited-node multicast group. -pub fn setup( - layer: &mut Layer, - cfg: &VpcCfg, +pub(super) fn setup( + ctx: &mut BuildCtx, ip_cfg: &Ipv6Cfg, ) -> Result<(), OpteError> { - let dst_ip = Ipv6Addr::from_eui64(&cfg.gateway_mac); + let dst_ip = Ipv6Addr::from_eui64(&ctx.cfg.gateway_mac); let hairpins = [ // We need to hairpin echo requests from either the VPC-private or // link-local address of the guest, to OPTE's link-local. Action::Hairpin(Arc::new(Icmpv6EchoReply { - src_mac: cfg.guest_mac, + src_mac: ctx.cfg.guest_mac, src_ip: ip_cfg.private_ip, - dst_mac: cfg.gateway_mac, + dst_mac: ctx.cfg.gateway_mac, dst_ip, })), Action::Hairpin(Arc::new(Icmpv6EchoReply { - src_mac: cfg.guest_mac, - src_ip: Ipv6Addr::from_eui64(&cfg.guest_mac), - dst_mac: cfg.gateway_mac, + src_mac: ctx.cfg.guest_mac, + src_ip: Ipv6Addr::from_eui64(&ctx.cfg.guest_mac), + dst_mac: ctx.cfg.gateway_mac, dst_ip, })), // Map an NDP Router Solicitation from the guest to a Router Advertisement // from the OPTE virtual gateway's link-local IPv6 address. Action::Hairpin(Arc::new(RouterAdvertisement::new( // From the guest's VPC MAC. - cfg.guest_mac, + ctx.cfg.guest_mac, // The MAC from which we respond, i.e., OPTE's MAC. 
- cfg.gateway_mac, + ctx.cfg.gateway_mac, // "Managed Configuration", indicating the guest needs to use DHCPv6 to // acquire an IPv6 address. true, @@ -71,9 +68,9 @@ pub fn setup( // per RFC 4861 so that the guest does not mark the neighbor failed. Action::Hairpin(Arc::new(NeighborAdvertisement::new( // From the guest's VPC MAC. - cfg.guest_mac, + ctx.cfg.guest_mac, // To OPTE's MAC. - cfg.gateway_mac, + ctx.cfg.gateway_mac, // Set the ROUTER flag to true. true, // Respond to solicitations from `::` @@ -84,11 +81,12 @@ pub fn setup( // UNWRAP SAFETY: There are far fewer than 65535 rules inserted here. let next_out_prio = u16::try_from(hairpins.len() + 1).unwrap(); // Add rules for the above actions. - hairpins.into_iter().enumerate().for_each(|(i, action)| { - let priority = u16::try_from(i + 1).unwrap(); - let rule = Rule::new(priority, action); - layer.add_rule(Direction::Out, rule.finalize()); - }); + ctx.out_rules.extend(hairpins.into_iter().enumerate().map( + |(i, action)| { + let priority = u16::try_from(i + 1).unwrap(); + Rule::new(priority, action).finalize() + }, + )); // Filter any uncaught in/out-bound NDP traffic. let pred = Predicate::Icmpv6MsgType(vec![ @@ -99,11 +97,11 @@ pub fn setup( let mut ndp_filter = Rule::new(next_out_prio, Action::Deny); ndp_filter.add_predicate(pred); - layer.add_rule(Direction::Out, ndp_filter.finalize()); + ctx.out_rules.push(ndp_filter.finalize()); let mut ndp_filter = Rule::new(1, Action::Deny); ndp_filter.add_predicate(in_pred); - layer.add_rule(Direction::In, ndp_filter.finalize()); + ctx.in_rules.push(ndp_filter.finalize()); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index b3ad7d4a..e2f410e8 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The Oxide VPC Virtual Gateway. //! @@ -55,19 +55,22 @@ //! allow multicast packets to reach guests and rewrite the source MAC //! to the gateway MAC, similar to unicast traffic. -use crate::api::DhcpCfg; +use crate::api::AttachedSubnetConfig; use crate::api::MacAddr; +use crate::api::TransitIpConfig; use crate::cfg::Ipv4Cfg; use crate::cfg::Ipv6Cfg; use crate::cfg::VpcCfg; -use crate::engine::overlay::ACTION_META_VNI; +use crate::engine::overlay::VniTag; use crate::engine::overlay::VpcMappings; -use alloc::string::ToString; +use alloc::collections::BTreeMap; +use alloc::collections::BTreeSet; use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; use core::fmt::Display; use opte::api::Direction; +use opte::api::NoResp; use opte::api::OpteError; use opte::engine::ether::EtherMod; use opte::engine::headers::HeaderAction; @@ -78,6 +81,7 @@ use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; use opte::engine::packet::MblkPacketData; +use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; use opte::engine::port::meta::ActionMeta; @@ -88,6 +92,7 @@ use opte::engine::predicate::Ipv6AddrMatch; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::AllowOrDeny; +use opte::engine::rule::Finalized; use opte::engine::rule::GenHtResult; use opte::engine::rule::HdrTransform; use opte::engine::rule::MetaAction; @@ -103,14 +108,22 @@ pub mod icmpv6; mod transit; pub use transit::*; +use super::VpcNetwork; + pub const NAME: &str = "gateway"; +struct BuildCtx<'a> { + in_rules: Vec>, + out_rules: Vec>, + cfg: &'a VpcCfg, + vpc_meta: Arc, +} + pub fn setup( pb: &PortBuilder, cfg: &VpcCfg, vpc_mappings: Arc, ft_limit: core::num::NonZeroU32, - dhcp_cfg: &DhcpCfg, ) -> Result<(), OpteError> { // We implement the gateway as a filtering layer in order to // 
enforce that any traffic that makes it past this layer is @@ -128,23 +141,50 @@ pub fn setup( let mut layer = Layer::new(NAME, pb.name(), actions, ft_limit); + let mut ctx = BuildCtx { + in_rules: vec![], + out_rules: vec![], + cfg, + vpc_meta: Arc::new(VpcMeta::new(vpc_mappings)), + }; + if let Some(ipv4_cfg) = cfg.ipv4_cfg() { - setup_ipv4( - &mut layer, - cfg, - ipv4_cfg, - vpc_mappings.clone(), - dhcp_cfg.clone(), - )?; + setup_ipv4(&mut ctx, ipv4_cfg)?; } if let Some(ipv6_cfg) = cfg.ipv6_cfg() { - setup_ipv6(&mut layer, cfg, ipv6_cfg, vpc_mappings, dhcp_cfg.clone())?; + setup_ipv6(&mut ctx, ipv6_cfg)?; } + layer.set_rules(ctx.in_rules, ctx.out_rules); + pb.add_layer(layer, Pos::Before("firewall")) } +// Recreates the full set of gateway rules on a given port in response to a +// change to the set of transit IPs or overall `IpCfg`. +pub fn set_gateway_rules( + port: &Port, + vpc_mappings: Arc, +) -> Result { + let mut ctx = BuildCtx { + in_rules: vec![], + out_rules: vec![], + cfg: &port.network().cfg, + vpc_meta: Arc::new(VpcMeta::new(vpc_mappings)), + }; + + if let Some(ipv4_cfg) = ctx.cfg.ipv4_cfg() { + setup_ipv4(&mut ctx, ipv4_cfg)?; + } + + if let Some(ipv6_cfg) = ctx.cfg.ipv6_cfg() { + setup_ipv6(&mut ctx, ipv6_cfg)?; + } + + port.set_rules(NAME, ctx.in_rules, ctx.out_rules).map(|_| NoResp::default()) +} + struct RewriteSrcMac { gateway_mac: MacAddr, } @@ -177,18 +217,31 @@ impl StaticAction for RewriteSrcMac { } } -fn setup_ipv4( - layer: &mut Layer, - cfg: &VpcCfg, - ip_cfg: &Ipv4Cfg, - vpc_mappings: Arc, - dhcp_cfg: DhcpCfg, -) -> Result<(), OpteError> { - arp::setup(layer, cfg)?; - dhcp::setup(layer, cfg, ip_cfg, dhcp_cfg)?; - icmp::setup(layer, cfg, ip_cfg)?; +struct Exceptions<'a, T> { + allow_in: BTreeSet<&'a T>, + allow_out: BTreeSet<&'a T>, +} + +fn compute_exceptions<'a, T: Ord>( + attached: &'a BTreeMap, + transit: &'a BTreeMap, +) -> Exceptions<'a, T> { + let allow_in: BTreeSet<_> = attached + .keys() + .chain(transit.iter().filter_map(|(k, 
v)| v.allow_in.then_some(k))) + .collect(); + let allow_out: BTreeSet<_> = attached + .keys() + .chain(transit.iter().filter_map(|(k, v)| v.allow_out.then_some(k))) + .collect(); + + Exceptions { allow_in, allow_out } +} - let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); +fn setup_ipv4(ctx: &mut BuildCtx, ip_cfg: &Ipv4Cfg) -> Result<(), OpteError> { + arp::setup(ctx)?; + dhcp::setup(ctx, ip_cfg)?; + icmp::setup(ctx, ip_cfg)?; // Outbound no-spoof rule: only allow traffic from the guest's IP and MAC. // This rule has no destination IP predicate, so it matches both unicast @@ -200,28 +253,28 @@ fn setup_ipv4( // unless the group is configured. In the future, we may want to explicitly // filter outbound multicast to only the groups configured via M2P to further // tighten spoof prevention at the gateway layer. - let mut nospoof_out = Rule::new(1000, Action::Meta(vpc_meta)); + let mut nospoof_out = Rule::new(1000, Action::Meta(ctx.vpc_meta.clone())); nospoof_out.add_predicate(Predicate::InnerSrcIp4(vec![ Ipv4AddrMatch::Exact(ip_cfg.private_ip), ])); nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ - EtherAddrMatch::Exact(cfg.guest_mac), + EtherAddrMatch::Exact(ctx.cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + ctx.out_rules.push(nospoof_out.finalize()); let mut unicast_in = Rule::new( 1000, Action::Static(Arc::new(RewriteSrcMac { - gateway_mac: cfg.gateway_mac, + gateway_mac: ctx.cfg.gateway_mac, })), ); unicast_in.add_predicate(Predicate::InnerDstIp4(vec![ Ipv4AddrMatch::Exact(ip_cfg.private_ip), ])); unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ - EtherAddrMatch::Exact(cfg.guest_mac), + EtherAddrMatch::Exact(ctx.cfg.guest_mac), ])); - layer.add_rule(Direction::In, unicast_in.finalize()); + ctx.in_rules.push(unicast_in.finalize()); // Inbound IPv4 multicast - rewrite source MAC to gateway and allow let ipv4_mcast = vec![Ipv4AddrMatch::Prefix(Ipv4Cidr::MCAST)]; @@ -230,28 +283,43 @@ fn setup_ipv4( let mut 
mcast_in_v4 = Rule::new( 1001, Action::Static(Arc::new(RewriteSrcMac { - gateway_mac: cfg.gateway_mac, + gateway_mac: ctx.cfg.gateway_mac, })), ); mcast_in_v4.add_predicate(Predicate::InnerDstIp4(ipv4_mcast)); mcast_in_v4.add_predicate(Predicate::InnerEtherDst(vec![ EtherAddrMatch::Multicast, ])); - layer.add_rule(Direction::In, mcast_in_v4.finalize()); + ctx.in_rules.push(mcast_in_v4.finalize()); + + // Plumb in any required exceptions to spoof prevention/filtering. + let transit = ip_cfg.transit_ips.load(); + let attached = ip_cfg.attached_subnets.load(); + + let Exceptions { allow_in, allow_out } = + compute_exceptions(&attached, &transit); + + for (place, dir, from) in [ + (&mut ctx.in_rules, Direction::In, allow_in), + (&mut ctx.out_rules, Direction::Out, allow_out), + ] { + place.extend(from.into_iter().map(|cidr| { + make_holepunch_rule( + ctx.cfg.guest_mac, + ctx.cfg.gateway_mac, + (*cidr).into(), + dir, + &ctx.vpc_meta, + ) + })); + } Ok(()) } -fn setup_ipv6( - layer: &mut Layer, - cfg: &VpcCfg, - ip_cfg: &Ipv6Cfg, - vpc_mappings: Arc, - dhcp_cfg: DhcpCfg, -) -> Result<(), OpteError> { - icmpv6::setup(layer, cfg, ip_cfg)?; - dhcpv6::setup(layer, cfg, dhcp_cfg)?; - let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); +fn setup_ipv6(ctx: &mut BuildCtx, ip_cfg: &Ipv6Cfg) -> Result<(), OpteError> { + icmpv6::setup(ctx, ip_cfg)?; + dhcpv6::setup(ctx)?; // Outbound no-spoof rule: only allow traffic from the guest's IP and MAC. // This rule has no destination IP predicate, so it matches both unicast @@ -263,42 +331,64 @@ fn setup_ipv6( // unless the group is configured. In the future, we may want to explicitly // filter outbound multicast to only the groups configured via M2P to further // tighten spoof prevention at the gateway layer. 
- let mut nospoof_out = Rule::new(1000, Action::Meta(vpc_meta)); + let mut nospoof_out = Rule::new(1000, Action::Meta(ctx.vpc_meta.clone())); nospoof_out.add_predicate(Predicate::InnerSrcIp6(vec![ Ipv6AddrMatch::Exact(ip_cfg.private_ip), ])); nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ - EtherAddrMatch::Exact(cfg.guest_mac), + EtherAddrMatch::Exact(ctx.cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + ctx.out_rules.push(nospoof_out.finalize()); let mut unicast_in = Rule::new( 1000, Action::Static(Arc::new(RewriteSrcMac { - gateway_mac: cfg.gateway_mac, + gateway_mac: ctx.cfg.gateway_mac, })), ); unicast_in.add_predicate(Predicate::InnerDstIp6(vec![ Ipv6AddrMatch::Exact(ip_cfg.private_ip), ])); unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ - EtherAddrMatch::Exact(cfg.guest_mac), + EtherAddrMatch::Exact(ctx.cfg.guest_mac), ])); - layer.add_rule(Direction::In, unicast_in.finalize()); + ctx.in_rules.push(unicast_in.finalize()); // Inbound IPv6 multicast - rewrite source MAC to gateway and allow let ipv6_mcast = vec![Ipv6AddrMatch::Prefix(Ipv6Cidr::MCAST)]; let mut mcast_in = Rule::new( 1001, Action::Static(Arc::new(RewriteSrcMac { - gateway_mac: cfg.gateway_mac, + gateway_mac: ctx.cfg.gateway_mac, })), ); mcast_in.add_predicate(Predicate::InnerDstIp6(ipv6_mcast)); mcast_in.add_predicate(Predicate::InnerEtherDst(vec![ EtherAddrMatch::Multicast, ])); - layer.add_rule(Direction::In, mcast_in.finalize()); + ctx.in_rules.push(mcast_in.finalize()); + + // Plumb in any required exceptions to spoof prevention/filtering. 
+ let transit = ip_cfg.transit_ips.load(); + let attached = ip_cfg.attached_subnets.load(); + + let Exceptions { allow_in, allow_out } = + compute_exceptions(&attached, &transit); + + for (place, dir, from) in [ + (&mut ctx.in_rules, Direction::In, allow_in), + (&mut ctx.out_rules, Direction::Out, allow_out), + ] { + place.extend(from.into_iter().map(|cidr| { + make_holepunch_rule( + ctx.cfg.guest_mac, + ctx.cfg.gateway_mac, + (*cidr).into(), + dir, + &ctx.vpc_meta, + ) + })); + } Ok(()) } @@ -325,8 +415,7 @@ impl MetaAction for VpcMeta { ) -> ModMetaResult { match self.vpc_mappings.ip_to_vni(&flow.dst_ip()) { Some(vni) => { - action_meta - .insert(ACTION_META_VNI.into(), vni.to_string().into()); + action_meta.insert_typed(&VniTag(vni)); Ok(AllowOrDeny::Allow(())) } diff --git a/lib/oxide-vpc/src/engine/gateway/transit.rs b/lib/oxide-vpc/src/engine/gateway/transit.rs index 9d58d3a1..f8206003 100644 --- a/lib/oxide-vpc/src/engine/gateway/transit.rs +++ b/lib/oxide-vpc/src/engine/gateway/transit.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Utility functions to allow a port to permit traffic on an //! additional set of CIDR blocks, e.g. 
to enable transit for @@ -10,18 +10,20 @@ use super::*; use crate::api::RemoveCidrResp; +use crate::cfg::IpCfg; use crate::engine::VpcNetwork; +use alloc::collections::btree_map::Entry; use opte::api::IpCidr; use opte::api::NoResp; use opte::engine::port::Port; use opte::engine::rule::Finalized; -fn make_holepunch_rule( +pub(super) fn make_holepunch_rule( guest_mac: MacAddr, gateway_mac: MacAddr, dest: IpCidr, dir: Direction, - vpc_mappings: Arc, + vpc_meta: &Arc, ) -> Rule { let (cidr_in_pred, cidr_out_pred) = match dest { IpCidr::Ip4(v4) => ( @@ -48,8 +50,7 @@ fn make_holepunch_rule( cidr_in.finalize() } Direction::Out => { - let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); - let mut cidr_out = Rule::new(1000, Action::Meta(vpc_meta)); + let mut cidr_out = Rule::new(1000, Action::Meta(vpc_meta.clone())); cidr_out.add_predicate(Predicate::InnerEtherSrc(vec![ EtherAddrMatch::Exact(guest_mac), ])); @@ -68,15 +69,7 @@ pub fn allow_cidr( dir: Direction, vpc_mappings: Arc, ) -> Result { - let rule = make_holepunch_rule( - port.mac_addr(), - port.network().cfg.gateway_mac, - dest, - dir, - vpc_mappings, - ); - port.add_rule(NAME, dir, rule)?; - Ok(NoResp::default()) + modify_cidr(port, dest, dir, vpc_mappings, true).map(|_| NoResp::default()) } /// Prevents a guest from sending/receiving traffic on a CIDR block @@ -87,22 +80,70 @@ pub fn remove_cidr( dir: Direction, vpc_mappings: Arc, ) -> Result { - let rule = make_holepunch_rule( - port.mac_addr(), - port.network().cfg.gateway_mac, - dest, - dir, - vpc_mappings, - ); + modify_cidr(port, dest, dir, vpc_mappings, false).map(|changed| { + if changed { + RemoveCidrResp::Ok(dest) + } else { + RemoveCidrResp::NotFound + } + }) +} - let maybe_id = port.find_rule(NAME, dir, &rule)?; - if let Some(id) = maybe_id { - port.remove_rule(NAME, dir, id)?; +fn modify_cidr( + port: &Port, + dest: IpCidr, + dir: Direction, + vpc_mappings: Arc, + allow: bool, +) -> Result { + let mut existing = false; + let mut remove = false; + + 
match (&port.network().cfg.ip_cfg, dest) { + (IpCfg::Ipv4(ipv4), IpCidr::Ip4(ipv4_cidr)) + | (IpCfg::DualStack { ipv4, .. }, IpCidr::Ip4(ipv4_cidr)) => { + ipv4.transit_ips.update(|v| { + let mut new = v.clone(); + let el = new.entry(ipv4_cidr); + existing = matches!(el, Entry::Occupied(_)); + if allow || existing { + let el = el.or_default(); + match dir { + Direction::In => el.allow_in = allow, + Direction::Out => el.allow_out = allow, + } + remove = !allow && !el.allow_in && !el.allow_out; + } + if remove { + new.remove(&ipv4_cidr); + } + Some(new) + }); + } + (IpCfg::Ipv6(ipv6), IpCidr::Ip6(ipv6_cidr)) + | (IpCfg::DualStack { ipv6, .. }, IpCidr::Ip6(ipv6_cidr)) => { + ipv6.transit_ips.update(|v| { + let mut new = v.clone(); + let el = new.entry(ipv6_cidr); + existing = matches!(el, Entry::Occupied(_)); + if allow || existing { + let el = el.or_default(); + match dir { + Direction::In => el.allow_in = allow, + Direction::Out => el.allow_out = allow, + } + remove = !allow && !el.allow_in && !el.allow_out; + } + if remove { + new.remove(&ipv6_cidr); + } + Some(new) + }); + } + _ => return Err(OpteError::InvalidIpCfg), } - Ok(if maybe_id.is_none() { - RemoveCidrResp::NotFound - } else { - RemoveCidrResp::Ok(dest) - }) + super::set_gateway_rules(port, vpc_mappings)?; + + Ok(existing) } diff --git a/lib/oxide-vpc/src/engine/geneve.rs b/lib/oxide-vpc/src/engine/geneve.rs index f26a2fd1..19ed2b01 100644 --- a/lib/oxide-vpc/src/engine/geneve.rs +++ b/lib/oxide-vpc/src/engine/geneve.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Geneve option types specific to the Oxide VPC dataplane. //! 
@@ -532,7 +532,7 @@ mod test { // Build a minimal packet with just one Multicast option #[rustfmt::skip] - let buf = vec![ + let buf = [ // UDP source 0x1E, 0x61, // UDP dest diff --git a/lib/oxide-vpc/src/engine/nat.rs b/lib/oxide-vpc/src/engine/nat.rs index 9f39d66a..473b63ba 100644 --- a/lib/oxide-vpc/src/engine/nat.rs +++ b/lib/oxide-vpc/src/engine/nat.rs @@ -2,13 +2,19 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use super::VpcNetwork; +use super::gateway; +use super::overlay::VpcMappings; use super::router::ROUTER_LAYER_NAME; use super::router::RouterTargetClass; use super::router::RouterTargetInternal; +use crate::api::AttachSubnetReq; +use crate::api::DetachSubnetReq; +use crate::api::DetachSubnetResp; use crate::api::ExternalIpCfg; +use crate::api::InternetGatewayMap; use crate::api::SetExternalIpsReq; use crate::cfg::IpCfg; use crate::cfg::Ipv4Cfg; @@ -21,6 +27,7 @@ use alloc::sync::Arc; use alloc::vec::Vec; use core::num::NonZeroU32; use opte::api::IpAddr; +use opte::api::IpCidr; use opte::api::Ipv4Addr; use opte::api::Ipv6Addr; use opte::api::OpteError; @@ -30,6 +37,7 @@ use opte::engine::ether::ETHER_TYPE_IPV6; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; +use opte::engine::nat::ExternalIpTagger; use opte::engine::nat::InboundNat; use opte::engine::nat::OutboundNat; use opte::engine::nat::VerifyAddr; @@ -49,6 +57,7 @@ use opte::engine::snat::SNat; use uuid::Uuid; pub const NAT_LAYER_NAME: &str = "nat"; +const EXTERNAL_ATTACHED_SUBNET_PRIORITY: u16 = 4; const FLOATING_ONE_TO_ONE_NAT_PRIORITY: u16 = 5; const EPHEMERAL_ONE_TO_ONE_NAT_PRIORITY: u16 = 10; const SNAT_PRIORITY: u16 = 100; @@ -115,25 +124,15 @@ pub fn setup( #[allow(clippy::type_complexity)] fn create_nat_rules( cfg: &VpcCfg, - inet_gw_map: Option>>, + 
inet_gw_map: Option<&InternetGatewayMap>, ) -> Result<(Vec>, Vec>), OpteError> { let mut in_rules = vec![]; let mut out_rules = vec![]; if let Some(ipv4_cfg) = cfg.ipv4_cfg() { - setup_ipv4_nat( - ipv4_cfg, - &mut in_rules, - &mut out_rules, - inet_gw_map.as_ref(), - )?; + setup_ipv4_nat(ipv4_cfg, &mut in_rules, &mut out_rules, inet_gw_map)?; } if let Some(ipv6_cfg) = cfg.ipv6_cfg() { - setup_ipv6_nat( - ipv6_cfg, - &mut in_rules, - &mut out_rules, - inet_gw_map.as_ref(), - )?; + setup_ipv6_nat(ipv6_cfg, &mut in_rules, &mut out_rules, inet_gw_map)?; } // Append an additional rule to drop any InternetGateway packets @@ -142,9 +141,8 @@ fn create_nat_rules( // internet gateways but have no valid source address on a selected // IGW. let mut out_igw_nat_miss = Rule::new(NO_EIP_PRIORITY, Action::Deny); - out_igw_nat_miss.add_predicate(Predicate::Meta( - RouterTargetClass::KEY.to_string(), - RouterTargetClass::InternetGateway.as_meta().into_owned(), + out_igw_nat_miss.add_predicate(Predicate::from_action_meta( + RouterTargetClass::InternetGateway, )); out_rules.push(out_igw_nat_miss.finalize()); @@ -166,6 +164,37 @@ fn setup_ipv4_nat( let in_nat = Arc::new(InboundNat::new(ip_cfg.private_ip, verifier.clone())); let external_cfg = ip_cfg.external_ips.load(); + let attached_subnets: Vec<_> = ip_cfg + .attached_subnets + .load() + .iter() + .filter_map(|(k, v)| v.is_external.then_some(Ipv4AddrMatch::Prefix(*k))) + .collect(); + + if !attached_subnets.is_empty() { + // Use of this rule implicitly requires that we have selected *an* + // InternetGateway routing target by the time we reach the overlay layer. + // Don't match on the RouterTargetClass as a predicate here, as we need + // to record that a known EIP was used as a source. 
+ let mut out_subnet = Rule::new( + EXTERNAL_ATTACHED_SUBNET_PRIORITY, + Action::Meta(Arc::new(ExternalIpTagger)), + ); + out_subnet + .add_predicate(Predicate::InnerSrcIp4(attached_subnets.clone())); + out_rules.push(out_subnet.finalize()); + + // Inbound rules here aren't *strictly* necessary, as the control plane + // should not be assigning us EIPs which overlap with these subnets. + // We would then fall through to the default `Allow`. + // + // Install these as belts and braces, regardless. + let mut in_subnet = + Rule::new(EXTERNAL_ATTACHED_SUBNET_PRIORITY, Action::Allow); + in_subnet.add_predicate(Predicate::InnerDstIp4(attached_subnets)); + in_rules.push(in_subnet.finalize()); + } + // Outbound IP selection needs to be gated upon which internet gateway was // chosen during routing. // We need to partition FIPs into separate lists based on which internet gateway @@ -324,6 +353,37 @@ fn setup_ipv6_nat( let in_nat = Arc::new(InboundNat::new(ip_cfg.private_ip, verifier.clone())); let external_cfg = ip_cfg.external_ips.load(); + let attached_subnets: Vec<_> = ip_cfg + .attached_subnets + .load() + .iter() + .filter_map(|(k, v)| v.is_external.then_some(Ipv6AddrMatch::Prefix(*k))) + .collect(); + + if !attached_subnets.is_empty() { + // Use of this rule implicitly requires that we have selected *an* + // InternetGateway routing target by the time we reach the overlay layer. + // Don't match on the RouterTargetClass as a predicate here, as we need + // to record that a known EIP was used as a source. + let mut out_subnet = Rule::new( + EXTERNAL_ATTACHED_SUBNET_PRIORITY, + Action::Meta(Arc::new(ExternalIpTagger)), + ); + out_subnet + .add_predicate(Predicate::InnerSrcIp6(attached_subnets.clone())); + out_rules.push(out_subnet.finalize()); + + // Inbound rules here aren't *strictly* necessary, as the control plane + // should not be assigning us EIPs which overlap with these subnets. + // We would then fall through to the default `Allow`. 
+ // + // Install these as belts and braces, regardless. + let mut in_subnet = + Rule::new(EXTERNAL_ATTACHED_SUBNET_PRIORITY, Action::Allow); + in_subnet.add_predicate(Predicate::InnerDstIp6(attached_subnets)); + in_rules.push(in_subnet.finalize()); + } + // See `setup_ipv4_nat` for an explanation on partitioning FIPs // by internet gateway ID. if !external_cfg.floating_ips.is_empty() { @@ -465,11 +525,11 @@ fn setup_ipv6_nat( Ok(()) } -pub fn set_nat_rules( - cfg: &VpcCfg, +pub fn set_external_ips( port: &Port, req: SetExternalIpsReq, ) -> Result<(), OpteError> { + let cfg = &port.network().cfg; // This procedure only holds one lock at a time: a `Dynamic`'s shared // space writelock, *or* the table lock via set_rules_soft. // The datapath will hold the table lock for processing, *and* the `Dynamic`'s @@ -497,6 +557,108 @@ pub fn set_nat_rules( _ => return Err(OpteError::InvalidIpCfg), } - let (in_rules, out_rules) = create_nat_rules(cfg, req.inet_gw_map)?; + refresh_nat_rules(port, req.inet_gw_map.as_ref()) +} + +pub fn attach_subnet( + port: &Port, + inet_gw_map: Option<&InternetGatewayMap>, + vpc_mappings: &Arc, + req: AttachSubnetReq, +) -> Result<(), OpteError> { + let cfg = &port.network().cfg; + let changed = match (req.cidr, &cfg.ip_cfg) { + (IpCidr::Ip4(v4), IpCfg::Ipv4(v4_cfg)) + | (IpCidr::Ip4(v4), IpCfg::DualStack { ipv4: v4_cfg, .. }) => { + v4_cfg.attached_subnets.update(|map| { + let install = if let Some(val) = map.get(&v4) { + val != &req.cfg + } else { + true + }; + install.then(|| { + let mut out = map.clone(); + out.insert(v4, req.cfg); + out + }) + }) + } + (IpCidr::Ip6(v6), IpCfg::Ipv6(v6_cfg)) + | (IpCidr::Ip6(v6), IpCfg::DualStack { ipv6: v6_cfg, .. 
}) => { + v6_cfg.attached_subnets.update(|map| { + let install = if let Some(val) = map.get(&v6) { + val != &req.cfg + } else { + true + }; + install.then(|| { + let mut out = map.clone(); + out.insert(v6, req.cfg); + out + }) + }) + } + // Trying to attach a CIDR class which this port cannot use. + _ => return Err(OpteError::InvalidIpCfg), + }; + + if changed { + refresh_nat_rules(port, inet_gw_map)?; + gateway::set_gateway_rules(port, vpc_mappings.clone())?; + } + + Ok(()) +} + +pub fn detach_subnet( + port: &Port, + inet_gw_map: Option<&InternetGatewayMap>, + vpc_mappings: &Arc, + req: DetachSubnetReq, +) -> Result { + let cfg = &port.network().cfg; + let changed = match (req.cidr, &cfg.ip_cfg) { + (IpCidr::Ip4(v4), IpCfg::Ipv4(v4_cfg)) + | (IpCidr::Ip4(v4), IpCfg::DualStack { ipv4: v4_cfg, .. }) => { + v4_cfg.attached_subnets.update(|map| { + map.contains_key(&v4).then(|| { + let mut out = map.clone(); + out.remove(&v4); + out + }) + }) + } + (IpCidr::Ip6(v6), IpCfg::Ipv6(v6_cfg)) + | (IpCidr::Ip6(v6), IpCfg::DualStack { ipv6: v6_cfg, .. }) => { + v6_cfg.attached_subnets.update(|map| { + map.contains_key(&v6).then(|| { + let mut out = map.clone(); + out.remove(&v6); + out + }) + }) + } + // Trying to attach a CIDR class which this port cannot use. 
+ _ => return Err(OpteError::InvalidIpCfg), + }; + + if changed { + refresh_nat_rules(port, inet_gw_map)?; + gateway::set_gateway_rules(port, vpc_mappings.clone())?; + } + + Ok(if !changed { + DetachSubnetResp::NotFound + } else { + DetachSubnetResp::Ok(req.cidr) + }) +} + +fn refresh_nat_rules( + port: &Port, + inet_gw_map: Option<&InternetGatewayMap>, +) -> Result<(), OpteError> { + let cfg = &port.network().cfg; + let (in_rules, out_rules) = create_nat_rules(cfg, inet_gw_map)?; port.set_rules_soft(NAT_LAYER_NAME, in_rules, out_rules) } diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index e4a356e8..979ddd77 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The Oxide Network VPC Overlay. //! @@ -24,10 +24,12 @@ use crate::engine::geneve::ValidOxideOption; use alloc::borrow::Cow; use alloc::collections::BTreeSet; use alloc::collections::btree_map::BTreeMap; +use alloc::string::String; use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::fmt; +use core::str::FromStr; use opte::api::Direction; use opte::api::Ipv4Addr; use opte::api::Ipv4Cidr; @@ -58,6 +60,7 @@ use opte::engine::ip::v6::Ipv6Push; use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; +use opte::engine::nat::ExternalIpTag; use opte::engine::packet::InnerFlowId; use opte::engine::packet::MblkPacketData; use opte::engine::port::PortBuilder; @@ -268,28 +271,36 @@ impl StaticAction for EncapAction { // The router layer determines a RouterTarget and stores it in // the meta map. We need to map this virtual target to a // physical one. 
- let target_str = match action_meta.get(RouterTargetInternal::IP_KEY) - { - Some(val) => val, - None => { - return Err(GenHtError::Unexpected { - msg: "no RouterTarget metadata entry found".to_string(), - }); - } - }; + let target = action_meta + .get_typed::() + .map_err(|e| GenHtError::Unexpected { msg: e.to_string() })?; - let target = RouterTargetInternal::from_meta(target_str).map_err( - |e| GenHtError::Unexpected { - msg: format!( - "failed to parse metadata entry '{target_str}': {e}", - ), - }, - )?; + let sent_from_eip = + action_meta.get_typed::().is_ok(); + + let recipient = match target { + RouterTargetInternal::Ip(virt_ip) => virt_ip, + _ => dst_ip, + }; match target { + // Currently, traffic directed at either attached external subnets or + // the external IPs of any other port always go through the V2B table. + // This requires a hairpin through the customer network, but provides + // strong isolation which some customers require. + // + // In future we want this to be a tunable property of the VPC. In this + // case we would require an extra table/poptrie per VPC, containing all + // external CIDR blocks visible across the VPC. We would then: + // * resolve `recipient` against this table when going via an IGW, + // pulling the address of the owner's primary NIC. + // * if found, resolve the primary NIC address against the V2P instead of + // the V2B. + // * Possibly add the Geneve external packet tag to the packet, esp. if + // crossing VPC boundaries. RouterTargetInternal::InternetGateway(_) => { - match self.v2b.get(&dst_ip) { - Some(phys) => { + match self.v2b.get(&recipient) { + Some(phys) if sent_from_eip => { // Hash the packet onto a route target. This is a very // rudimentary mechanism. Should level-up to an ECMP // algorithm with well known statistical properties. 
@@ -309,13 +320,17 @@ impl StaticAction for EncapAction { false, ) } - None => return Ok(AllowOrDeny::Deny), + + // Sending traffic to boundary services *requires* that + // it is originated from an external IP. + _ => return Ok(AllowOrDeny::Deny), } } - RouterTargetInternal::Ip(virt_ip) => { - match self.v2p.get(&virt_ip) { - Some(phys) => ( + RouterTargetInternal::Ip(_) + | RouterTargetInternal::VpcSubnet(_) => { + match self.v2p.get(&recipient) { + Some(phys) if !sent_from_eip => ( true, PhysNet { ether: phys.ether, @@ -325,9 +340,14 @@ impl StaticAction for EncapAction { false, ), - // The router target has specified a VPC IP we do not - // currently know about; this could be for two - // reasons: + // We have either attempted to forward traffic to a + // private IP/subnet from an external IP, or we failed + // to lookup the intended VPC IP. + // + // The former case can only occur when the guest is + // sending traffic from an attached external subnet. + // + // The latter case could arise for two reasons: // // 1. No such IP currently exists in the guest's VPC. // @@ -339,39 +359,7 @@ impl StaticAction for EncapAction { // the control plane; rather we drop the packet. If we // are dealing with scenario (2), the control plane // should eventually provide us with a mapping. - None => return Ok(AllowOrDeny::Deny), - } - } - - RouterTargetInternal::VpcSubnet(_) => { - match self.v2p.get(&flow_id.dst_ip()) { - Some(phys) => ( - true, - PhysNet { - ether: phys.ether, - ip: phys.ip, - vni: self.vni, - }, - false, - ), - - // The guest is attempting to contact a VPC IP we - // do not currently know about; this could be for - // two reasons: - // - // 1. No such IP currently exists in the guest's VPC. - // - // 2. The destination IP exists in the guest's - // VPC, but we do not yet have a mapping for - // it. 
- // - // We cannot differentiate these cases from the - // point of view of this code without more - // information from the control plane; rather we - // drop the packet. If we are dealing with - // scenario (2), the control plane should - // eventually provide us with a mapping. - None => return Ok(AllowOrDeny::Deny), + _ => return Ok(AllowOrDeny::Deny), } } } @@ -527,6 +515,23 @@ impl StaticAction for EncapAction { } } +/// Tag a packet with the VNI it will be sent on, or that was recorded in +/// encapsulation. +#[derive(Debug)] +pub(crate) struct VniTag(pub Vni); + +impl ActionMetaValue for VniTag { + const KEY: &'static str = "vni"; + + fn as_meta(&self) -> Cow<'static, str> { + self.0.to_string().into() + } + + fn from_meta(s: &str) -> Result { + Vni::from_str(s).map_err(|e| e.to_string()).map(Self) + } +} + #[derive(Default)] pub struct DecapAction {} @@ -544,8 +549,6 @@ impl fmt::Display for DecapAction { } } -pub const ACTION_META_VNI: &str = "vni"; - impl StaticAction for DecapAction { fn gen_ht( &self, @@ -588,7 +591,7 @@ impl StaticAction for DecapAction { // switch during NAT -- if found, `oxide_external_packet` // is filled. 
if !is_external { - action_meta.insert(ACTION_META_VNI.into(), vni.to_string().into()); + action_meta.insert_typed(&VniTag(vni)); } Ok(AllowOrDeny::Allow(HdrTransform { @@ -640,10 +643,7 @@ impl MetaAction for MulticastVniValidator { } // Check VNI from action metadata (set by DecapAction) - if let Some(vni_str) = action_meta.get(ACTION_META_VNI) - && let Ok(vni_val) = vni_str.parse::() - && let Ok(pkt_vni) = Vni::new(vni_val) - { + if let Ok(VniTag(pkt_vni)) = action_meta.get_typed() { let mcast_vni = Vni::new(DEFAULT_MULTICAST_VNI).unwrap(); // Allow if VNI matches this VPC or fleet-wide multicast VNI if pkt_vni == self.my_vni || pkt_vni == mcast_vni { diff --git a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index 6f03f892..3f8b823d 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ b/lib/oxide-vpc/src/engine/router.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! The Oxide Network VPC Router. //! @@ -68,17 +68,6 @@ pub enum RouterTargetInternal { } impl RouterTargetInternal { - pub const IP_KEY: &'static str = "router-target-ip"; - pub const GENERIC_META: &'static str = "ig"; - - pub fn generic_meta(&self) -> Cow<'static, str> { - Self::GENERIC_META.into() - } - - pub fn ip_key(&self) -> Cow<'static, str> { - Self::IP_KEY.into() - } - pub fn class(&self) -> RouterTargetClass { match self { RouterTargetInternal::InternetGateway(_) => { @@ -487,13 +476,8 @@ impl MetaAction for RouterAction { _flow_id: &InnerFlowId, meta: &mut ActionMeta, ) -> ModMetaResult { - // TODO: I don't think we need IP_KEY. 
- if let RouterTargetInternal::InternetGateway(_) = self.target { - meta.insert(self.target.key(), self.target.as_meta()); - } - meta.insert(self.target.ip_key(), self.target.as_meta()); - let rt_class = self.target.class(); - meta.insert(rt_class.key(), rt_class.as_meta()); + meta.insert_typed(&self.target); + meta.insert_typed(&self.target.class()); Ok(AllowOrDeny::Allow(())) } } diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index 0be752fe..2aeca0f6 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -8,7 +8,6 @@ use opte::ddi::mblk::MsgBlk; use opte_test_utils as common; use common::*; -use oxide_vpc::api::BOUNDARY_SERVICES_VNI; #[test] fn firewall_replace_rules() { @@ -348,11 +347,6 @@ fn firewall_external_inbound() { // // This will appear on the same VNI as guest. // ================================================================ - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; let guest_phys = TestIpPhys { ip: g1_cfg.phys_ip, mac: g1_cfg.guest_mac, @@ -365,7 +359,7 @@ fn firewall_external_inbound() { g1_cfg.guest_mac, g1_cfg.ipv4().private_ip, ); - pkt1_m = encap_external(pkt1_m, bsvc_phys, guest_phys); + pkt1_m = encap_external(pkt1_m, *BSVC_PHYS, guest_phys); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); // ================================================================ diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 7ff51ef6..c33039e2 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! Integration tests. //! 
@@ -60,7 +60,10 @@ use opte::ingot::types::HeaderParse; use opte::ingot::udp::Udp; use opte::ingot::udp::UdpRef; use opte_test_utils as common; +use oxide_vpc::api::AttachSubnetReq; +use oxide_vpc::api::AttachedSubnetConfig; use oxide_vpc::api::BOUNDARY_SERVICES_VNI; +use oxide_vpc::api::DetachSubnetReq; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::FirewallRule; use oxide_vpc::api::RouterClass; @@ -105,6 +108,8 @@ fn lab_cfg() -> VpcCfg { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }); VpcCfg { ip_cfg, @@ -121,6 +126,7 @@ fn lab_cfg() -> VpcCfg { phys_ip: Ipv6Addr::from([ 0xFD00, 0x0000, 0x00F7, 0x0101, 0x0000, 0x0000, 0x0000, 0x0001, ]), + dhcp: base_dhcp_config(), } } @@ -1018,6 +1024,8 @@ fn multi_external_setup( ephemeral_ip: v4_eph, floating_ips: v4s[first_float..].to_vec(), }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -1031,6 +1039,8 @@ fn multi_external_setup( ephemeral_ip: v6_eph, floating_ips: v6s[first_float..].to_vec(), }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; @@ -1094,11 +1104,6 @@ fn check_external_ip_inbound_behaviour( ext_v4: &[Ipv4Addr], ext_v6: &[Ipv6Addr], ) { - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; let g1_phys = TestIpPhys { ip: cfg.phys_ip, mac: cfg.guest_mac, vni: cfg.vni }; @@ -1129,7 +1134,7 @@ fn check_external_ip_inbound_behaviour( flow_port, 80, ); - let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, *BSVC_PHYS, g1_phys); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); let res = port.port.process(In, pkt1); @@ -1349,11 +1354,6 @@ fn external_ip_balanced_over_floating_ips() { #[test] fn external_ip_epoch_affinity_preserved() { let (mut g1, g1_cfg, ext_v4, ext_v6) = multi_external_ip_setup(2, 
true); - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; let g1_phys = TestIpPhys { ip: g1_cfg.phys_ip, mac: g1_cfg.guest_mac, @@ -1405,7 +1405,7 @@ fn external_ip_epoch_affinity_preserved() { }; let pkt1 = http_syn2(BS_MAC_ADDR, partner_ip, g1_cfg.guest_mac, ext_ip); - let mut pkt1_m = encap_external(pkt1, bsvc_phys, g1_phys); + let mut pkt1_m = encap_external(pkt1, *BSVC_PHYS, g1_phys); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); let res = g1.port.process(In, pkt1); @@ -1425,8 +1425,8 @@ fn external_ip_epoch_affinity_preserved() { // Bumping epoch on other layers (e.g., firewall) is typically fine, // since that won't affect the internal flowtable for NAT. // ==================================================================== - nat::set_nat_rules(&g1.cfg, &g1.port, req.clone()).unwrap(); - update!(g1, ["incr:epoch", "set:nat.rules.in=4, nat.rules.out=7",]); + nat::set_external_ips(&g1.port, req.clone()).unwrap(); + update!(g1, ["incr:epoch", "set:nat.rules.in=4, nat.rules.out=7"]); // ================================================================ // The reply packet must still originate from the ephemeral port @@ -1499,7 +1499,7 @@ fn external_ip_reconfigurable() { // based on destination prefix. 
inet_gw_map: None, }; - nat::set_nat_rules(&g1.cfg, &g1.port, req).unwrap(); + nat::set_external_ips(&g1.port, req).unwrap(); update!( g1, [ @@ -1764,12 +1764,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { mac: g1_cfg.guest_mac, vni: g1_cfg.vni, }; - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - pkt2_m = encap_external(pkt2_m, bsvc_phys, g1_phys); + pkt2_m = encap_external(pkt2_m, *BSVC_PHYS, g1_phys); pcap.add_pkt(&pkt2_m); let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); @@ -1824,7 +1819,7 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { &data[..], 2, ); - pkt4_m = encap_external(pkt4_m, bsvc_phys, g1_phys); + pkt4_m = encap_external(pkt4_m, *BSVC_PHYS, g1_phys); pcap.add_pkt(&pkt4_m); let pkt4 = parse_inbound(&mut pkt4_m, VpcParser {}).unwrap(); @@ -3720,6 +3715,8 @@ fn ephemeral_ip_preferred_over_snat_outbound() { ephemeral_ip: Some("10.60.1.20".parse().unwrap()), floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -3733,6 +3730,8 @@ fn ephemeral_ip_preferred_over_snat_outbound() { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; @@ -3812,6 +3811,8 @@ fn tcp_inbound() { ephemeral_ip: Some("10.60.1.20".parse().unwrap()), floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, ipv6: Ipv6Cfg { vpc_subnet: "fd00::/64".parse().unwrap(), @@ -3825,6 +3826,8 @@ fn tcp_inbound() { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; @@ -4377,12 +4380,306 @@ fn port_as_router_target() { let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); let res = g2.port.process(Out, pkt2); - incr!(g2, ["stats.port.out_modified, stats.port.out_uft_miss, uft.out",]); + incr!(g2, 
["stats.port.out_modified, stats.port.out_uft_miss, uft.out"]); expect_modified!(res, pkt2_m); let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap(); let res = g1.port.process(In, pkt2); expect_modified!(res, pkt2_m); + + // Removing CIDR blocks should piecewise remove the gateway rules. + gateway::remove_cidr(&g2.port, cidr, Direction::In, g2.vpc_map.clone()) + .unwrap(); + update!(g2, ["incr:epoch", "decr:gateway.rules.in"]); + gateway::remove_cidr(&g2.port, cidr, Direction::Out, g2.vpc_map.clone()) + .unwrap(); + update!(g2, ["incr:epoch", "decr:gateway.rules.out"]); +} + +// RFD 599 defines two mechanisms relating to attaching subnets to +// instances: attached external and attached VPC subnets. +// Both of these require in/out exceptions in the gateway layer, but differ +// on some points: +// - Attached VPC subnets require the control plane to insert a system +// router rule mapping cidr(subnet)->primary_ip(instance) on all other ports. +// This is the moral equivalent of the `port_as_router_target` test above, +// without manual user configuration. What we want to test here is how they +// differ, and that rules do not interfere with transit IPs bound to the +// same blocks. +// - Attached external subnets should exempt any matching inbound traffic +// from undergoing NAT, and must ensure that outbound traffic cannot be +// directly sent to a VPC-private address. +#[test] +fn internal_attached_subnets() { + let g1_cfg = g1_cfg(); + let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); + g1.port.start(); + set!(g1, "port_state=running"); + + // Attach the subnet. 
+ let cidr = "10.0.0.0/8".parse().unwrap(); + nat::attach_subnet( + &g1.port, + None, + &g1.vpc_map, + AttachSubnetReq { + port_name: g1.port.name().into(), + cidr, + cfg: AttachedSubnetConfig { is_external: false }, + }, + ) + .unwrap(); + + update!(g1, ["set:epoch=5", "incr:gateway.rules.in, gateway.rules.out"]); + + // Suppose there is another port (same non-attached subnet) on G1's node. + let partner_ip: Ipv4Addr = "172.30.0.6".parse().unwrap(); + g1.vpc_map.add(partner_ip.into(), g1_cfg.phys_addr()); + + let my_ip = "10.0.123.45".parse().unwrap(); + + let data = b"1234\0"; + + // We can receive traffic on this attached subnet. + let guest_phys = TestIpPhys { + ip: g1_cfg.phys_ip, + mac: g1_cfg.guest_mac, + vni: g1_cfg.vni, + }; + let partner_phys = TestIpPhys { + ip: g1_cfg.phys_ip, + mac: ox_vpc_mac([0xF0, 0x00, 0x66]), + vni: g1_cfg.vni, + }; + let mut pkt1_m = gen_icmpv4_echo_req( + partner_phys.mac, + g1_cfg.guest_mac, + partner_ip, + my_ip, + 7777, + 1, + data, + 1, + ); + pkt1_m = encap(pkt1_m, partner_phys, guest_phys); + + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); + incr!( + g1, + [ + "firewall.flows.in, firewall.flows.out", + "stats.port.in_modified, stats.port.in_uft_miss, uft.in", + ] + ); + + // And we can send traffic from an arbitrary IP in the subnet. + let mut pkt2_m = gen_icmpv4_echo_reply( + g1_cfg.guest_mac, + g1_cfg.gateway_mac, + my_ip, + partner_ip, + 7777, + 1, + data, + 1, + ); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); + incr!(g1, ["stats.port.out_modified, stats.port.out_uft_miss, uft.out"]); + + // Add/remove of an identical transit IP range should be a NO-OP. + // (`incr` here implicitly asserts that the gateway rule count is unchanged). 
+ gateway::allow_cidr(&g1.port, cidr, Direction::In, g1.vpc_map.clone()) + .unwrap(); + gateway::allow_cidr(&g1.port, cidr, Direction::Out, g1.vpc_map.clone()) + .unwrap(); + incr!(g1, ["epoch, epoch"]); + gateway::remove_cidr(&g1.port, cidr, Direction::In, g1.vpc_map.clone()) + .unwrap(); + gateway::remove_cidr(&g1.port, cidr, Direction::Out, g1.vpc_map.clone()) + .unwrap(); + incr!(g1, ["epoch, epoch"]); + + // ...until we remove the attachment itself. + nat::detach_subnet( + &g1.port, + None, + &g1.vpc_map, + DetachSubnetReq { port_name: g1.port.name().into(), cidr }, + ) + .unwrap(); + update!(g1, ["set:epoch=11", "decr:gateway.rules.in, gateway.rules.out"]); +} + +#[test] +fn external_attached_subnets_dont_apply_nat() { + let g1_cfg = g1_cfg(); + let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); + + g1.port.start(); + set!(g1, "port_state=running"); + + // Attach the subnet. + nat::attach_subnet( + &g1.port, + None, + &g1.vpc_map, + AttachSubnetReq { + port_name: g1.port.name().into(), + cidr: "8.0.0.0/8".parse().unwrap(), + cfg: AttachedSubnetConfig { is_external: true }, + }, + ) + .unwrap(); + + update!( + g1, + [ + "set:epoch=5", + "incr:gateway.rules.in, gateway.rules.out", + "incr:nat.rules.in, nat.rules.out" + ] + ); + + // Add default route. + router::add_entry( + &g1.port, + IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + RouterTarget::InternetGateway(None), + RouterClass::System, + ) + .unwrap(); + incr!(g1, ["epoch", "router.rules.out"]); + + let my_ext_ip = "8.8.8.8".parse().unwrap(); + let partner_ip = "1.1.1.1".parse().unwrap(); + + let data = b"1234\0"; + + // Have the guest receive a packet on an external IP in its owned + // 8.0.0.0/8 range. 
+ let guest_phys = TestIpPhys { + ip: g1_cfg.phys_ip, + mac: g1_cfg.guest_mac, + vni: g1_cfg.vni, + }; + let mut pkt1_m = gen_icmpv4_echo_req( + BS_MAC_ADDR, + g1_cfg.guest_mac, + partner_ip, + my_ext_ip, + 7777, + 1, + data, + 1, + ); + pkt1_m = encap_external(pkt1_m, *BSVC_PHYS, guest_phys); + + let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(In, pkt1); + expect_modified!(res, pkt1_m); + incr!( + g1, + [ + "firewall.flows.in, firewall.flows.out", + "stats.port.in_modified, stats.port.in_uft_miss, uft.in", + ] + ); + + // This packet must not have had its source/dest IP addresses altered. + let pkt1 = + parse_outbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); + assert_eq!(pkt1.meta().inner_ip4().unwrap().source(), partner_ip); + assert_eq!(pkt1.meta().inner_ip4().unwrap().destination(), my_ext_ip); + + // A reply packet from the guest on these IPs should also be unchanged, + // and must be directed at boundary services. + let mut pkt2_m = gen_icmpv4_echo_reply( + g1_cfg.guest_mac, + g1_cfg.gateway_mac, + my_ext_ip, + partner_ip, + 7777, + 1, + data, + 1, + ); + let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt2); + expect_modified!(res, pkt2_m); + incr!(g1, ["stats.port.out_modified, stats.port.out_uft_miss, uft.out"]); + let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); + let L3::Ipv6(outer_ip6) = pkt2.meta().outer_ip().unwrap() else { + panic!("Encapsulation must be IPv6."); + }; + assert_eq!(outer_ip6.source(), g1_cfg.phys_ip); + assert_eq!(outer_ip6.destination(), BSVC_PHYS.ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), my_ext_ip); + assert_eq!(pkt2.meta().inner_ip4().unwrap().destination(), partner_ip); +} + +#[test] +fn external_attached_subnets_cannot_reach_internal() { + let g1_cfg = g1_cfg(); + let mut g1 = oxide_net_setup("g1_port", &g1_cfg, None, None); + + g1.port.start(); + set!(g1, "port_state=running"); + + 
// Attach the subnet. + nat::attach_subnet( + &g1.port, + None, + &g1.vpc_map, + AttachSubnetReq { + port_name: g1.port.name().into(), + cidr: "8.0.0.0/8".parse().unwrap(), + cfg: AttachedSubnetConfig { is_external: true }, + }, + ) + .unwrap(); + + update!( + g1, + [ + "set:epoch=5", + "incr:gateway.rules.in, gateway.rules.out", + "incr:nat.rules.in, nat.rules.out" + ] + ); + + // Suppose there is another port (same non-attached subnet) on G1's node. + let partner_ip: Ipv4Addr = "172.30.0.6".parse().unwrap(); + g1.vpc_map.add(partner_ip.into(), g1_cfg.phys_addr()); + + let my_ext_ip = "8.8.8.8".parse().unwrap(); + + let data = b"1234\0"; + + // Have the guest attempt to send a packet from an external IP in its owned + // 8.0.0.0/8 range to a VPC-private address. As the source address is + // logically outside of the VPC-private scope we need to refuse to select a + // V2P mapping. + let mut pkt1_m = gen_icmpv4_echo_req( + g1_cfg.guest_mac, + g1_cfg.gateway_mac, + my_ext_ip, + partner_ip, + 7777, + 1, + data, + 1, + ); + + let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); + let res = g1.port.process(Out, pkt1); + assert_drop!( + res, + DropReason::Layer { name: "overlay", reason: DenyReason::Action } + ); } #[test] @@ -4433,6 +4730,8 @@ fn select_eip_conditioned_on_igw() { "192.168.0.4".parse().unwrap(), ], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, // Not really testing V6 here. Same principles apply. ipv6: Ipv6Cfg { @@ -4447,6 +4746,8 @@ fn select_eip_conditioned_on_igw() { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::new(), + transit_ips: BTreeMap::new(), }, }; @@ -4531,7 +4832,7 @@ fn select_eip_conditioned_on_igw() { // enables the limiting we aim to test here.
inet_gw_map: Some(inet_gw_map), }; - nat::set_nat_rules(&g1.cfg, &g1.port, req).unwrap(); + nat::set_external_ips(&g1.port, req).unwrap(); update!(g1, ["incr:epoch", "set:nat.rules.out=8"]); // Send an ICMP packet for each destination, and verify that the @@ -4794,16 +5095,10 @@ fn icmpv6_inner_has_nat_applied() { ..Default::default() }; - let bsvc_phys = TestIpPhys { - ip: BS_IP_ADDR, - mac: BS_MAC_ADDR, - vni: Vni::new(BOUNDARY_SERVICES_VNI).unwrap(), - }; - let pkt_m = MsgBlk::new_ethernet_pkt((&eth, &ip, &body_bytes)); let mut pkt_m = encap_external( pkt_m, - bsvc_phys, + *BSVC_PHYS, TestIpPhys { ip: g1_cfg.phys_ip, mac: g1_cfg.guest_mac, diff --git a/xde-tests/src/lib.rs b/xde-tests/src/lib.rs index f97a99fe..784ed161 100644 --- a/xde-tests/src/lib.rs +++ b/xde-tests/src/lib.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company use anyhow::Result; use anyhow::anyhow; @@ -44,6 +44,7 @@ use oxide_vpc::api::Vni; use oxide_vpc::api::VpcCfg; use rand::Rng; use std::cell::RefCell; +use std::collections::BTreeMap; use std::collections::HashSet; use std::process::Child; use std::process::Command; @@ -300,14 +301,17 @@ impl OptePort { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::default(), + transit_ips: BTreeMap::default(), }), guest_mac: guest_mac.parse().unwrap(), gateway_mac: "a8:40:25:00:00:01".parse().unwrap(), vni: Vni::new(DEFAULT_MULTICAST_VNI).unwrap(), phys_ip: phys_ip.parse().unwrap(), + dhcp: DhcpCfg::default(), }; let adm = OpteHdl::open()?; - adm.create_xde(name, cfg.clone(), DhcpCfg::default(), false)?; + adm.create_xde(name, cfg.clone(), false)?; Ok(OptePort { name: name.into(), cfg, @@ -337,6 +341,8 @@ impl OptePort { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::default(), + transit_ips: BTreeMap::default(), }, ipv6:
Ipv6Cfg { vpc_subnet: OVERLAY_NET_V6.parse().unwrap(), @@ -350,15 +356,18 @@ impl OptePort { ephemeral_ip: None, floating_ips: vec![], }, + attached_subnets: BTreeMap::default(), + transit_ips: BTreeMap::default(), }, }, guest_mac: guest_mac.parse().unwrap(), gateway_mac: "a8:40:25:00:00:01".parse().unwrap(), vni: Vni::new(DEFAULT_MULTICAST_VNI).unwrap(), phys_ip: phys_ip.parse().unwrap(), + dhcp: DhcpCfg::default(), }; let adm = OpteHdl::open()?; - adm.create_xde(name, cfg.clone(), DhcpCfg::default(), false)?; + adm.create_xde(name, cfg.clone(), false)?; Ok(OptePort { name: name.into(), cfg, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 576f3c24..e7653b13 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2025 Oxide Computer Company +// Copyright 2026 Oxide Computer Company //! xde - A mac provider for OPTE. //! @@ -277,11 +277,12 @@ use oxide_vpc::api::DEFAULT_MULTICAST_VNI; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DeleteXdeReq; -use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::DetachSubnetResp; use oxide_vpc::api::DumpMcastForwardingResp; use oxide_vpc::api::DumpMcastSubscriptionsResp; use oxide_vpc::api::DumpVirt2BoundaryResp; use oxide_vpc::api::DumpVirt2PhysResp; +use oxide_vpc::api::InternetGatewayMap; use oxide_vpc::api::ListPortsResp; use oxide_vpc::api::McastForwardingEntry; use oxide_vpc::api::McastSubscribeReq; @@ -593,8 +594,8 @@ pub struct XdeDev { // could setup ports for any number of network implementations. // However, that's not where things are today. pub port: Arc>, - vpc_cfg: VpcCfg, port_v2p: Arc, + port_igw_map: KMutex>, // Pass the packets through to the underlay devices, skipping // opte-core processing. 
@@ -631,6 +632,10 @@ impl XdeDev { unsafe { mac::mac_rx(self.mh, ptr::null_mut(), pkt.as_ptr()) } } } + + pub fn vpc_cfg(&self) -> &VpcCfg { + &self.port.network().cfg + } } // SAFETY: The sole pointer member (the mac handle) safely supports @@ -1024,6 +1029,16 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { hdlr_resp(&mut env, resp) } + OpteCmd::AttachSubnet => { + let resp = attach_subnet_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::DetachSubnet => { + let resp = detach_subnet_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + OpteCmd::SetMcastForwarding => { let resp = set_mcast_forwarding_hdlr(&mut env); hdlr_resp(&mut env, resp) @@ -1160,11 +1175,10 @@ fn create_xde(req: &CreateXdeReq) -> Result { port_v2p.clone(), state.v2b.clone(), state.ectx.clone(), - &req.dhcp, )?, port_v2p, vni: cfg.vni, - vpc_cfg: cfg, + port_igw_map: KMutex::new(None), passthrough: req.passthrough, u1, u2, @@ -1352,7 +1366,7 @@ fn delete_xde(req: &DeleteXdeReq) -> Result { } // Remove the VPC mappings for this port. - let cfg = &xde.vpc_cfg; + let cfg = xde.vpc_cfg(); let phys_net = PhysNet { ether: cfg.guest_mac, ip: cfg.phys_ip, vni: cfg.vni }; match cfg.ip_cfg { @@ -3140,7 +3154,6 @@ fn new_port( v2p: Arc, v2b: Arc, ectx: Arc, - dhcp_cfg: &DhcpCfg, ) -> Result>, OpteError> { let cfg = cfg.clone(); let name_cstr = match CString::new(name.as_str()) { @@ -3157,7 +3170,7 @@ fn new_port( // XXX some layers have no need for LFT, perhaps have two types // of Layer: one with, one without? 
- gateway::setup(&pb, &cfg, vpc_map.clone(), FT_LIMIT_ONE, dhcp_cfg)?; + gateway::setup(&pb, &cfg, vpc_map.clone(), FT_LIMIT_ONE)?; router::setup(&pb, &cfg, FT_LIMIT_ONE)?; nat::setup(&mut pb, &cfg, nat_ft_limit)?; overlay::setup(&pb, &cfg, v2p, m2p, v2b, FT_LIMIT_ONE)?; @@ -4205,7 +4218,11 @@ fn set_external_ips_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - nat::set_nat_rules(&dev.vpc_cfg, &dev.port, req)?; + let mut igw_map_lock = dev.port_igw_map.lock(); + *igw_map_lock = req.inet_gw_map.clone(); + + nat::set_external_ips(&dev.port, req)?; + Ok(NoResp::default()) } @@ -4236,27 +4253,56 @@ fn remove_cidr_hdlr( gateway::remove_cidr(&dev.port, req.cidr, req.dir, state.vpc_map.clone()) } +#[unsafe(no_mangle)] +fn attach_subnet_hdlr(env: &mut IoctlEnvelope) -> Result { + let req: oxide_vpc::api::AttachSubnetReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let igw_map_lock = dev.port_igw_map.lock(); + nat::attach_subnet(&dev.port, igw_map_lock.as_ref(), &state.vpc_map, req)?; + + Ok(NoResp::default()) +} + +#[unsafe(no_mangle)] +fn detach_subnet_hdlr( + env: &mut IoctlEnvelope, +) -> Result { + let req: oxide_vpc::api::DetachSubnetReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let igw_map_lock = dev.port_igw_map.lock(); + nat::detach_subnet(&dev.port, igw_map_lock.as_ref(), &state.vpc_map, req) +} + #[unsafe(no_mangle)] fn list_ports_hdlr() -> Result { let mut resp = ListPortsResp { ports: vec![] }; let state = get_xde_state(); let devs = state.devs.read(); for dev in devs.iter() { - let ipv4_state = - dev.vpc_cfg.ipv4_cfg().map(|cfg| cfg.external_ips.load()); - 
let ipv6_state = - dev.vpc_cfg.ipv6_cfg().map(|cfg| cfg.external_ips.load()); + let cfg = dev.vpc_cfg(); + let ipv4_state = cfg.ipv4_cfg().map(|cfg| cfg.external_ips.load()); + let ipv6_state = cfg.ipv6_cfg().map(|cfg| cfg.external_ips.load()); resp.ports.push(PortInfo { name: dev.port.name().to_string(), mac_addr: dev.port.mac_addr(), - ip4_addr: dev.vpc_cfg.ipv4_cfg().map(|cfg| cfg.private_ip), + ip4_addr: cfg.ipv4_cfg().map(|cfg| cfg.private_ip), ephemeral_ip4_addr: ipv4_state .as_ref() .and_then(|cfg| cfg.ephemeral_ip), floating_ip4_addrs: ipv4_state .as_ref() .map(|cfg| cfg.floating_ips.clone()), - ip6_addr: dev.vpc_cfg.ipv6_cfg().map(|cfg| cfg.private_ip), + ip6_addr: cfg.ipv6_cfg().map(|cfg| cfg.private_ip), ephemeral_ip6_addr: ipv6_state .as_ref() .and_then(|cfg| cfg.ephemeral_ip),