diff --git a/calico-vpp-agent/cni/cni_server.go b/calico-vpp-agent/cni/cni_server.go index 7182b6fa..01eaeb30 100644 --- a/calico-vpp-agent/cni/cni_server.go +++ b/calico-vpp-agent/cni/cni_server.go @@ -538,10 +538,17 @@ func (s *Server) createRedirectToHostRules() (uint32, error) { return types.InvalidID, fmt.Errorf("no main interface found") } for _, rule := range config.GetCalicoVppInitialConfig().RedirectToHostRules { + mainInterfaceAddress := mainInterface.GetAddress(vpplink.IPFamilyFromIP(rule.IP)) + if mainInterfaceAddress == nil { + return types.InvalidID, fmt.Errorf("error installing rule %v no address found on uplink", rule) + } err = s.vpp.AddSessionRedirect(&types.SessionRedirect{ - FiveTuple: types.NewDst3Tuple(rule.Proto, net.ParseIP(rule.IP), rule.Port), + FiveTuple: types.NewDst3Tuple(rule.Proto, rule.IP, rule.Port), TableIndex: index, - }, &types.RoutePath{Gw: config.VppHostPuntFakeGatewayAddress, SwIfIndex: mainInterface.TapSwIfIndex}) + }, &types.RoutePath{ + Gw: mainInterfaceAddress.IP, + SwIfIndex: mainInterface.TapSwIfIndex, + }) if err != nil { return types.InvalidID, err } diff --git a/calico-vpp-agent/routing/bgp_watcher.go b/calico-vpp-agent/routing/bgp_watcher.go index 56e69103..a6483204 100644 --- a/calico-vpp-agent/routing/bgp_watcher.go +++ b/calico-vpp-agent/routing/bgp_watcher.go @@ -543,10 +543,14 @@ func (s *Server) WatchBGPPath(t *tomb.Tomb) error { peer := localPeer.Peer filters := localPeer.BGPFilterNames // create a neighbor set to apply filter only on specific peer using a global policy + prefixLen := "/32" + if ip := net.ParseIP(peer.Conf.NeighborAddress); ip != nil && ip.To4() == nil { + prefixLen = "/128" + } neighborSet := &bgpapi.DefinedSet{ Name: peer.Conf.NeighborAddress + "neighbor", DefinedType: bgpapi.DefinedType_NEIGHBOR, - List: []string{peer.Conf.NeighborAddress + "/32"}, + List: []string{peer.Conf.NeighborAddress + prefixLen}, } err := s.BGPServer.AddDefinedSet(context.Background(), &bgpapi.AddDefinedSetRequest{ DefinedSet: neighborSet, @@ -580,9 +584,18 @@ func (s *Server) WatchBGPPath(t *tomb.Tomb) error { if err != nil { return errors.Wrapf(err, "error cleaning peer filters up") } - err = s.BGPServer.DeleteDefinedSet(context.Background(), &bgpapi.DeleteDefinedSetRequest{DefinedSet: s.bgpPeers[addr].NeighborSet, All: true}) - if err != nil { - return errors.Wrapf(err, "error deleting prefix set") + if s.bgpPeers[addr] == nil { + s.log.Warnf("Trying to delete unknown BGP peer %s", addr) + } else if s.bgpPeers[addr].NeighborSet == nil { + s.log.Warnf("Trying to delete BGP peer %s with empty NeighborSet", addr) + } else { + err = s.BGPServer.DeleteDefinedSet(context.Background(), &bgpapi.DeleteDefinedSetRequest{ + DefinedSet: s.bgpPeers[addr].NeighborSet, + All: true, + }) + if err != nil { + return errors.Wrapf(err, "error deleting prefix set") + } } err := s.BGPServer.DeletePeer( context.Background(), diff --git a/calico-vpp-agent/routing/routing_server.go b/calico-vpp-agent/routing/routing_server.go index 046e1a28..51b4fc0c 100644 --- a/calico-vpp-agent/routing/routing_server.go +++ b/calico-vpp-agent/routing/routing_server.go @@ -31,6 +31,7 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" + "github.com/projectcalico/vpp-dataplane/v3/config" "github.com/projectcalico/vpp-dataplane/v3/vpplink" ) @@ -114,17 +115,28 @@ func (s *Server) ServeRouting(t *tomb.Tomb) (err error) { } for t.Alive() { - globalConfig, err := s.getGoBGPGlobalConfig() + nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) + globalConfig, err := s.getGoBGPGlobalConfig(*config.BGPServerMode) if err != nil { return fmt.Errorf("cannot get global configuration: %v", err) } err = s.BGPServer.StartBgp(context.Background(), &bgpapi.StartBgpRequest{Global: globalConfig}) - if err != nil { + if err != nil && *config.BGPServerMode == config.BGPServerModeDualStack && nodeIP4 != nil { + s.log.Warnf("Failed to start BGP server in dualStack mode: %v. Retrying with IPv4-only listener", err) + globalConfig, err = s.getGoBGPGlobalConfig(config.BGPServerModeV4Only) + if err != nil { + return errors.Wrap(err, "cannot get IPv4-only BGP configuration for fallback") + } + err = s.BGPServer.StartBgp(context.Background(), &bgpapi.StartBgpRequest{Global: globalConfig}) + if err != nil { + return errors.Wrap(err, "failed to start BGP server after IPv4-only fallback") + } + s.log.Warn("BGP server started in degraded IPv4-only mode because IPv6 listener failed") + } else if err != nil { return errors.Wrap(err, "failed to start BGP server") } - nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) if nodeIP4 != nil { err = s.initialPolicySetting(false /* isv6 */) if err != nil { @@ -176,7 +188,7 @@ func (s *Server) getLogSeverityScreen() string { return s.BGPConf.LogSeverityScreen } -func (s *Server) getGoBGPGlobalConfig() (*bgpapi.Global, error) { +func (s *Server) getGoBGPGlobalConfig(mode config.BGPServerModeType) (*bgpapi.Global, error) { var routerID string listenAddresses := make([]string, 0) asn := s.nodeBGPSpec.ASNumber @@ -185,11 +197,25 @@ func (s *Server) getGoBGPGlobalConfig() (*bgpapi.Global, error) { } nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) - if nodeIP6 != nil { + useIP4 := nodeIP4 != nil + useIP6 := nodeIP6 != nil + + switch mode { + case config.BGPServerModeDualStack: + case config.BGPServerModeV4Only: + useIP6 = false + if !useIP4 { + return nil, fmt.Errorf("BGP server mode set to v4Only but no IPv4 node address configured") + } + default: + return nil, fmt.Errorf("unsupported BGP server mode %q", mode) + } + + if useIP6 { routerID = nodeIP6.String() listenAddresses = append(listenAddresses, routerID) } - if nodeIP4 != nil { + if useIP4 { routerID = nodeIP4.String() // Override v6 ID if v4 is available listenAddresses = append(listenAddresses, routerID) } diff --git a/config/config.go b/config/config.go index 4f113a23..3861dfeb 100644 --- a/config/config.go +++ b/config/config.go @@ -64,14 +64,22 @@ const ( BaseVppSideHardwareAddress = "02:ca:11:c0:fd:00" ) +type BGPServerModeType string + +const ( + BGPServerModeDualStack BGPServerModeType = "dualStack" + BGPServerModeV4Only BGPServerModeType = "v4Only" +) + var ( // fake constants for place where we need a pointer to true or false True = true False = false - NodeName = RequiredStringEnvVar("NODENAME") - LogLevel = EnvVar("CALICOVPP_LOG_LEVEL", logrus.InfoLevel, logrus.ParseLevel) - BGPLogLevel = EnvVar("CALICOVPP_BGP_LOG_LEVEL", apipb.SetLogLevelRequest_INFO, BGPLogLevelParse) + NodeName = RequiredStringEnvVar("NODENAME") + LogLevel = EnvVar("CALICOVPP_LOG_LEVEL", logrus.InfoLevel, logrus.ParseLevel) + BGPLogLevel = EnvVar("CALICOVPP_BGP_LOG_LEVEL", apipb.SetLogLevelRequest_INFO, BGPLogLevelParse) + BGPServerMode = EnvVar("CALICOVPP_BGP_SERVER_MODE", BGPServerModeDualStack, BGPServerModeParse) ServiceCIDRs = PrefixListEnvVar("SERVICE_PREFIX") IPSecIkev2Psk = StringEnvVar("CALICOVPP_IPSEC_IKEV2_PSK", "") @@ -115,6 +123,8 @@ var ( /* Run this before getLinuxConfig() in case this is a script * that's responsible for creating the interface */ HookScriptBeforeIfRead = StringEnvVar("CALICOVPP_HOOK_BEFORE_IF_READ", DefaultHookScript) // InitScriptTemplate + /* Bash script template run to capture host udev properties before driver unbind */ + HookScriptCaptureHostUdevProps = StringEnvVar("CALICOVPP_HOOK_CAPTURE_HOST_UDEV_PROPS", DefaultHookScript) /* Bash script template run just after getting config from $CALICOVPP_INTERFACE & before starting VPP */ HookScriptBeforeVppRun = StringEnvVar("CALICOVPP_HOOK_BEFORE_VPP_RUN", DefaultHookScript) // InitPostIfScriptTemplate @@ -127,17 +137,24 @@ var ( AllHooks = []*string{ HookScriptBeforeIfRead, + HookScriptCaptureHostUdevProps, HookScriptBeforeVppRun, HookScriptVppRunning, HookScriptVppDoneOk, HookScriptVppErrored, } - Info = &VppManagerInfo{} + Info = &VppManagerInfo{ + UplinkStatuses: make(map[string]UplinkStatus), + PhysicalNets: make(map[string]PhysicalNetwork), + } - // VppHostPuntFakeGatewayAddress is the fake gateway we use with a static neighbor - // in the punt table to route punted packets to the host - VppHostPuntFakeGatewayAddress = net.ParseIP("169.254.0.1") + // VppsideTap0Address is the IP address we add to the tap0 + // so that it can receive ipv4 packets + VppsideTap0Address = PrefixEnvVar( + "CALICOVPP_TAP0_ADDR", + MustParseCIDR("169.254.0.1/32"), + ) ) func RunHook(hookScript *string, hookName string, params *VppManagerParams, log *logrus.Logger) { @@ -150,7 +167,7 @@ func RunHook(hookScript *string, hookName string, params *VppManagerParams, log return } - cmd := exec.Command("/bin/bash", "-c", template, hookName) + cmd := exec.Command("/bin/bash", "-c", template, hookName, params.UplinksSpecs[0].InterfaceName) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr err = cmd.Run() @@ -271,7 +288,7 @@ func (u *UplinkInterfaceSpec) String() string { type RedirectToHostRulesConfigType struct { Port uint16 `json:"port,omitempty"` - IP string `json:"ip,omitempty"` + IP net.IP `json:"ip,omitempty"` /* "tcp", "udp",... */ Proto types.IPProto `json:"proto,omitempty"` } @@ -567,6 +584,17 @@ type UplinkStatus struct { // FakeNextHopIP6 is the computed next hop for v6 routes added // in linux to (ServiceCIDR, podCIDR, etc...) towards this interface FakeNextHopIP6 net.IP + + UplinkAddresses []*net.IPNet +} + +func (uplinkStatus *UplinkStatus) GetAddress(ipFamily vpplink.IPFamily) *net.IPNet { + for _, addr := range uplinkStatus.UplinkAddresses { + if vpplink.IPFamilyFromIPNet(addr) == ipFamily { + return addr + } + } + return nil } type PhysicalNetwork struct { diff --git a/config/config_parse.go b/config/config_parse.go index c72ca904..ca3002e2 100644 --- a/config/config_parse.go +++ b/config/config_parse.go @@ -162,7 +162,18 @@ func prefixParser(value string) (net.IPNet, error) { func RequiredPrefixEnvVar(varName string) *net.IPNet { return RequiredEnvVar(varName, net.IPNet{}, prefixParser) } -func PrefixEnvVar(varName string) *net.IPNet { return EnvVar(varName, net.IPNet{}, prefixParser) } + +func PrefixEnvVar(varName string, defaultValue *net.IPNet) *net.IPNet { + return EnvVar(varName, *defaultValue, prefixParser) +} + +func MustParseCIDR(str string) *net.IPNet { + _, cidr, err := net.ParseCIDR(str) + if err != nil { + logrus.Fatalf("error parsing %s as cidr %v", str, err) + } + return cidr +} func prefixListParser(value string) ([]*net.IPNet, error) { chunks := strings.Split(value, ",") @@ -257,3 +268,14 @@ func BGPLogLevelParse(lvl string) (apipb.SetLogLevelRequest_Level, error) { var l apipb.SetLogLevelRequest_Level return l, fmt.Errorf("not a valid logrus Level: %q", lvl) } + +func BGPServerModeParse(mode string) (BGPServerModeType, error) { + switch strings.ToLower(mode) { + case strings.ToLower(string(BGPServerModeDualStack)): + return BGPServerModeDualStack, nil + case "v4only": + return BGPServerModeV4Only, nil + } + + return BGPServerModeDualStack, fmt.Errorf("not a valid BGP server mode: %q", mode) +} diff --git a/config/default_hook.sh b/config/default_hook.sh index 967e773b..f82ce5ef 100644 --- a/config/default_hook.sh +++ b/config/default_hook.sh @@ -1,7 +1,10 @@ #!/bin/sh HOOK="$0" -chroot /host /bin/sh < /dev/null 2>&1; then @@ -39,6 +42,108 @@ restart_network () { fi } +capture_udev_net_name_properties () { + echo "default_hook: Capturing udev net name properties for $INTERFACE_NAME..." + + UDEV_INFO=$(udevadm info /sys/class/net/$INTERFACE_NAME 2>/dev/null) + if [ -z "$UDEV_INFO" ]; then + echo "default_hook: Failed to get udevadm info for $INTERFACE_NAME" + return + fi + + # Extract ID_NET_NAME_* properties + ID_NET_NAME_ONBOARD=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_ONBOARD=" | sed 's/.*ID_NET_NAME_ONBOARD=//') + ID_NET_NAME_SLOT=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_SLOT=" | sed 's/.*ID_NET_NAME_SLOT=//') + ID_NET_NAME_PATH=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_PATH=" | sed 's/.*ID_NET_NAME_PATH=//') + ID_NET_NAME_MAC=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_MAC=" | sed 's/.*ID_NET_NAME_MAC=//') + + # Check if we have any properties to save + if [ -z "$ID_NET_NAME_ONBOARD" ] && [ -z "$ID_NET_NAME_SLOT" ] && [ -z "$ID_NET_NAME_PATH" ] && [ -z "$ID_NET_NAME_MAC" ]; then + echo "default_hook: No udev net name properties found for $INTERFACE_NAME" + return + fi + + # Get MAC address + MAC_ADDRESS=$(cat /sys/class/net/$INTERFACE_NAME/address 2>/dev/null) + if [ -z "$MAC_ADDRESS" ]; then + echo "default_hook: Failed to get MAC address for $INTERFACE_NAME" + return + fi + + # Save properties to temp file for later use + mkdir -p /var/run/vpp + echo "MAC_ADDRESS=$MAC_ADDRESS" > /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_ONBOARD" ] && echo "ID_NET_NAME_ONBOARD=$ID_NET_NAME_ONBOARD" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_SLOT" ] && echo "ID_NET_NAME_SLOT=$ID_NET_NAME_SLOT" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_PATH" ] && echo "ID_NET_NAME_PATH=$ID_NET_NAME_PATH" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_MAC" ] && echo "ID_NET_NAME_MAC=$ID_NET_NAME_MAC" >> /var/run/vpp/udev_props_$INTERFACE_NAME + + echo "default_hook: Captured udev properties for $INTERFACE_NAME (MAC: $MAC_ADDRESS)" + [ -n "$ID_NET_NAME_ONBOARD" ] && echo "default_hook: ID_NET_NAME_ONBOARD=$ID_NET_NAME_ONBOARD" + [ -n "$ID_NET_NAME_SLOT" ] && echo "default_hook: ID_NET_NAME_SLOT=$ID_NET_NAME_SLOT" + [ -n "$ID_NET_NAME_PATH" ] && echo "default_hook: ID_NET_NAME_PATH=$ID_NET_NAME_PATH" + [ -n "$ID_NET_NAME_MAC" ] && echo "default_hook: ID_NET_NAME_MAC=$ID_NET_NAME_MAC" +} + +create_udev_net_name_rule () { + PROPS_FILE="/var/run/vpp/udev_props_$INTERFACE_NAME" + if [ ! -f "$PROPS_FILE" ]; then + echo "default_hook: No udev properties captured for $INTERFACE_NAME, skipping rule creation" + return + fi + + # Source the properties file + . "$PROPS_FILE" + + if [ -z "$MAC_ADDRESS" ]; then + echo "default_hook: No MAC address captured for $INTERFACE_NAME, skipping rule creation" + return + fi + + echo "default_hook: Creating udev rule for $INTERFACE_NAME with MAC $MAC_ADDRESS..." + + # Build the udev rule + RULE_FILE="/etc/udev/rules.d/99-vpp-restore-id_net_name.rules" + echo "# Re-apply ID_NET_NAME_* properties after Calico VPP creates the host-facing tap/tun netdev." > "$RULE_FILE" + printf 'ACTION=="add", SUBSYSTEM=="net", ATTR{address}=="%s"' "$MAC_ADDRESS" >> "$RULE_FILE" + + [ -n "$ID_NET_NAME_ONBOARD" ] && printf ', ENV{ID_NET_NAME_ONBOARD}:="%s"' "$ID_NET_NAME_ONBOARD" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_SLOT" ] && printf ', ENV{ID_NET_NAME_SLOT}:="%s"' "$ID_NET_NAME_SLOT" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_PATH" ] && printf ', ENV{ID_NET_NAME_PATH}:="%s"' "$ID_NET_NAME_PATH" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_MAC" ] && printf ', ENV{ID_NET_NAME_MAC}:="%s"' "$ID_NET_NAME_MAC" >> "$RULE_FILE" + + echo "" >> "$RULE_FILE" + + echo "default_hook: Created udev rule file at $RULE_FILE" + + # Reload udev rules + udevadm control --reload-rules + + # Trigger udev for net subsystem to apply the stored ID_NET_NAME_* properties + udevadm trigger --subsystem-match=net --action=add + echo "default_hook: Triggered udev to apply the stored ID_NET_NAME_* properties" +} + +remove_udev_net_name_rule () { + RULE_FILE="/etc/udev/rules.d/99-vpp-restore-id_net_name.rules" + PROPS_FILE="/var/run/vpp/udev_props_$INTERFACE_NAME" + + if [ -f "$RULE_FILE" ]; then + echo "default_hook: Removing udev rule file $RULE_FILE..." + rm -f "$RULE_FILE" + udevadm control --reload-rules + + # Trigger udev for net subsystem to remove the stored ID_NET_NAME_* properties + udevadm trigger --subsystem-match=net --action=change + echo "default_hook: Triggered udev to remove the stored ID_NET_NAME_* properties" + fi + + if [ -f "$PROPS_FILE" ]; then + rm -f "$PROPS_FILE" + fi +} + +echo "default_hook: Uplink interface name=$INTERFACE_NAME" if which systemctl > /dev/null; then echo "default_hook: using systemctl..." else @@ -46,15 +151,20 @@ else exit 1 fi -if [ "$HOOK" = "BEFORE_VPP_RUN" ]; then +if [ "$HOOK" = "CAPTURE_HOST_UDEV_PROPS" ]; then + capture_udev_net_name_properties +elif [ "$HOOK" = "BEFORE_VPP_RUN" ]; then fix_dns elif [ "$HOOK" = "VPP_RUNNING" ]; then + create_udev_net_name_rule restart_network elif [ "$HOOK" = "VPP_DONE_OK" ]; then undo_dns_fix + remove_udev_net_name_rule restart_network elif [ "$HOOK" = "VPP_ERRORED" ]; then undo_dns_fix + remove_udev_net_name_rule restart_network fi diff --git a/vpp-manager/images/ubuntu/Dockerfile b/vpp-manager/images/ubuntu/Dockerfile index 6f39d183..4a665343 100644 --- a/vpp-manager/images/ubuntu/Dockerfile +++ b/vpp-manager/images/ubuntu/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update \ && apt-get install -y openssl libapr1 libnuma1 \ libmbedcrypto7 libmbedtls14 libmbedx509-1 libsubunit0 \ iproute2 ifupdown ethtool libnl-3-dev libnl-route-3-dev \ - libpcap0.8 libunwind8 \ + libpcap0.8 libunwind8 iptables \ gdb \ && rm -rf /var/lib/apt/lists/* diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index d9fd9f90..69b09f92 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -101,6 +101,9 @@ func (v *VppRunner) Run(drivers []uplink.UplinkDriver) error { return errors.Wrap(err, "Error generating VPP config") } + // Run hook to capture host udev properties before driver unbind + config.RunHook(config.HookScriptCaptureHostUdevProps, "CAPTURE_HOST_UDEV_PROPS", v.params, log) + for idx := range v.conf { err = v.uplinkDriver[idx].PreconfigureLinux() if err != nil { @@ -139,22 +142,13 @@ func (v *VppRunner) configureGlobalPunt() (err error) { } func (v *VppRunner) configurePunt(tapSwIfIndex uint32, ifState config.LinuxInterfaceState) (err error) { - err = v.vpp.AddNeighbor(&types.Neighbor{ - SwIfIndex: tapSwIfIndex, - IP: config.VppHostPuntFakeGatewayAddress, - HardwareAddr: ifState.HardwareAddr, - Flags: types.IPNeighborStatic, - }) - if err != nil { - return errors.Wrapf(err, "Error adding neighbor %s to tap", config.VppHostPuntFakeGatewayAddress) - } /* In the punt table (where all punted traffics ends), route to the tap */ - for _, address := range ifState.Addresses { + for _, addr := range ifState.Addresses { err = v.vpp.RouteAdd(&types.Route{ - Dst: address.IPNet, Table: common.PuntTableID, + Dst: addr.IPNet, Paths: []types.RoutePath{{ - Gw: config.VppHostPuntFakeGatewayAddress, + Gw: addr.IP, SwIfIndex: tapSwIfIndex, }}, }) @@ -162,7 +156,6 @@ func (v *VppRunner) configurePunt(tapSwIfIndex uint32, ifState config.LinuxInter return errors.Wrapf(err, "error adding vpp side routes for interface") } } - return nil } @@ -355,6 +348,51 @@ func (v *VppRunner) allocateStaticVRFs() error { return nil } +// setupIPv6MulticastForHostTap configures mFIB entries to allow IPv6 multicast traffic +// from the Linux host to pass through VPP. This is required for DHCPv6, NDP, and other +// IPv6 protocols that use link-local multicast. +// Without this configuration, packets arriving from the tap interface fail RPF checks +// because the tap interface is not in the mFIB accept list. +func (v *VppRunner) setupIPv6MulticastForHostTap(vrfID uint32, tapSwIfIndex uint32, uplinkSwIfIndex uint32) error { + log.Infof("Setting up IPv6 multicast forwarding for host tap in VRF %d", vrfID) + + // IPv6 multicast groups that need to be forwarded from the Linux host + multicastGroups := []struct { + addr string + prefix int // CIDR prefix length + comment string + }{ + {"ff02::1:ff00:0", 104, "Solicited-Node multicast (NDP Neighbor Solicitation targets)"}, + {"ff02::1", 128, "All Nodes / All Hosts (link-local; used by NDP and others)"}, + {"ff02::2", 128, "All Routers (routers listen here; NDP RS target)"}, + {"ff02::16", 128, "All MLDv2-capable routers"}, + {"ff02::1:2", 128, "DHCPv6 All Relay Agents and Servers"}, + } + + for _, group := range multicastGroups { + groupIP := net.ParseIP(group.addr) + if groupIP == nil { + log.Warnf("Invalid multicast address: %s", group.addr) + continue + } + + groupNet := &net.IPNet{ + IP: groupIP, + Mask: net.CIDRMask(group.prefix, 128), + } + + err := v.vpp.MRouteAddForHostMulticast(vrfID, groupNet, tapSwIfIndex, uplinkSwIfIndex) + if err != nil { + return errors.Wrapf(err, "cannot add mFIB route for %s (%s) in VRF %d", + group.addr, group.comment, vrfID) + } + + log.Infof("Added mFIB route for %s (%s) in VRF %d", group.addr, group.comment, vrfID) + } + + return nil +} + // Configure specific VRFs for a given tap to the host to handle broadcast / multicast traffic sent by the host func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *config.LinuxInterfaceState, tapSwIfIndex uint32) (vrfs []uint32, err error) { for _, ipFamily := range vpplink.IPFamilies { @@ -379,7 +417,16 @@ func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *con if err != nil { log.Errorf("cannot add broadcast route in vpp: %v", err) } - } // else {} No custom routes for IPv6 for now. Forward LL multicast from the host? + } else { + // Setup IPv6 multicast forwarding for the host + // This is required for DHCPv6 solicitations, NDP, and other link-local multicast + // Unlike IPv4, we cannot use a unicast route trick because ff02::/16 is multicast + // and must go through mFIB with proper RPF configuration + err = v.setupIPv6MulticastForHostTap(vrfID, tapSwIfIndex, ifSpec.SwIfIndex) + if err != nil { + return []uint32{}, errors.Wrap(err, "Error setting up IPv6 multicast forwarding") + } + } // default route in default table err = v.vpp.AddDefaultRouteViaTable(vrfID, config.Info.PhysicalNets[ifSpec.PhysicalNetworkName].VrfID, ipFamily.IsIP6) @@ -392,26 +439,42 @@ func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *con return []uint32{}, errors.Wrapf(err, "error setting vpp tap in vrf %d", vrfID) } vrfs = append(vrfs, vrfID) - } - // Configure addresses to enable ipv4 & ipv6 on the tap - for _, addr := range ifState.Addresses { - if addr.IP.IsLinkLocalUnicast() && !common.IsFullyQualified(addr.IPNet) && common.IsV6Cidr(addr.IPNet) { - log.Infof("Not adding address %s to data interface (vpp requires /128 link-local)", addr.String()) - continue - } else { - log.Infof("Adding address %s to tap interface", addr.String()) - } - // to max len cidr because we don't want the rest of the subnet to be considered as - // connected to that interface - // note that the role of these addresses is just to tell vpp to accept ip4 / ip6 packets on the tap - // we use these addresses as the safest option, because as they are configured on linux, linux - // will never send us packets with these addresses as destination - err = v.vpp.AddInterfaceAddress(tapSwIfIndex, common.ToMaxLenCIDR(addr.IP)) - if err != nil { - log.Errorf("Error adding address to tap interface: %v", err) + for _, addr := range ifState.Addresses { + if vpplink.IPFamilyFromIP(addr.IP) == ipFamily { + err = v.vpp.RouteAdd(&types.Route{ + Table: vrfID, + Dst: common.FullyQualified(addr.IP), + Paths: []types.RoutePath{{ + Gw: addr.IP, + SwIfIndex: tapSwIfIndex, + }}, + }) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error add route from VPP to tap0 in VRF %d", vrfID) + } + err = v.vpp.AddNeighbor(&types.Neighbor{ + SwIfIndex: tapSwIfIndex, + IP: addr.IP, + HardwareAddr: ifState.HardwareAddr, + Flags: types.IPNeighborStatic, + }) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error add static neighbor for tap0 in VRF %d", vrfID) + } + } } } + + err = v.vpp.EnableInterfaceIP6(tapSwIfIndex) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error enabling ip6 for tap %d", tapSwIfIndex) + } + + err = v.vpp.AddInterfaceAddress(tapSwIfIndex, config.VppsideTap0Address) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error adding vpp side address for tap0 %d", tapSwIfIndex) + } return vrfs, nil } @@ -492,6 +555,10 @@ func (v *VppRunner) configureVppUplinkInterface( } } for _, route := range ifState.Routes { + if route.Dst != nil && route.Dst.IP.IsLinkLocalUnicast() { + log.Infof("Skipping link-local route %s", route.Dst.String()) + continue + } err = v.vpp.RouteAdd(&types.Route{ Dst: route.Dst, Paths: []types.RoutePath{{ @@ -594,6 +661,22 @@ func (v *VppRunner) configureVppUplinkInterface( } } + /* + * Add ND proxy for IPv6 gateway addresses. + * Without ND proxy for gateway, host's NS for gateway is dropped with "neighbor + * solicitations for unknown targets" error because there's no /128 FIB entry. + * This requires VPP patch https://gerrit.fd.io/r/c/vpp/+/44350 to fix NA loop bug. + */ + for _, route := range ifState.Routes { + if route.Gw != nil && route.Gw.To4() == nil { + log.Infof("Adding ND proxy for IPv6 gateway %s", route.Gw) + err = v.vpp.EnableIP6NdProxy(tapSwIfIndex, route.Gw) + if err != nil { + log.Errorf("Error configuring ND proxy for gateway %s: %v", route.Gw, err) + } + } + } + if *config.GetCalicoVppDebug().GSOEnabled { err = v.vpp.EnableGSOFeature(tapSwIfIndex) if err != nil { @@ -637,18 +720,22 @@ func (v *VppRunner) configureVppUplinkInterface( return errors.Wrap(err, "Error setting tap up") } - if config.Info.UplinkStatuses != nil { - config.Info.UplinkStatuses[link.Attrs().Name] = config.UplinkStatus{ - TapSwIfIndex: tapSwIfIndex, - SwIfIndex: ifSpec.SwIfIndex, - Mtu: uplinkMtu, - PhysicalNetworkName: ifSpec.PhysicalNetworkName, - LinkIndex: link.Attrs().Index, - Name: link.Attrs().Name, - IsMain: ifSpec.IsMain, - FakeNextHopIP4: fakeNextHopIP4, - FakeNextHopIP6: fakeNextHopIP6, - } + uplinkAddresses := make([]*net.IPNet, 0) + for _, addr := range ifState.Addresses { + uplinkAddresses = append(uplinkAddresses, addr.IPNet) + } + + config.Info.UplinkStatuses[link.Attrs().Name] = config.UplinkStatus{ + TapSwIfIndex: tapSwIfIndex, + SwIfIndex: ifSpec.SwIfIndex, + Mtu: uplinkMtu, + PhysicalNetworkName: ifSpec.PhysicalNetworkName, + LinkIndex: link.Attrs().Index, + Name: link.Attrs().Name, + IsMain: ifSpec.IsMain, + FakeNextHopIP4: fakeNextHopIP4, + FakeNextHopIP6: fakeNextHopIP6, + UplinkAddresses: uplinkAddresses, } return nil } @@ -805,6 +892,62 @@ func (v *VppRunner) AllocatePhysicalNetworkVRFs(phyNet string) (err error) { return nil } +func (v *VppRunner) configureDHCPv6HopLimit() { + log.Infof("Configuring ip6tables mangle OUTPUT rule for DHCPv6 hop limit on host") + + err := ns.WithNetNSPath("/proc/1/ns/net", func(ns.NetNS) error { + checkCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-C", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + if err := checkCmd.Run(); err != nil { + outputCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-A", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + outputCmd.Stdout = os.Stdout + outputCmd.Stderr = os.Stderr + if err := outputCmd.Run(); err != nil { + log.Warnf("Failed to configure ip6tables mangle OUTPUT rule for DHCPv6: %v", err) + } + } else { + log.Infof("ip6tables mangle OUTPUT rule for DHCPv6 already present") + } + + return nil + }) + + if err != nil { + log.Warnf("Error entering host network namespace") + } +} + +func (v *VppRunner) cleanupDHCPv6HopLimit() { + log.Infof("Cleaning up ip6tables mangle OUTPUT rule for DHCPv6 hop limit on host") + + err := ns.WithNetNSPath("/proc/1/ns/net", func(ns.NetNS) error { + checkCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-C", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + if err := checkCmd.Run(); err == nil { + deleteCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-D", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + deleteCmd.Stdout = os.Stdout + deleteCmd.Stderr = os.Stderr + if err := deleteCmd.Run(); err != nil { + log.Warnf("Failed to delete ip6tables mangle OUTPUT rule for DHCPv6: %v", err) + } + } else { + log.Infof("ip6tables mangle OUTPUT rule for DHCPv6 not present") + } + + return nil + }) + + if err != nil { + log.Warnf("Error entering host network namespace") + } +} + // Returns VPP exit code func (v *VppRunner) runVpp() (err error) { if !v.allInterfacesPhysical() { // use separate net namespace because linux deletes these interfaces when ns is deleted @@ -877,6 +1020,12 @@ func (v *VppRunner) runVpp() (err error) { return errors.Wrap(err, "Error configuring VPP") } + // Configure DHCPv6 hop limit to prevent VPP from dropping DHCPv6 SOLICIT/REQUEST packets. + // Without this, when forwarding a DHCPv6 SOLICIT/REQUEST packet, VPP will decrement the + // hop-limit by 1. Since client generates SOLICIT/REQUEST with hop-limit=1, VPP drops it + // (ip6 ttl <= 1) with ICMP time exceeded and DHCPv6 lease negotiation fails. + v.configureDHCPv6HopLimit() + // add main network that has the default VRF config.Info.PhysicalNets[config.DefaultPhysicalNetworkName] = config.PhysicalNetwork{VrfID: common.DefaultVRFIndex, PodVrfID: common.PodVRFIndex} @@ -941,6 +1090,7 @@ func (v *VppRunner) runVpp() (err error) { func (v *VppRunner) restoreConfiguration(allInterfacesPhysical bool) { log.Infof("Restoring configuration") + v.cleanupDHCPv6HopLimit() err := utils.ClearVppManagerFiles() if err != nil { log.Errorf("Error clearing vpp manager files: %v", err) diff --git a/vpplink/generated/vpp_clone_current.sh b/vpplink/generated/vpp_clone_current.sh index bc2f3f49..fe6f1dc8 100755 --- a/vpplink/generated/vpp_clone_current.sh +++ b/vpplink/generated/vpp_clone_current.sh @@ -145,6 +145,7 @@ git_cherry_pick refs/changes/07/43107/4 # 43107: vcl: fix fifo private vpp sh on git_cherry_pick refs/changes/14/43714/5 # 43714: session: fix handling of closed during migration | https://gerrit.fd.io/r/c/vpp/+/43714 git_cherry_pick refs/changes/39/43139/5 # 43139: udp: regrab connected session after transport clone | https://gerrit.fd.io/r/c/vpp/+/43139 git_cherry_pick refs/changes/23/43723/3 # 43723: session svm: fix session migrate attach data corruption | https://gerrit.fd.io/r/c/vpp/+/43723 +git_cherry_pick refs/changes/50/44350/2 # 44350: vnet: fix unicast NA handling in ND proxy | https://gerrit.fd.io/r/c/vpp/+/44350 # --------------- private plugins --------------- # Generated with 'git format-patch --zero-commit -o ./patches/ HEAD^^^' diff --git a/vpplink/helpers.go b/vpplink/helpers.go index a7cd4064..dbf73d89 100644 --- a/vpplink/helpers.go +++ b/vpplink/helpers.go @@ -49,6 +49,16 @@ func IPFamilyFromIPNet(ipNet *net.IPNet) IPFamily { return IPFamilyV4 } +func IPFamilyFromIP(ip net.IP) IPFamily { + if ip == nil { + return IPFamilyV4 + } + if ip.To4() == nil { + return IPFamilyV6 + } + return IPFamilyV4 +} + type CleanupCall struct { args []interface{} f interface{} diff --git a/vpplink/routes.go b/vpplink/routes.go index ac11f225..b8ffb62e 100644 --- a/vpplink/routes.go +++ b/vpplink/routes.go @@ -202,6 +202,78 @@ func (v *VppLink) addDelIPMRoute(route *types.Route, flags mfib_types.MfibEntryF return nil } +// MRouteAddForHostMulticast adds an mFIB route with explicit interface flags for each path +// This is needed for forwarding multicast traffic like DHCPv6 solicitations from the host +// For DHCPv6 from Linux host via tap: +// - tapSwIfIndex should have ACCEPT flag (allow packets from tap) +// - uplinkSwIfIndex should have ACCEPT|FORWARD flags (forward to uplink, accept replies) +func (v *VppLink) MRouteAddForHostMulticast(tableID uint32, group *net.IPNet, tapSwIfIndex, uplinkSwIfIndex uint32) error { + client := vppip.NewServiceClient(v.GetConnection()) + + isIP6 := group.IP.To4() == nil + ones, _ := group.Mask.Size() + prefix := ip_types.Mprefix{ + Af: types.ToVppAddressFamily(isIP6), + GrpAddressLength: uint16(ones), + GrpAddress: types.ToVppAddress(group.IP).Un, + // SrcAddress is all zeros for (*,G) entries + } + + // Create mFIB paths with explicit interface flags + paths := []mfib_types.MfibPath{ + { + // Uplink interface: Accept + Forward + // Accept incoming multicast from network, forward outgoing multicast to network + ItfFlags: mfib_types.MFIB_API_ITF_FLAG_ACCEPT | mfib_types.MFIB_API_ITF_FLAG_FORWARD, + Path: fib_types.FibPath{ + SwIfIndex: uplinkSwIfIndex, + TableID: 0, + RpfID: 0, + Weight: 1, + Preference: 0, + Type: fib_types.FIB_API_PATH_TYPE_NORMAL, + Flags: fib_types.FIB_API_PATH_FLAG_NONE, + Proto: types.IsV6toFibProto(isIP6), + }, + }, + { + // Tap interface: Accept only + // This allows packets FROM Linux host to pass RPF check + ItfFlags: mfib_types.MFIB_API_ITF_FLAG_ACCEPT, + Path: fib_types.FibPath{ + SwIfIndex: tapSwIfIndex, + TableID: 0, + RpfID: 0, + Weight: 1, + Preference: 0, + Type: fib_types.FIB_API_PATH_TYPE_NORMAL, + Flags: fib_types.FIB_API_PATH_FLAG_NONE, + Proto: types.IsV6toFibProto(isIP6), + }, + }, + } + + vppRoute := vppip.IPMroute{ + TableID: tableID, + Prefix: prefix, + EntryFlags: mfib_types.MFIB_API_ENTRY_FLAG_NONE, // Use interface-based RPF, not ACCEPT_ALL_ITF + Paths: paths, + RpfID: 0, // No RPF-ID, use interface-based checking + } + + _, err := client.IPMrouteAddDel(v.GetContext(), &vppip.IPMrouteAddDel{ + IsAdd: true, + Route: vppRoute, + }) + if err != nil { + return fmt.Errorf("failed to add mroute for host multicast %s in table %d: %w", group.String(), tableID, err) + } + + v.GetLog().Infof("Added mFIB route for host multicast %s in table %d (tap=%d, uplink=%d)", + group.String(), tableID, tapSwIfIndex, uplinkSwIfIndex) + return nil +} + func (v *VppLink) addDelDefaultMRouteViaTable(srcTable, dstTable uint32, isIP6 bool, isAdd bool) error { route := &types.Route{ Paths: []types.RoutePath{{