@@ -438,58 +438,8 @@ func (gw *GatewayManager) createGWRouterPort(hostSubnets []*net.IPNet, gwLRPJoin
438438 return gwLRPIPs , nil
439439}
440440
441- // GatewayInit creates a gateway router for the local chassis.
442- // enableGatewayMTU enables options:gateway_mtu for gateway routers.
443- func (gw * GatewayManager ) GatewayInit (
444- nodeName string ,
445- clusterIPSubnet []* net.IPNet ,
446- hostSubnets []* net.IPNet ,
447- l3GatewayConfig * util.L3GatewayConfig ,
448- gwLRPJoinIPs , drLRPIfAddrs []* net.IPNet ,
449- externalIPs []net.IP ,
450- enableGatewayMTU bool ,
451- ) error {
452-
453- // If l3gatewayAnnotation.IPAddresses changed, we need to update the perPodSNATs,
454- // so let's save the old value before we update the router for later use
455- var oldExtIPs []net.IP
456- oldLogicalRouter , err := libovsdbops .GetLogicalRouter (gw .nbClient ,
457- & nbdb.LogicalRouter {
458- Name : gw .gwRouterName ,
459- })
460- if err != nil && ! errors .Is (err , libovsdbclient .ErrNotFound ) {
461- return fmt .Errorf ("failed in retrieving %s, error: %v" , gw .gwRouterName , err )
462- }
463-
464- if oldLogicalRouter != nil && oldLogicalRouter .ExternalIDs != nil {
465- if physicalIPs , ok := oldLogicalRouter .ExternalIDs ["physical_ips" ]; ok {
466- oldExternalIPs := strings .Split (physicalIPs , "," )
467- oldExtIPs = make ([]net.IP , len (oldExternalIPs ))
468- for i , oldExternalIP := range oldExternalIPs {
469- cidr := oldExternalIP + util .GetIPFullMaskString (oldExternalIP )
470- ip , _ , err := net .ParseCIDR (cidr )
471- if err != nil {
472- return fmt .Errorf ("invalid cidr:%s error: %v" , cidr , err )
473- }
474- oldExtIPs [i ] = ip
475- }
476- }
477- }
478-
479- gwRouter , err := gw .createGWRouter (l3GatewayConfig , gwLRPJoinIPs )
480- if err != nil {
481- return err
482- }
483-
484- if err = gw .createGWRouterPeerPort (nodeName ); err != nil {
485- return err
486- }
487-
488- gwLRPIPs , err := gw .createGWRouterPort (hostSubnets , gwLRPJoinIPs , enableGatewayMTU , gwRouter )
489- if err != nil {
490- return err
491- }
492-
441+ func (gw * GatewayManager ) updateGWRouterStaticRoutes (clusterIPSubnet , drLRPIfAddrs []* net.IPNet ,
442+ l3GatewayConfig * util.L3GatewayConfig , externalRouterPort string , gwRouter * nbdb.LogicalRouter ) error {
493443 if len (drLRPIfAddrs ) > 0 {
494444 for _ , entry := range clusterIPSubnet {
495445 drLRPIfAddr , err := util .MatchFirstIPNetFamily (utilnet .IsIPv6CIDR (entry ), drLRPIfAddrs )
@@ -534,38 +484,6 @@ func (gw *GatewayManager) GatewayInit(
534484 }
535485 }
536486
537- if err := gw .addExternalSwitch ("" ,
538- l3GatewayConfig .InterfaceID ,
539- gw .gwRouterName ,
540- l3GatewayConfig .MACAddress .String (),
541- physNetName (gw .netInfo ),
542- l3GatewayConfig .IPAddresses ,
543- l3GatewayConfig .VLANID ); err != nil {
544- return err
545- }
546-
547- if l3GatewayConfig .EgressGWInterfaceID != "" {
548- if err := gw .addExternalSwitch (types .EgressGWSwitchPrefix ,
549- l3GatewayConfig .EgressGWInterfaceID ,
550- gw .gwRouterName ,
551- l3GatewayConfig .EgressGWMACAddress .String (),
552- types .PhysicalNetworkExGwName ,
553- l3GatewayConfig .EgressGWIPAddresses ,
554- nil ); err != nil {
555- return err
556- }
557- }
558-
559- externalRouterPort := types .GWRouterToExtSwitchPrefix + gw .gwRouterName
560- // Remove stale OVN resources with any old masquerade IP
561- if err := deleteStaleMasqueradeResources (gw .nbClient , gw .gwRouterName , nodeName , gw .watchFactory ); err != nil {
562- return fmt .Errorf ("failed to remove stale masquerade resources from northbound database: %w" , err )
563- }
564-
565- if err := gateway .CreateDummyGWMacBindings (gw .nbClient , gw .gwRouterName , gw .netInfo ); err != nil {
566- return err
567- }
568-
569487 for _ , nextHop := range node .DummyNextHopIPs () {
570488 // Add return service route for OVN back to host
571489 prefix := config .Gateway .V4MasqueradeSubnet
@@ -588,7 +506,7 @@ func (gw *GatewayManager) GatewayInit(
588506 return item .OutputPort != nil && * item .OutputPort == * lrsr .OutputPort && item .IPPrefix == lrsr .IPPrefix &&
589507 libovsdbops .PolicyEqualPredicate (item .Policy , lrsr .Policy )
590508 }
591- err = libovsdbops .CreateOrReplaceLogicalRouterStaticRouteWithPredicate (gw .nbClient , gw .gwRouterName , & lrsr , p ,
509+ err : = libovsdbops .CreateOrReplaceLogicalRouterStaticRouteWithPredicate (gw .nbClient , gw .gwRouterName , & lrsr , p ,
592510 & lrsr .Nexthop )
593511 if err != nil {
594512 return fmt .Errorf ("error creating service static route %+v in GR %s: %v" , lrsr , gw .gwRouterName , err )
@@ -627,6 +545,10 @@ func (gw *GatewayManager) GatewayInit(
627545 }
628546 }
629547
548+ return nil
549+ }
550+
551+ func (gw * GatewayManager ) updateClusterRouterStaticRoutes (hostSubnets []* net.IPNet , gwLRPIPs []net.IP ) error {
630552 // We need to add a route to the Gateway router's IP, on the
631553 // cluster router, to ensure that the return traffic goes back
632554 // to the same gateway router
@@ -721,17 +643,36 @@ func (gw *GatewayManager) GatewayInit(
721643 }
722644 }
723645 }
646+ return nil
647+ }
724648
649+ // syncNATsForGRIPChange updates the SNAT rules on the gateway router that are created outside the GatewayManager.
650+ // Multiple handlers, like
651+ // - DefaultNetworkController.addLogicalPort
652+ // - DefaultNetworkController.updateNamespace
653+ // - EgressIPController.addExternalGWPodSNATOps
654+ // - EgressIPController.addPodEgressIPAssignment
655+ // - SecondaryLayer2NetworkController.buildUDNEgressSNAT
656+ // - SecondaryLayer3NetworkController.addUDNNodeSubnetEgressSNAT
657+ // use gateway config parameters to create SNAT rules on the gateway router, but some of them (not all) don't watch
658+ // gateway config changes and rely on the GatewayManager to update their SNAT rules.
659+ // Is it racy? Yes!
660+ // This function also updates SNAT created by `updateGWRouterNAT`, because NATs don't use ExternalIDs,
661+ // and their fields are used to find equivalent NATs. That means on gateway IPs change, instead of updating
662+ // the old NAT, we would create a new one. FIXME: add externalIDs to NATs
663+ func (gw * GatewayManager ) syncNATsForGRIPChange (externalIPs , oldExtIPs , gwLRPIPs []net.IP ,
664+ gwRouter , oldGWRouter * nbdb.LogicalRouter ) error {
725665 // if config.Gateway.DisabledSNATMultipleGWs is not set (by default it is not),
726666 // the NAT rules for pods not having annotations to route through either external
727667 // gws or pod CNFs will be added within pods.go addLogicalPort
728668 var natsToUpdate []* nbdb.NAT
729669 // If l3gatewayAnnotation.IPAddresses changed, we need to update the SNATs on the GR
730670 oldNATs := []* nbdb.NAT {}
731- if oldLogicalRouter != nil {
732- oldNATs , err = libovsdbops .GetRouterNATs (gw .nbClient , oldLogicalRouter )
671+ var err error
672+ if oldGWRouter != nil {
673+ oldNATs , err = libovsdbops .GetRouterNATs (gw .nbClient , oldGWRouter )
733674 if err != nil && errors .Is (err , libovsdbclient .ErrNotFound ) {
734- return fmt .Errorf ("unable to get NAT entries for router on node %s: %w" , nodeName , err )
675+ return fmt .Errorf ("unable to get NAT entries for router %s: %w" , oldGWRouter . Name , err )
735676 }
736677 }
737678
@@ -789,7 +730,11 @@ func (gw *GatewayManager) GatewayInit(
789730 return fmt .Errorf ("failed to update GW SNAT rule for pod on router %s error: %v" , gw .gwRouterName , err )
790731 }
791732 }
733+ return nil
734+ }
792735
736+ func (gw * GatewayManager ) updateGWRouterNAT (nodeName string , clusterIPSubnet []* net.IPNet , l3GatewayConfig * util.L3GatewayConfig ,
737+ externalIPs , gwLRPIPs []net.IP , gwRouter * nbdb.LogicalRouter ) error {
793738 // REMOVEME(trozet) workaround - create join subnet SNAT to handle ICMP needs frag return
794739 var extIDs map [string ]string
795740 if gw .netInfo .IsSecondary () {
@@ -812,7 +757,7 @@ func (gw *GatewayManager) GatewayInit(
812757 nat := libovsdbops .BuildSNAT (& externalIP [0 ], joinIPNet , "" , extIDs )
813758 joinNATs = append (joinNATs , nat )
814759 }
815- err = libovsdbops .CreateOrUpdateNATs (gw .nbClient , gwRouter , joinNATs ... )
760+ err : = libovsdbops .CreateOrUpdateNATs (gw .nbClient , gwRouter , joinNATs ... )
816761 if err != nil {
817762 return fmt .Errorf ("failed to create SNAT rule for join subnet on router %s error: %v" , gw .gwRouterName , err )
818763 }
@@ -832,7 +777,7 @@ func (gw *GatewayManager) GatewayInit(
832777 nat = libovsdbops .BuildSNATWithMatch (& externalIP [0 ], entry , "" , extIDs , gw .netInfo .GetNetworkScopedClusterSubnetSNATMatch (nodeName ))
833778 nats = append (nats , nat )
834779 }
835- err : = libovsdbops .CreateOrUpdateNATs (gw .nbClient , gwRouter , nats ... )
780+ err = libovsdbops .CreateOrUpdateNATs (gw .nbClient , gwRouter , nats ... )
836781 if err != nil {
837782 return fmt .Errorf ("failed to update SNAT rule for pod on router %s error: %v" , gw .gwRouterName , err )
838783 }
@@ -842,15 +787,118 @@ func (gw *GatewayManager) GatewayInit(
842787 nat = libovsdbops .BuildSNATWithMatch (nil , logicalSubnet , "" , extIDs , gw .netInfo .GetNetworkScopedClusterSubnetSNATMatch (nodeName ))
843788 nats = append (nats , nat )
844789 }
845- err : = libovsdbops .DeleteNATs (gw .nbClient , gwRouter , nats ... )
790+ err = libovsdbops .DeleteNATs (gw .nbClient , gwRouter , nats ... )
846791 if err != nil {
847792 return fmt .Errorf ("failed to delete GW SNAT rule for pod on router %s error: %v" , gw .gwRouterName , err )
848793 }
849794 }
850795
851- if err : = gw .cleanupStalePodSNATs (nodeName , l3GatewayConfig .IPAddresses , gwLRPIPs ); err != nil {
796+ if err = gw .cleanupStalePodSNATs (nodeName , l3GatewayConfig .IPAddresses , gwLRPIPs ); err != nil {
852797 return fmt .Errorf ("failed to sync stale SNATs on node %s: %v" , nodeName , err )
853798 }
799+ return nil
800+ }
801+
802+ // GatewayInit creates a gateway router for the local chassis.
803+ // enableGatewayMTU enables options:gateway_mtu for gateway routers.
804+ func (gw * GatewayManager ) GatewayInit (
805+ nodeName string ,
806+ clusterIPSubnet []* net.IPNet ,
807+ hostSubnets []* net.IPNet ,
808+ l3GatewayConfig * util.L3GatewayConfig ,
809+ gwLRPJoinIPs , drLRPIfAddrs []* net.IPNet ,
810+ externalIPs []net.IP ,
811+ enableGatewayMTU bool ,
812+ ) error {
813+
814+ // If l3gatewayAnnotation.IPAddresses changed, we need to update the perPodSNATs,
815+ // so let's save the old value before we update the router for later use
816+ var oldExtIPs []net.IP
817+ oldLogicalRouter , err := libovsdbops .GetLogicalRouter (gw .nbClient ,
818+ & nbdb.LogicalRouter {
819+ Name : gw .gwRouterName ,
820+ })
821+ if err != nil && ! errors .Is (err , libovsdbclient .ErrNotFound ) {
822+ return fmt .Errorf ("failed in retrieving %s, error: %v" , gw .gwRouterName , err )
823+ }
824+
825+ if oldLogicalRouter != nil && oldLogicalRouter .ExternalIDs != nil {
826+ if physicalIPs , ok := oldLogicalRouter .ExternalIDs ["physical_ips" ]; ok {
827+ oldExternalIPs := strings .Split (physicalIPs , "," )
828+ oldExtIPs = make ([]net.IP , len (oldExternalIPs ))
829+ for i , oldExternalIP := range oldExternalIPs {
830+ cidr := oldExternalIP + util .GetIPFullMaskString (oldExternalIP )
831+ ip , _ , err := net .ParseCIDR (cidr )
832+ if err != nil {
833+ return fmt .Errorf ("invalid cidr:%s error: %v" , cidr , err )
834+ }
835+ oldExtIPs [i ] = ip
836+ }
837+ }
838+ }
839+
840+ gwRouter , err := gw .createGWRouter (l3GatewayConfig , gwLRPJoinIPs )
841+ if err != nil {
842+ return err
843+ }
844+
845+ if err = gw .createGWRouterPeerPort (nodeName ); err != nil {
846+ return err
847+ }
848+
849+ gwLRPIPs , err := gw .createGWRouterPort (hostSubnets , gwLRPJoinIPs , enableGatewayMTU , gwRouter )
850+ if err != nil {
851+ return err
852+ }
853+
854+ if err := gw .addExternalSwitch ("" ,
855+ l3GatewayConfig .InterfaceID ,
856+ gw .gwRouterName ,
857+ l3GatewayConfig .MACAddress .String (),
858+ physNetName (gw .netInfo ),
859+ l3GatewayConfig .IPAddresses ,
860+ l3GatewayConfig .VLANID ); err != nil {
861+ return err
862+ }
863+
864+ if l3GatewayConfig .EgressGWInterfaceID != "" {
865+ if err := gw .addExternalSwitch (types .EgressGWSwitchPrefix ,
866+ l3GatewayConfig .EgressGWInterfaceID ,
867+ gw .gwRouterName ,
868+ l3GatewayConfig .EgressGWMACAddress .String (),
869+ types .PhysicalNetworkExGwName ,
870+ l3GatewayConfig .EgressGWIPAddresses ,
871+ nil ); err != nil {
872+ return err
873+ }
874+ }
875+
876+ // Remove stale OVN resources with any old masquerade IP
877+ if err := deleteStaleMasqueradeResources (gw .nbClient , gw .gwRouterName , nodeName , gw .watchFactory ); err != nil {
878+ return fmt .Errorf ("failed to remove stale masquerade resources from northbound database: %w" , err )
879+ }
880+
881+ if err := gateway .CreateDummyGWMacBindings (gw .nbClient , gw .gwRouterName , gw .netInfo ); err != nil {
882+ return err
883+ }
884+
885+ externalRouterPort := types .GWRouterToExtSwitchPrefix + gw .gwRouterName
886+ if err = gw .updateGWRouterStaticRoutes (clusterIPSubnet , drLRPIfAddrs , l3GatewayConfig , externalRouterPort ,
887+ gwRouter ); err != nil {
888+ return err
889+ }
890+
891+ if err = gw .updateClusterRouterStaticRoutes (hostSubnets , gwLRPIPs ); err != nil {
892+ return err
893+ }
894+
895+ if err = gw .syncNATsForGRIPChange (externalIPs , oldExtIPs , gwLRPIPs , gwRouter , oldLogicalRouter ); err != nil {
896+ return err
897+ }
898+
899+ if err = gw .updateGWRouterNAT (nodeName , clusterIPSubnet , l3GatewayConfig , externalIPs , gwLRPIPs , gwRouter ); err != nil {
900+ return err
901+ }
854902
855903 // recording gateway mode metrics here after gateway setup is done
856904 metrics .RecordEgressRoutingViaHost ()
0 commit comments