55 "fmt"
66 "net"
77 "os"
8- "os/exec"
98 "path"
109 "strconv"
1110 "strings"
@@ -18,12 +17,15 @@ import (
1817 "github.com/moby/ipvs"
1918 "github.com/vishvananda/netlink"
2019 "github.com/vishvananda/netns"
20+ "golang.org/x/sys/unix"
2121 "k8s.io/klog/v2"
2222)
2323
2424const (
25- ipv4NetMaskBits = 32
26- ipv6NetMaskBits = 128
25+ ipv4NetMaskBits = 32
26+ ipv4DefaultRoute = "0.0.0.0/0"
27+ ipv6NetMaskBits = 128
28+ ipv6DefaultRoute = "::/0"
2729
2830 // TODO: it's bad to rely on eth0 here. While this is inside the container's namespace and is determined by the
2931 // container runtime and so far we've been able to count on this being reliably set to eth0, it is possible that
@@ -65,7 +67,6 @@ type netlinkCalls interface {
6567
6668func (ln * linuxNetworking ) ipAddrDel (iface netlink.Link , ip string , nodeIP string ) error {
6769 var netMask net.IPMask
68- var ipRouteCmdArgs []string
6970 parsedIP := net .ParseIP (ip )
7071 parsedNodeIP := net .ParseIP (nodeIP )
7172 if parsedIP .To4 () != nil {
@@ -75,7 +76,6 @@ func (ln *linuxNetworking) ipAddrDel(iface netlink.Link, ip string, nodeIP strin
7576 }
7677
7778 netMask = net .CIDRMask (ipv4NetMaskBits , ipv4NetMaskBits )
78- ipRouteCmdArgs = make ([]string , 0 )
7979 } else {
8080 // If the IP family of the NodeIP and the VIP IP don't match, we can't proceed
8181 if parsedNodeIP .To4 () != nil {
@@ -88,7 +88,6 @@ func (ln *linuxNetworking) ipAddrDel(iface netlink.Link, ip string, nodeIP strin
8888 }
8989
9090 netMask = net .CIDRMask (ipv6NetMaskBits , ipv6NetMaskBits )
91- ipRouteCmdArgs = []string {"-6" }
9291 }
9392
9493 naddr := & netlink.Addr {IPNet : & net.IPNet {IP : parsedIP , Mask : netMask }, Scope : syscall .RT_SCOPE_LINK }
@@ -106,13 +105,20 @@ func (ln *linuxNetworking) ipAddrDel(iface netlink.Link, ip string, nodeIP strin
106105
107106 // Delete VIP addition to "local" rt table also, fail silently if not found (DSR special case)
108107 // #nosec G204
109- ipRouteCmdArgs = append (ipRouteCmdArgs , "route" , "delete" , "local" , ip , "dev" , KubeDummyIf ,
110- "table" , "local" , "proto" , "kernel" , "scope" , "host" , "src" , nodeIP , "table" , "local" )
111- out , err := exec .Command ("ip" , ipRouteCmdArgs ... ).CombinedOutput ()
108+ nRoute := & netlink.Route {
109+ Type : unix .RTN_LOCAL ,
110+ Dst : & net.IPNet {IP : parsedIP , Mask : netMask },
111+ LinkIndex : iface .Attrs ().Index ,
112+ Table : syscall .RT_TABLE_LOCAL ,
113+ Protocol : unix .RTPROT_KERNEL ,
114+ Scope : syscall .RT_SCOPE_HOST ,
115+ Src : parsedNodeIP ,
116+ }
117+ err = netlink .RouteDel (nRoute )
112118 if err != nil {
113- if ! strings .Contains (string ( out ), "No such process" ) {
114- klog .Errorf ("Failed to delete route to service VIP %s configured on %s. Error: %v, Output: %s " ,
115- ip , KubeDummyIf , err , out )
119+ if ! strings .Contains (err . Error ( ), "no such process" ) {
120+ klog .Errorf ("Failed to delete route to service VIP %s configured on %s. Error: %v" ,
121+ ip , iface . Attrs (). Name , err )
116122 } else {
117123 klog .Warningf ("got a No such process error while trying to remove route: %v (this is not normally bad " +
118124 "enough to stop processing)" , err )
@@ -128,7 +134,6 @@ func (ln *linuxNetworking) ipAddrDel(iface netlink.Link, ip string, nodeIP strin
128134// inside the container.
129135func (ln * linuxNetworking ) ipAddrAdd (iface netlink.Link , ip string , nodeIP string , addRoute bool ) error {
130136 var netMask net.IPMask
131- var ipRouteCmdArgs []string
132137 parsedIP := net .ParseIP (ip )
133138 parsedNodeIP := net .ParseIP (nodeIP )
134139 if parsedIP .To4 () != nil {
@@ -138,18 +143,17 @@ func (ln *linuxNetworking) ipAddrAdd(iface netlink.Link, ip string, nodeIP strin
138143 }
139144
140145 netMask = net .CIDRMask (ipv4NetMaskBits , ipv4NetMaskBits )
141- ipRouteCmdArgs = make ([]string , 0 )
142146 } else {
143147 // If we're supposed to add a route and the IP family of the NodeIP and the VIP IP don't match, we can't proceed
144148 if addRoute && parsedNodeIP .To4 () != nil {
145149 return fmt .Errorf ("nodeIP %s does not match family for VIP IP: %s, unable to proceed" , ip , nodeIP )
146150 }
147151
148152 netMask = net .CIDRMask (ipv6NetMaskBits , ipv6NetMaskBits )
149- ipRouteCmdArgs = []string {"-6" }
150153 }
151154
152- naddr := & netlink.Addr {IPNet : & net.IPNet {IP : parsedIP , Mask : netMask }, Scope : syscall .RT_SCOPE_LINK }
155+ ipPrefix := & net.IPNet {IP : parsedIP , Mask : netMask }
156+ naddr := & netlink.Addr {IPNet : ipPrefix , Scope : syscall .RT_SCOPE_LINK }
153157 err := netlink .AddrAdd (iface , naddr )
154158 if err != nil && err .Error () != IfaceHasAddr {
155159 klog .Errorf ("failed to assign cluster ip %s to dummy interface: %s" , naddr .IPNet .IP .String (), err .Error ())
@@ -164,16 +168,24 @@ func (ln *linuxNetworking) ipAddrAdd(iface netlink.Link, ip string, nodeIP strin
164168 return nil
165169 }
166170
167- // TODO: netlink.RouteReplace which is replacement for below command is not working as expected. Call succeeds but
168- // route is not replaced. For now do it with command.
169- // #nosec G204
170- ipRouteCmdArgs = append (ipRouteCmdArgs , "route" , "replace" , "local" , ip , "dev" , KubeDummyIf ,
171- "table" , "local" , "proto" , "kernel" , "scope" , "host" , "src" , nodeIP , "table" , "local" )
172-
173- out , err := exec .Command ("ip" , ipRouteCmdArgs ... ).CombinedOutput ()
171+ kubeDummyLink , err := netlink .LinkByName (KubeDummyIf )
172+ if err != nil {
173+ klog .Errorf ("failed to get %s link due to %v" , KubeDummyIf , err )
174+ return err
175+ }
176+ nRoute := & netlink.Route {
177+ Type : unix .RTN_LOCAL ,
178+ Dst : ipPrefix ,
179+ LinkIndex : kubeDummyLink .Attrs ().Index ,
180+ Table : syscall .RT_TABLE_LOCAL ,
181+ Protocol : unix .RTPROT_KERNEL ,
182+ Scope : syscall .RT_SCOPE_HOST ,
183+ Src : parsedNodeIP ,
184+ }
185+ err = netlink .RouteReplace (nRoute )
174186 if err != nil {
175- klog .Errorf ("Failed to replace route to service VIP %s configured on %s. Error: %v, Output: %s " ,
176- ip , KubeDummyIf , err , out )
187+ klog .Errorf ("Failed to replace route to service VIP %s configured on %s. Error: %v" ,
188+ ip , KubeDummyIf , err )
177189 return err
178190 }
179191
@@ -431,60 +443,109 @@ func (ln *linuxNetworking) setupPolicyRoutingForDSR(setupIPv4, setupIPv6 bool) e
431443 return fmt .Errorf ("failed to setup policy routing required for DSR due to %v" , err )
432444 }
433445
446+ loNetLink , err := netlink .LinkByName ("lo" )
447+ if err != nil {
448+ return fmt .Errorf ("failed to get loopback interface due to %v" , err )
449+ }
450+
434451 if setupIPv4 {
435- out , err := exec .Command ("ip" , "route" , "list" , "table" , customDSRRouteTableID ).Output ()
436- if err != nil || ! strings .Contains (string (out ), " lo " ) {
437- if err = exec .Command ("ip" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" ,
438- customDSRRouteTableID ).Run (); err != nil {
439- return fmt .Errorf ("failed to add route in custom route table due to: %v" , err )
452+ nFamily := netlink .FAMILY_V4
453+ _ , defaultRouteCIDR , err := net .ParseCIDR (ipv4DefaultRoute )
454+ if err != nil {
455+ //nolint:goconst // This is a static value and should not be changed
456+ return fmt .Errorf ("failed to parse default (%s) route (this is statically defined, so if you see this " +
457+ "error please report because something has gone very wrong) due to: %v" , ipv4DefaultRoute , err )
458+ }
459+ nRoute := & netlink.Route {
460+ Type : unix .RTN_LOCAL ,
461+ Dst : defaultRouteCIDR ,
462+ LinkIndex : loNetLink .Attrs ().Index ,
463+ Table : customDSRRouteTableID ,
464+ }
465+ routes , err := netlink .RouteListFiltered (nFamily , nRoute , netlink .RT_FILTER_TABLE | netlink .RT_FILTER_OIF )
466+ if err != nil || len (routes ) < 1 {
467+ err = netlink .RouteAdd (nRoute )
468+ if err != nil {
469+ return fmt .Errorf ("failed to add route to custom route table for DSR due to: %v" , err )
440470 }
441471 }
442472 }
473+
443474 if setupIPv6 {
444- out , err := exec .Command ("ip" , "-6" , "route" , "list" , "table" , customDSRRouteTableID ).Output ()
445- if err != nil || ! strings .Contains (string (out ), " lo " ) {
446- if err = exec .Command ("ip" , "-6" , "route" , "add" , "local" , "default" , "dev" , "lo" , "table" ,
447- customDSRRouteTableID ).Run (); err != nil {
448- return fmt .Errorf ("failed to add route in custom route table due to: %v" , err )
475+ nFamily := netlink .FAMILY_V6
476+ _ , defaultRouteCIDR , err := net .ParseCIDR (ipv6DefaultRoute )
477+ if err != nil {
478+ return fmt .Errorf ("failed to parse default (%s) route (this is statically defined, so if you see this " +
479+ "error please report because something has gone very wrong) due to: %v" , ipv6DefaultRoute , err )
480+ }
481+ nRoute := & netlink.Route {
482+ Type : unix .RTN_LOCAL ,
483+ Dst : defaultRouteCIDR ,
484+ LinkIndex : loNetLink .Attrs ().Index ,
485+ Table : customDSRRouteTableID ,
486+ }
487+ routes , err := netlink .RouteListFiltered (nFamily , nRoute , netlink .RT_FILTER_TABLE | netlink .RT_FILTER_OIF )
488+ if err != nil || len (routes ) < 1 {
489+ err = netlink .RouteAdd (nRoute )
490+ if err != nil {
491+ return fmt .Errorf ("failed to add route to custom route table for DSR due to: %v" , err )
449492 }
450493 }
451494 }
495+
452496 return nil
453497}
454498
455499// For DSR, the node must know how to route the external IP. Otherwise, when an endpoint
456500// responds directly with the external IP as its source IP, the kernel will treat it as a martian packet.
457501// To prevent martian packets, add a route to the external IP through the `kube-bridge` interface.
458502// setupRoutesForExternalIPForDSR: sets up routing so that the kernel does not treat return packets as martians
459-
460503func (ln * linuxNetworking ) setupRoutesForExternalIPForDSR (serviceInfoMap serviceInfoMap ,
461504 setupIPv4 , setupIPv6 bool ) error {
462505 err := utils .RouteTableAdd (externalIPRouteTableID , externalIPRouteTableName )
463506 if err != nil {
464507 return fmt .Errorf ("failed to setup policy routing required for DSR due to %v" , err )
465508 }
466509
467- setupIPRulesAndRoutes := func (ipArgs []string ) error {
468- out , err := runIPCommandsWithArgs (ipArgs , "rule" , "list" ).Output ()
510+ setupIPRulesAndRoutes := func (isIPv6 bool ) error {
511+ nFamily := netlink .FAMILY_V4
512+ _ , defaultPrefixCIDR , err := net .ParseCIDR (ipv4DefaultRoute )
513+ if isIPv6 {
514+ nFamily = netlink .FAMILY_V6
515+ _ , defaultPrefixCIDR , err = net .ParseCIDR (ipv6DefaultRoute )
516+ }
517+ if err != nil {
518+ return fmt .Errorf ("failed to parse default route (this is statically defined, so if you see this " +
519+ "error please report because something has gone very wrong) due to: %v" , err )
520+ }
521+
522+ nRule := & netlink.Rule {
523+ Priority : defaultDSRPolicyRulePriority ,
524+ Src : defaultPrefixCIDR ,
525+ Table : externalIPRouteTableID ,
526+ }
527+ rules , err := netlink .RuleListFiltered (nFamily , nRule ,
528+ netlink .RT_FILTER_TABLE | netlink .RT_FILTER_SRC | netlink .RT_FILTER_PRIORITY )
469529 if err != nil {
470- return fmt .Errorf ("failed to verify if `ip rule add prio 32765 from all lookup external_ip` exists due to: %v" ,
471- err )
530+ return fmt .Errorf ("failed to list rule for external IP's and verify if `ip rule add prio 32765 from all " +
531+ "lookup external_ip` exists due to: %v" , err )
472532 }
473533
474- if ! (strings .Contains (string (out ), externalIPRouteTableName ) ||
475- strings .Contains (string (out ), externalIPRouteTableID )) {
476- err = runIPCommandsWithArgs (ipArgs , "rule" , "add" , "prio" , "32765" , "from" , "all" , "lookup" ,
477- externalIPRouteTableID ).Run ()
534+ if len (rules ) < 1 {
535+ err = netlink .RuleAdd (nRule )
478536 if err != nil {
479537 klog .Infof ("Failed to add policy rule `ip rule add prio 32765 from all lookup external_ip` due to %v" ,
480- err . Error () )
538+ err )
481539 return fmt .Errorf ("failed to add policy rule `ip rule add prio 32765 from all lookup external_ip` " +
482540 "due to %v" , err )
483541 }
484542 }
485543
486- out , _ = runIPCommandsWithArgs (ipArgs , "route" , "list" , "table" , externalIPRouteTableID ).Output ()
487- outStr := string (out )
544+ kubeBridgeLink , err := netlink .LinkByName (KubeBridgeIf )
545+ if err != nil {
546+ return fmt .Errorf ("failed to get kube-bridge interface due to %v" , err )
547+ }
548+
488549 activeExternalIPs := make (map [string ]bool )
489550 for _ , svc := range serviceInfoMap {
490551 for _ , externalIP := range svc .externalIPs {
@@ -497,9 +558,21 @@ func (ln *linuxNetworking) setupRoutesForExternalIPForDSR(serviceInfoMap service
497558
498559 activeExternalIPs [externalIP ] = true
499560
500- if ! strings .Contains (outStr , externalIP ) {
501- if err = runIPCommandsWithArgs (ipArgs , "route" , "add" , externalIP , "dev" , "kube-bridge" , "table" ,
502- externalIPRouteTableID ).Run (); err != nil {
561+ nSrcIP := net .ParseIP (externalIP )
562+ nRoute := & netlink.Route {
563+ Src : nSrcIP ,
564+ LinkIndex : kubeBridgeLink .Attrs ().Index ,
565+ Table : externalIPRouteTableID ,
566+ }
567+
568+ routes , err := netlink .RouteListFiltered (nFamily , nRoute ,
569+ netlink .RT_FILTER_SRC | netlink .RT_FILTER_TABLE | netlink .RT_FILTER_OIF )
570+ if err != nil {
571+ return fmt .Errorf ("failed to list route for external IP's due to: %s" , err )
572+ }
573+ if len (routes ) < 1 {
574+ err = netlink .RouteAdd (nRoute )
575+ if err != nil {
503576 klog .Errorf ("Failed to add route for %s in custom route table for external IP's due to: %v" ,
504577 externalIP , err )
505578 continue
@@ -509,19 +582,18 @@ func (ln *linuxNetworking) setupRoutesForExternalIPForDSR(serviceInfoMap service
509582 }
510583
511584 // check if there are any pbr in externalIPRouteTableID for external IP's
512- if len (outStr ) > 0 {
513- // clean up stale external IPs
514- for _ , line := range strings .Split (strings .Trim (outStr , "\n " ), "\n " ) {
515- route := strings .Split (strings .Trim (line , " " ), " " )
516- ip := route [0 ]
517- if ! activeExternalIPs [ip ] {
518- args := []string {"route" , "del" , "table" , externalIPRouteTableID }
519- args = append (args , route ... )
520- if err = runIPCommandsWithArgs (ipArgs , args ... ).Run (); err != nil {
521- klog .Errorf ("Failed to del route for %v in custom route table for external IP's due to: %s" ,
522- ip , err )
523- continue
524- }
585+ routes , err := netlink .RouteList (nil , nFamily )
586+ if err != nil {
587+ return fmt .Errorf ("failed to list route for external IP's due to: %s" , err )
588+ }
589+ for idx , route := range routes {
590+ ip := route .Src .String ()
591+ if ! activeExternalIPs [ip ] {
592+ err = netlink .RouteDel (& routes [idx ])
593+ if err != nil {
594+ klog .Errorf ("Failed to del route for %v in custom route table for external IP's due to: %s" ,
595+ ip , err )
596+ continue
525597 }
526598 }
527599 }
@@ -530,13 +602,13 @@ func (ln *linuxNetworking) setupRoutesForExternalIPForDSR(serviceInfoMap service
530602 }
531603
532604 if setupIPv4 {
533- err = setupIPRulesAndRoutes ([] string {} )
605+ err = setupIPRulesAndRoutes (false )
534606 if err != nil {
535607 return err
536608 }
537609 }
538610 if setupIPv6 {
539- err = setupIPRulesAndRoutes ([] string { "-6" } )
611+ err = setupIPRulesAndRoutes (true )
540612 if err != nil {
541613 return err
542614 }
0 commit comments