@@ -38,6 +38,7 @@ const (
3838 KUBE_TUNNEL_IF = "kube-tunnel-if"
3939 IFACE_NOT_FOUND = "Link not found"
4040 IFACE_HAS_ADDR = "file exists"
41+ IFACE_HAS_NO_ADDR = "cannot assign requested address"
4142 IPVS_SERVER_EXISTS = "file exists"
4243 namespace = "kube_router"
4344)
@@ -254,6 +255,13 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf
254255 }
255256 glog .Infof ("Custom routing table " + customDSRRouteTableName + "required for Direct Server Return is setup as expected." )
256257
258+ glog .Infof ("Setting up custom route table required to add routes for external IP's." )
259+ err = setupRoutesForExternalIPForDSR (serviceInfoMap )
260+ if err != nil {
261+ return errors .New ("Failed setup custom routing table required to add routes for external IP's due to: " + err .Error ())
262+ }
263+ glog .Infof ("Custom routing table " + externalIPRouteTableName + "required for Direct Server Return is setup as expected." )
264+
257265 // map of active services and service endpoints
258266 activeServiceEndpointMap := make (map [string ][]string )
259267
@@ -312,29 +320,62 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf
312320 // without a VIP http://www.austintek.com/LVS/LVS-HOWTO/HOWTO/LVS-HOWTO.routing_to_VIP-less_director.html
313321 // to avoid martian packets
314322 for _ , externalIP := range svc .externalIPs {
315- ipvsExternalIPSvc , err := ipvsAddFWMarkService (net .ParseIP (externalIP ), protocol , uint16 (svc .port ), svc .sessionAffinity , svc .scheduler )
316- if err != nil {
317- glog .Errorf ("Failed to create ipvs service for External IP: %s due to: %s" , externalIP , err .Error ())
318- continue
319- }
320- externalIpServices = append (externalIpServices , externalIPService {ipvsSvc : ipvsExternalIPSvc , externalIp : externalIP })
321- fwMark := generateFwmark (externalIP , svc .protocol , strconv .Itoa (svc .port ))
322- externalIpServiceId := fmt .Sprint (fwMark )
323+ var externalIpServiceId string
324+ if svc .directServerReturn && svc .directServerReturnMethod == "tunnel" {
325+ ipvsExternalIPSvc , err := ipvsAddFWMarkService (net .ParseIP (externalIP ), protocol , uint16 (svc .port ), svc .sessionAffinity , svc .scheduler )
326+ if err != nil {
327+ glog .Errorf ("Failed to create ipvs service for External IP: %s due to: %s" , externalIP , err .Error ())
328+ continue
329+ }
330+ externalIpServices = append (externalIpServices , externalIPService {ipvsSvc : ipvsExternalIPSvc , externalIp : externalIP })
331+ fwMark := generateFwmark (externalIP , svc .protocol , strconv .Itoa (svc .port ))
332+ externalIpServiceId = fmt .Sprint (fwMark )
323333
324- // ensure there is iptable mangle table rule to FWMARK the packet
325- err = setupMangleTableRule (externalIP , svc .protocol , strconv .Itoa (svc .port ), externalIpServiceId )
326- if err != nil {
327- glog .Errorf ("Failed to setup mangle table rule to FMWARD the traffic to external IP" )
328- continue
329- }
334+ // ensure there is iptable mangle table rule to FWMARK the packet
335+ err = setupMangleTableRule (externalIP , svc .protocol , strconv .Itoa (svc .port ), externalIpServiceId )
336+ if err != nil {
337+ glog .Errorf ("Failed to setup mangle table rule to FMWARD the traffic to external IP" )
338+ continue
339+ }
330340
331- // in VIP less directory we dont assign VIP to any interface, so we do policy routing
332- // to deliver the packet locally so that IPVS can pick the packet
333- err = routeVIPTrafficToDirector ("0x" + fmt .Sprintf ("%x" , fwMark ))
334- if err != nil {
335- glog .Errorf ("Failed to setup ip rule to lookup traffic to external IP: %s through custom " +
336- "route table due to " , externalIP , err .Error ())
337- continue
341+ // ensure VIP less director. we dont assign VIP to any interface
342+ eip := & netlink.Addr {IPNet : & net.IPNet {IP : net .ParseIP (externalIP ), Mask : net .IPv4Mask (255 , 255 , 255 , 255 )}, Scope : syscall .RT_SCOPE_LINK }
343+ err = netlink .AddrDel (dummyVipInterface , eip )
344+ if err != nil && err .Error () != IFACE_HAS_NO_ADDR {
345+ glog .Errorf ("Failed to verify is external ip %s is assocated with dummy interface %s due to %s" , externalIP , KUBE_DUMMY_IF , err .Error ())
346+ }
347+
348+ // do policy routing to deliver the packet locally so that IPVS can pick the packet
349+ err = routeVIPTrafficToDirector ("0x" + fmt .Sprintf ("%x" , fwMark ))
350+ if err != nil {
351+ glog .Errorf ("Failed to setup ip rule to lookup traffic to external IP: %s through custom " +
352+ "route table due to " , externalIP , err .Error ())
353+ continue
354+ }
355+ } else {
356+ // ensure director with vip assigned
357+ eip := & netlink.Addr {IPNet : & net.IPNet {IP : net .ParseIP (externalIP ), Mask : net .IPv4Mask (255 , 255 , 255 , 255 )}, Scope : syscall .RT_SCOPE_LINK }
358+ err := netlink .AddrAdd (dummyVipInterface , eip )
359+ if err != nil && err .Error () != IFACE_HAS_ADDR {
360+ glog .Errorf ("Failed to assign external ip %s to dummy interface %s due to %s" , externalIP , KUBE_DUMMY_IF , err .Error ())
361+ }
362+
363+ // create IPVS service for the service to be exposed through the external ip
364+ ipvsExternalIPSvc , err := ipvsAddService (net .ParseIP (externalIP ), protocol , uint16 (svc .port ), svc .sessionAffinity , svc .scheduler )
365+ if err != nil {
366+ glog .Errorf ("Failed to create ipvs service for external ip: %s due to %s" , externalIP , err .Error ())
367+ continue
368+ }
369+ externalIpServices = append (externalIpServices , externalIPService {ipvsSvc : ipvsExternalIPSvc , externalIp : externalIP })
370+ externalIpServiceId = generateIpPortId (externalIP , svc .protocol , strconv .Itoa (svc .port ))
371+
372+ // ensure there is NO iptable mangle table rule to FWMARK the packet
373+ fwMark := fmt .Sprint (generateFwmark (externalIP , svc .protocol , strconv .Itoa (svc .port )))
374+ err = cleanupMangleTableRule (externalIP , svc .protocol , strconv .Itoa (svc .port ), fwMark )
375+ if err != nil {
376+ glog .Errorf ("Failed to verify and cleanup any mangle table rule to FMWARD the traffic to external IP due to " + err .Error ())
377+ continue
378+ }
338379 }
339380
340381 activeServiceEndpointMap [externalIpServiceId ] = make ([]string , 0 )
@@ -495,6 +536,7 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
495536 if err != nil {
496537 return errors .New ("Failed to get namespace due to " + err .Error ())
497538 }
539+ defer currentNamespaceHandle .Close ()
498540
499541 client , err := client .NewEnvClient ()
500542 if err != nil {
@@ -511,14 +553,15 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
511553 if err != nil {
512554 return errors .New ("Failed to get endpoint namespace due to " + err .Error ())
513555 }
556+ defer endpointNamespaceHandle .Close ()
514557
515558 err = netns .Set (endpointNamespaceHandle )
516559 if err != nil {
517560 return errors .New ("Failed to enter to endpoint namespace due to " + err .Error ())
518561 }
519562
520563 // TODO: fix boilerplate `netns.Set(currentNamespaceHandle)` code. Need a robust
521- // way to switch back to old namespace, pretty much many things will go wrong
564+ // way to switch back to old namespace, pretty much all things will go wrong if we dont switch back
522565
523566 // create a ipip tunnel interface inside the endpoint container
524567 tunIf , err := netlink .LinkByName (KUBE_TUNNEL_IF )
@@ -575,11 +618,24 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
575618 glog .Infof ("Successfully assinged VIP: " + vip + " in endpoint " + endpointIP + "." )
576619
577620 // disable rp_filter on all interface
621+ err = ioutil .WriteFile ("/proc/sys/net/ipv4/conf/kube-tunnel-if/rp_filter" , []byte (strconv .Itoa (0 )), 0640 )
622+ if err != nil {
623+ netns .Set (currentNamespaceHandle )
624+ return errors .New ("Failed to disable rp_filter on kube-tunnel-if in the endpoint container" )
625+ }
626+
627+ err = ioutil .WriteFile ("/proc/sys/net/ipv4/conf/eth0/rp_filter" , []byte (strconv .Itoa (0 )), 0640 )
628+ if err != nil {
629+ netns .Set (currentNamespaceHandle )
630+ return errors .New ("Failed to disable rp_filter on eth0 in the endpoint container" )
631+ }
632+
578633 err = ioutil .WriteFile ("/proc/sys/net/ipv4/conf/all/rp_filter" , []byte (strconv .Itoa (0 )), 0640 )
579634 if err != nil {
580635 netns .Set (currentNamespaceHandle )
581- return errors .New ("Failed to disable rp_filter in the endpoint container" )
636+ return errors .New ("Failed to disable rp_filter on `all` in the endpoint container" )
582637 }
638+
583639 glog .Infof ("Successfully disabled rp_filter in endpoint " + endpointIP + "." )
584640
585641 netns .Set (currentNamespaceHandle )
@@ -1150,8 +1206,10 @@ func ipvsAddServer(service *ipvs.Service, dest *ipvs.Destination) error {
11501206}
11511207
11521208const (
1153- customDSRRouteTableID = "78"
1154- customDSRRouteTableName = "kube-router-dsr"
1209+ customDSRRouteTableID = "78"
1210+ customDSRRouteTableName = "kube-router-dsr"
1211+ externalIPRouteTableId = "79"
1212+ externalIPRouteTableName = "external_ip"
11551213)
11561214
11571215// setupMangleTableRule: sets up an iptables rule to FWMARK the traffic to the external IP VIP
@@ -1168,6 +1226,26 @@ func setupMangleTableRule(ip string, protocol string, port string, fwmark string
11681226 return nil
11691227}
11701228
1229+ func cleanupMangleTableRule (ip string , protocol string , port string , fwmark string ) error {
1230+ iptablesCmdHandler , err := iptables .New ()
1231+ if err != nil {
1232+ return errors .New ("Failed to initialize iptables executor" + err .Error ())
1233+ }
1234+ args := []string {"-d" , ip , "-m" , protocol , "-p" , protocol , "--dport" , port , "-j" , "MARK" , "--set-mark" , fwmark }
1235+ exists , err := iptablesCmdHandler .Exists ("mangle" , "PREROUTING" , args ... )
1236+ if err != nil {
1237+ return errors .New ("Failed to cleanup iptables command to set up FWMARK due to " + err .Error ())
1238+ }
1239+ if exists {
1240+ err = iptablesCmdHandler .Delete ("mangle" , "PREROUTING" , args ... )
1241+ if err != nil {
1242+ return errors .New ("Failed to cleanup iptables command to set up FWMARK due to " + err .Error ())
1243+ }
1244+ }
1245+
1246+ return nil
1247+ }
1248+
11711249// For DSR it is required that we don't assign the VIP to any interface to avoid martian packets
11721250// http://www.austintek.com/LVS/LVS-HOWTO/HOWTO/LVS-HOWTO.routing_to_VIP-less_director.html
11731251// routeVIPTrafficToDirector: sets up policy routing so that FWMARKed packets are delivered locally
@@ -1217,6 +1295,47 @@ func setupPolicyRoutingForDSR() error {
12171295 return nil
12181296}
12191297
1298+ // For DSR it is required that node needs to know how to route exteranl IP. Otherwise when endpoint
1299+ // directly responds back with source IP as external IP kernel will treat as martian packet.
1300+ // To prevent martian packets add route to exteranl IP through the `kube-bridge` interface
1301+ // setupRoutesForExternalIPForDSR: setups routing so that kernel does not think return packets as martians
1302+
1303+ func setupRoutesForExternalIPForDSR (serviceInfoMap serviceInfoMap ) error {
1304+ b , err := ioutil .ReadFile ("/etc/iproute2/rt_tables" )
1305+ if err != nil {
1306+ return errors .New ("Failed to setup external ip routing table required for DSR due to " + err .Error ())
1307+ }
1308+ if ! strings .Contains (string (b ), externalIPRouteTableName ) {
1309+ f , err := os .OpenFile ("/etc/iproute2/rt_tables" , os .O_APPEND | os .O_WRONLY , 0600 )
1310+ if err != nil {
1311+ return errors .New ("Failed to setup external ip routing table required for DSR due to " + err .Error ())
1312+ }
1313+ if _ , err = f .WriteString (externalIPRouteTableId + " " + externalIPRouteTableName ); err != nil {
1314+ return errors .New ("Failed to setup external ip routing table required for DSR due to " + err .Error ())
1315+ }
1316+ }
1317+ out , err := exec .Command ("ip" , "route" , "list" , "table" , externalIPRouteTableId ).Output ()
1318+ if err != nil {
1319+ return errors .New ("Failed to verify required routing table for external IP's exists. " +
1320+ "Failed to setup policy routing required for DSR due to " + err .Error ())
1321+ }
1322+
1323+ for _ , svc := range serviceInfoMap {
1324+ for _ , externalIP := range svc .externalIPs {
1325+ if ! strings .Contains (string (out ), externalIP ) {
1326+ if err = exec .Command ("ip" , "route" , "add" , externalIP , "dev" , "kube-bridge" , "table" ,
1327+ externalIPRouteTableId ).Run (); err != nil {
1328+ return errors .New ("Failed to add route for " + externalIP + " in custom route table for external IP's due to: " + err .Error ())
1329+ }
1330+ }
1331+ }
1332+ }
1333+
1334+ // TODO: cleanup routes for non-active exteranl IP's
1335+
1336+ return nil
1337+ }
1338+
12201339// unique identifier for a load-balanced service (namespace + name + portname)
12211340func generateServiceId (namespace , svcName , port string ) string {
12221341 return namespace + "-" + svcName + "-" + port
0 commit comments