Skip to content

Commit 5f425eb

Browse files
authored
Fixes direct server return on external IPs (#210)
- add a route to the external IP in a custom routing table to prevent martian packets - switch between Masquerade and Tunnel for forwarding when DSR is disabled and enabled
1 parent 6a3fada commit 5f425eb

File tree

1 file changed

+144
-25
lines changed

1 file changed

+144
-25
lines changed

app/controllers/network_services_controller.go

Lines changed: 144 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ const (
3838
KUBE_TUNNEL_IF = "kube-tunnel-if"
3939
IFACE_NOT_FOUND = "Link not found"
4040
IFACE_HAS_ADDR = "file exists"
41+
IFACE_HAS_NO_ADDR = "cannot assign requested address"
4142
IPVS_SERVER_EXISTS = "file exists"
4243
namespace = "kube_router"
4344
)
@@ -254,6 +255,13 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf
254255
}
255256
glog.Infof("Custom routing table " + customDSRRouteTableName + "required for Direct Server Return is setup as expected.")
256257

258+
glog.Infof("Setting up custom route table required to add routes for external IP's.")
259+
err = setupRoutesForExternalIPForDSR(serviceInfoMap)
260+
if err != nil {
261+
return errors.New("Failed setup custom routing table required to add routes for external IP's due to: " + err.Error())
262+
}
263+
glog.Infof("Custom routing table " + externalIPRouteTableName + "required for Direct Server Return is setup as expected.")
264+
257265
// map of active services and service endpoints
258266
activeServiceEndpointMap := make(map[string][]string)
259267

@@ -312,29 +320,62 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf
312320
// without a VIP http://www.austintek.com/LVS/LVS-HOWTO/HOWTO/LVS-HOWTO.routing_to_VIP-less_director.html
313321
// to avoid martian packets
314322
for _, externalIP := range svc.externalIPs {
315-
ipvsExternalIPSvc, err := ipvsAddFWMarkService(net.ParseIP(externalIP), protocol, uint16(svc.port), svc.sessionAffinity, svc.scheduler)
316-
if err != nil {
317-
glog.Errorf("Failed to create ipvs service for External IP: %s due to: %s", externalIP, err.Error())
318-
continue
319-
}
320-
externalIpServices = append(externalIpServices, externalIPService{ipvsSvc: ipvsExternalIPSvc, externalIp: externalIP})
321-
fwMark := generateFwmark(externalIP, svc.protocol, strconv.Itoa(svc.port))
322-
externalIpServiceId := fmt.Sprint(fwMark)
323+
var externalIpServiceId string
324+
if svc.directServerReturn && svc.directServerReturnMethod == "tunnel" {
325+
ipvsExternalIPSvc, err := ipvsAddFWMarkService(net.ParseIP(externalIP), protocol, uint16(svc.port), svc.sessionAffinity, svc.scheduler)
326+
if err != nil {
327+
glog.Errorf("Failed to create ipvs service for External IP: %s due to: %s", externalIP, err.Error())
328+
continue
329+
}
330+
externalIpServices = append(externalIpServices, externalIPService{ipvsSvc: ipvsExternalIPSvc, externalIp: externalIP})
331+
fwMark := generateFwmark(externalIP, svc.protocol, strconv.Itoa(svc.port))
332+
externalIpServiceId = fmt.Sprint(fwMark)
323333

324-
// ensure there is iptable mangle table rule to FWMARK the packet
325-
err = setupMangleTableRule(externalIP, svc.protocol, strconv.Itoa(svc.port), externalIpServiceId)
326-
if err != nil {
327-
glog.Errorf("Failed to setup mangle table rule to FMWARD the traffic to external IP")
328-
continue
329-
}
334+
// ensure there is iptable mangle table rule to FWMARK the packet
335+
err = setupMangleTableRule(externalIP, svc.protocol, strconv.Itoa(svc.port), externalIpServiceId)
336+
if err != nil {
337+
glog.Errorf("Failed to setup mangle table rule to FMWARD the traffic to external IP")
338+
continue
339+
}
330340

331-
// in VIP less directory we dont assign VIP to any interface, so we do policy routing
332-
// to deliver the packet locally so that IPVS can pick the packet
333-
err = routeVIPTrafficToDirector("0x" + fmt.Sprintf("%x", fwMark))
334-
if err != nil {
335-
glog.Errorf("Failed to setup ip rule to lookup traffic to external IP: %s through custom "+
336-
"route table due to ", externalIP, err.Error())
337-
continue
341+
// ensure VIP less director. we dont assign VIP to any interface
342+
eip := &netlink.Addr{IPNet: &net.IPNet{IP: net.ParseIP(externalIP), Mask: net.IPv4Mask(255, 255, 255, 255)}, Scope: syscall.RT_SCOPE_LINK}
343+
err = netlink.AddrDel(dummyVipInterface, eip)
344+
if err != nil && err.Error() != IFACE_HAS_NO_ADDR {
345+
glog.Errorf("Failed to verify is external ip %s is assocated with dummy interface %s due to %s", externalIP, KUBE_DUMMY_IF, err.Error())
346+
}
347+
348+
// do policy routing to deliver the packet locally so that IPVS can pick the packet
349+
err = routeVIPTrafficToDirector("0x" + fmt.Sprintf("%x", fwMark))
350+
if err != nil {
351+
glog.Errorf("Failed to setup ip rule to lookup traffic to external IP: %s through custom "+
352+
"route table due to ", externalIP, err.Error())
353+
continue
354+
}
355+
} else {
356+
// ensure director with vip assigned
357+
eip := &netlink.Addr{IPNet: &net.IPNet{IP: net.ParseIP(externalIP), Mask: net.IPv4Mask(255, 255, 255, 255)}, Scope: syscall.RT_SCOPE_LINK}
358+
err := netlink.AddrAdd(dummyVipInterface, eip)
359+
if err != nil && err.Error() != IFACE_HAS_ADDR {
360+
glog.Errorf("Failed to assign external ip %s to dummy interface %s due to %s", externalIP, KUBE_DUMMY_IF, err.Error())
361+
}
362+
363+
// create IPVS service for the service to be exposed through the external ip
364+
ipvsExternalIPSvc, err := ipvsAddService(net.ParseIP(externalIP), protocol, uint16(svc.port), svc.sessionAffinity, svc.scheduler)
365+
if err != nil {
366+
glog.Errorf("Failed to create ipvs service for external ip: %s due to %s", externalIP, err.Error())
367+
continue
368+
}
369+
externalIpServices = append(externalIpServices, externalIPService{ipvsSvc: ipvsExternalIPSvc, externalIp: externalIP})
370+
externalIpServiceId = generateIpPortId(externalIP, svc.protocol, strconv.Itoa(svc.port))
371+
372+
// ensure there is NO iptable mangle table rule to FWMARK the packet
373+
fwMark := fmt.Sprint(generateFwmark(externalIP, svc.protocol, strconv.Itoa(svc.port)))
374+
err = cleanupMangleTableRule(externalIP, svc.protocol, strconv.Itoa(svc.port), fwMark)
375+
if err != nil {
376+
glog.Errorf("Failed to verify and cleanup any mangle table rule to FMWARD the traffic to external IP due to " + err.Error())
377+
continue
378+
}
338379
}
339380

340381
activeServiceEndpointMap[externalIpServiceId] = make([]string, 0)
@@ -495,6 +536,7 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
495536
if err != nil {
496537
return errors.New("Failed to get namespace due to " + err.Error())
497538
}
539+
defer currentNamespaceHandle.Close()
498540

499541
client, err := client.NewEnvClient()
500542
if err != nil {
@@ -511,14 +553,15 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
511553
if err != nil {
512554
return errors.New("Failed to get endpoint namespace due to " + err.Error())
513555
}
556+
defer endpointNamespaceHandle.Close()
514557

515558
err = netns.Set(endpointNamespaceHandle)
516559
if err != nil {
517560
return errors.New("Failed to enter to endpoint namespace due to " + err.Error())
518561
}
519562

520563
// TODO: fix boilerplate `netns.Set(currentNamespaceHandle)` code. Need a robust
521-
// way to switch back to old namespace, pretty much many things will go wrong
564+
// way to switch back to old namespace, pretty much all things will go wrong if we dont switch back
522565

523566
// create a ipip tunnel interface inside the endpoint container
524567
tunIf, err := netlink.LinkByName(KUBE_TUNNEL_IF)
@@ -575,11 +618,24 @@ func prepareEndpointForDsr(containerId string, endpointIP string, vip string) er
575618
glog.Infof("Successfully assinged VIP: " + vip + " in endpoint " + endpointIP + ".")
576619

577620
// disable rp_filter on all interface
621+
err = ioutil.WriteFile("/proc/sys/net/ipv4/conf/kube-tunnel-if/rp_filter", []byte(strconv.Itoa(0)), 0640)
622+
if err != nil {
623+
netns.Set(currentNamespaceHandle)
624+
return errors.New("Failed to disable rp_filter on kube-tunnel-if in the endpoint container")
625+
}
626+
627+
err = ioutil.WriteFile("/proc/sys/net/ipv4/conf/eth0/rp_filter", []byte(strconv.Itoa(0)), 0640)
628+
if err != nil {
629+
netns.Set(currentNamespaceHandle)
630+
return errors.New("Failed to disable rp_filter on eth0 in the endpoint container")
631+
}
632+
578633
err = ioutil.WriteFile("/proc/sys/net/ipv4/conf/all/rp_filter", []byte(strconv.Itoa(0)), 0640)
579634
if err != nil {
580635
netns.Set(currentNamespaceHandle)
581-
return errors.New("Failed to disable rp_filter in the endpoint container")
636+
return errors.New("Failed to disable rp_filter on `all` in the endpoint container")
582637
}
638+
583639
glog.Infof("Successfully disabled rp_filter in endpoint " + endpointIP + ".")
584640

585641
netns.Set(currentNamespaceHandle)
@@ -1150,8 +1206,10 @@ func ipvsAddServer(service *ipvs.Service, dest *ipvs.Destination) error {
11501206
}
11511207

11521208
const (
1153-
customDSRRouteTableID = "78"
1154-
customDSRRouteTableName = "kube-router-dsr"
1209+
customDSRRouteTableID = "78"
1210+
customDSRRouteTableName = "kube-router-dsr"
1211+
externalIPRouteTableId = "79"
1212+
externalIPRouteTableName = "external_ip"
11551213
)
11561214

11571215
// setupMangleTableRule: sets up iptables rule to FWMARK the traffic to external IP vip
@@ -1168,6 +1226,26 @@ func setupMangleTableRule(ip string, protocol string, port string, fwmark string
11681226
return nil
11691227
}
11701228

1229+
func cleanupMangleTableRule(ip string, protocol string, port string, fwmark string) error {
1230+
iptablesCmdHandler, err := iptables.New()
1231+
if err != nil {
1232+
return errors.New("Failed to initialize iptables executor" + err.Error())
1233+
}
1234+
args := []string{"-d", ip, "-m", protocol, "-p", protocol, "--dport", port, "-j", "MARK", "--set-mark", fwmark}
1235+
exists, err := iptablesCmdHandler.Exists("mangle", "PREROUTING", args...)
1236+
if err != nil {
1237+
return errors.New("Failed to cleanup iptables command to set up FWMARK due to " + err.Error())
1238+
}
1239+
if exists {
1240+
err = iptablesCmdHandler.Delete("mangle", "PREROUTING", args...)
1241+
if err != nil {
1242+
return errors.New("Failed to cleanup iptables command to set up FWMARK due to " + err.Error())
1243+
}
1244+
}
1245+
1246+
return nil
1247+
}
1248+
11711249
// For DSR it is required that we dont assign the VIP to any interface to avoid martian packets
11721250
// http://www.austintek.com/LVS/LVS-HOWTO/HOWTO/LVS-HOWTO.routing_to_VIP-less_director.html
11731251
// routeVIPTrafficToDirector: sets up policy routing so that FWMARKed packets are delivered locally
@@ -1217,6 +1295,47 @@ func setupPolicyRoutingForDSR() error {
12171295
return nil
12181296
}
12191297

1298+
// For DSR it is required that node needs to know how to route exteranl IP. Otherwise when endpoint
1299+
// directly responds back with source IP as external IP kernel will treat as martian packet.
1300+
// To prevent martian packets add route to exteranl IP through the `kube-bridge` interface
1301+
// setupRoutesForExternalIPForDSR: setups routing so that kernel does not think return packets as martians
1302+
1303+
func setupRoutesForExternalIPForDSR(serviceInfoMap serviceInfoMap) error {
1304+
b, err := ioutil.ReadFile("/etc/iproute2/rt_tables")
1305+
if err != nil {
1306+
return errors.New("Failed to setup external ip routing table required for DSR due to " + err.Error())
1307+
}
1308+
if !strings.Contains(string(b), externalIPRouteTableName) {
1309+
f, err := os.OpenFile("/etc/iproute2/rt_tables", os.O_APPEND|os.O_WRONLY, 0600)
1310+
if err != nil {
1311+
return errors.New("Failed to setup external ip routing table required for DSR due to " + err.Error())
1312+
}
1313+
if _, err = f.WriteString(externalIPRouteTableId + " " + externalIPRouteTableName); err != nil {
1314+
return errors.New("Failed to setup external ip routing table required for DSR due to " + err.Error())
1315+
}
1316+
}
1317+
out, err := exec.Command("ip", "route", "list", "table", externalIPRouteTableId).Output()
1318+
if err != nil {
1319+
return errors.New("Failed to verify required routing table for external IP's exists. " +
1320+
"Failed to setup policy routing required for DSR due to " + err.Error())
1321+
}
1322+
1323+
for _, svc := range serviceInfoMap {
1324+
for _, externalIP := range svc.externalIPs {
1325+
if !strings.Contains(string(out), externalIP) {
1326+
if err = exec.Command("ip", "route", "add", externalIP, "dev", "kube-bridge", "table",
1327+
externalIPRouteTableId).Run(); err != nil {
1328+
return errors.New("Failed to add route for " + externalIP + " in custom route table for external IP's due to: " + err.Error())
1329+
}
1330+
}
1331+
}
1332+
}
1333+
1334+
// TODO: cleanup routes for non-active exteranl IP's
1335+
1336+
return nil
1337+
}
1338+
12201339
// unique identifier for a load-balanced service (namespace + name + portname)
12211340
func generateServiceId(namespace, svcName, port string) string {
12221341
return namespace + "-" + svcName + "-" + port

0 commit comments

Comments
 (0)