Skip to content

Commit c81bfbb

Browse files
authored
Enable pod egress masquerading by default (#111)
* Enable pod egress masquerading by default - Adds flag "--enable-pod-egress" (default: true) - Removes previously created iptables rule if option is changed to false * Use an ipset to match Pod egress traffic to be masqueraded * Set --cluster-cidr as deprecated flag If set to anything, normal dynamic Pod egress masquerading is turned on. * Use Replace else Add logic for updating export policy Fixes errors logged due to existing statement in policy.
1 parent 0a8b7e7 commit c81bfbb

File tree

4 files changed

+221
-66
lines changed

4 files changed

+221
-66
lines changed

Documentation/README.md

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -69,25 +69,27 @@ Also you can choose to run kube-router as agent running on each cluster node. Al
6969
### command line options
7070

7171
```
72-
--run-firewall If false, kube-router won't setup iptables to provide ingress firewall for pods. true by default.
73-
--run-router If true each node advertise routes the rest of the nodes and learn the routes for the pods. false by default
74-
--run-service-proxy If false, kube-router won't setup IPVS for services proxy. true by default.
75-
--cleanup-config If true cleanup iptables rules, ipvs, ipset configuration and exit.
76-
--masquerade-all SNAT all traffic to cluster IP/node port. False by default
77-
--cluster-cidr CIDR range of pods in the cluster. If specified external traffic from the pods will be masquraded
78-
--config-sync-period duration How often configuration from the apiserver is refreshed. Must be greater than 0. (default 1m0s)
79-
--iptables-sync-period duration The maximum interval of how often iptables rules are refreshed (e.g. '5s', '1m'). Must be greater than 0. (default 1m0s)
80-
--ipvs-sync-period duration The maximum interval of how often ipvs config is refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0. (default 1m0s)
81-
--kubeconfig string Path to kubeconfig file with authorization information (the master location is set by the master flag).
82-
--master string The address of the Kubernetes API server (overrides any value in kubeconfig)
83-
--routes-sync-period duration The maximum interval of how often routes are advertised and learned (e.g. '5s', '1m', '2h22m'). Must be greater than 0. (default 1m0s)
84-
--advertise-cluster-ip If true then cluster IP will be added into the RIB and will be advertised to the peers. False by default.
85-
--cluster-asn ASN number under which cluster nodes will run iBGP
86-
--peer-asn ASN number of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr
87-
--peer-router The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's
88-
--nodes-full-mesh When enabled each node in the cluster will setup BGP peer with rest of the nodes. True by default
89-
--hostname-override If non-empty, this string will be used as identification of node name instead of the actual hostname.
90-
--hairpin-mode Adds iptable rules for every ClusterIP Service Endpoint to support hairpin traffic. False by default
72+
Usage of ./kube-router:
73+
--advertise-cluster-ip Add Cluster IP to the RIB and advertise to peers.
74+
--cleanup-config Cleanup iptables rules, ipvs, ipset configuration and exit.
75+
--cluster-asn string ASN number under which cluster nodes will run iBGP.
76+
--config-sync-period duration The delay between apiserver configuration synchronizations (e.g. '5s', '1m'). Must be greater than 0. (default 1m0s)
77+
--enable-pod-egress SNAT traffic from Pods to destinations outside the cluster. (default true)
78+
--hairpin-mode Add iptable rules for every Service Endpoint to support hairpin traffic.
79+
-h, --help Print usage information.
80+
--hostname-override string Overrides the NodeName of the node. Set this if kube-router is unable to determine your NodeName automatically.
81+
--iptables-sync-period duration The delay between iptables rule synchronizations (e.g. '5s', '1m'). Must be greater than 0. (default 1m0s)
82+
--ipvs-sync-period duration The delay between ipvs config synchronizations (e.g. '5s', '1m', '2h22m'). Must be greater than 0. (default 1m0s)
83+
--kubeconfig string Path to kubeconfig file with authorization information (the master location is set by the master flag).
84+
--masquerade-all SNAT all traffic to cluster IP/node port.
85+
--master string The address of the Kubernetes API server (overrides any value in kubeconfig).
86+
--nodes-full-mesh Each node in the cluster will setup BGP peering with rest of the nodes. (default true)
87+
--peer-asn string ASN number of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr.
88+
--peer-router string The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's.
89+
--routes-sync-period duration The delay between route updates and advertisements (e.g. '5s', '1m', '2h22m'). Must be greater than 0. (default 1m0s)
90+
--run-firewall Enables Network Policy -- sets up iptables to provide ingress firewall for pods. (default true)
91+
--run-router Enables Pod Networking -- Advertises and learns the routes to Pods via iBGP. (default true)
92+
--run-service-proxy Enables Service Proxy -- sets up IPVS for Kubernetes Services. (default true)
9193
```
9294

9395
### requirements

app/controllers/network_routes_controller.go

Lines changed: 155 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/cloudnativelabs/kube-router/utils"
2121
"github.com/coreos/go-iptables/iptables"
2222
"github.com/golang/glog"
23+
"github.com/janeczku/go-ipset/ipset"
2324
bgpapi "github.com/osrg/gobgp/api"
2425
"github.com/osrg/gobgp/config"
2526
"github.com/osrg/gobgp/packet/bgp"
@@ -40,6 +41,7 @@ type NetworkRoutingController struct {
4041
bgpServer *gobgp.BgpServer
4142
syncPeriod time.Duration
4243
clusterCIDR string
44+
enablePodEgress bool
4345
hostnameOverride string
4446
advertiseClusterIp bool
4547
defaultNodeAsnNumber uint32
@@ -48,18 +50,22 @@ type NetworkRoutingController struct {
4850
nodePeerRouters []string
4951
globalPeerAsnNumber uint32
5052
bgpFullMeshMode bool
53+
podSubnetsIpSet *ipset.IPSet
5154
}
5255

5356
var (
54-
activeNodes = make(map[string]bool)
57+
activeNodes = make(map[string]bool)
58+
podEgressArgs = []string{"-m", "set", "--match-set", podSubnetIpSetName, "src",
59+
"-m", "set", "!", "--match-set", podSubnetIpSetName, "dst",
60+
"-j", "MASQUERADE"}
5561
)
5662

5763
const (
5864
clustetNieghboursSet = "clusterneighboursset"
65+
podSubnetIpSetName = "kube-router-pod-subnets"
5966
)
6067

6168
func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGroup) {
62-
6369
cidr, err := utils.GetPodCidrFromCniSpec("/etc/cni/net.d/10-kuberouter.conf")
6470
if err != nil {
6571
glog.Errorf("Failed to get pod CIDR from CNI conf file: %s", err.Error())
@@ -88,15 +94,24 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
8894

8995
glog.Infof("Starting network route controller")
9096

91-
if len(nrc.clusterCIDR) != 0 {
92-
args := []string{"-s", nrc.clusterCIDR, "!", "-d", nrc.clusterCIDR, "-j", "MASQUERADE"}
93-
iptablesCmdHandler, err := iptables.New()
97+
// Handle Pod egress masquerading configuration
98+
if nrc.enablePodEgress {
99+
glog.Infoln("Enabling Pod egress.")
100+
err = createPodEgressRule()
94101
if err != nil {
95-
glog.Errorf("Failed to add iptable rule to masqurade outbound traffic from pods due to %s. External connectivity will not work.", err.Error())
102+
glog.Errorf("Error enabling Pod egress: %s", err.Error())
96103
}
97-
err = iptablesCmdHandler.AppendUnique("nat", "POSTROUTING", args...)
104+
} else {
105+
glog.Infoln("Disabling Pod egress.")
106+
err = deletePodEgressRule()
107+
// TODO: Don't error if removing non-existent Pod egress rules/ipsets.
108+
if err != nil {
109+
glog.Infof("Error disabling Pod egress: %s", err.Error())
110+
}
111+
112+
err = deletePodSubnetIpSet()
98113
if err != nil {
99-
glog.Errorf("Failed to add iptable rule to masqurade outbound traffic from pods due to %s. External connectivity will not work.", err.Error())
114+
glog.Infof("Error disabling Pod egress: %s", err.Error())
100115
}
101116
}
102117

@@ -127,6 +142,14 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
127142
default:
128143
}
129144

145+
// Update Pod subnet ipset entries
146+
if nrc.enablePodEgress {
147+
err := nrc.syncPodSubnetIpSet()
148+
if err != nil {
149+
glog.Errorf("Error synchronizing Pod subnet ipset: %s", err.Error())
150+
}
151+
}
152+
130153
// add the current set of nodes (excluding self) as BGP peers. Nodes form full mesh
131154
nrc.syncPeers()
132155

@@ -148,14 +171,14 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
148171
}
149172

150173
glog.Infof("Performing periodic syn of the routes")
151-
err := nrc.advertiseRoute()
174+
err = nrc.advertiseRoute()
152175
if err != nil {
153-
glog.Errorf("Failed to advertise route: %s", err.Error())
176+
glog.Errorf("Error advertising route: %s", err.Error())
154177
}
155178

156179
err = nrc.addExportPolicies()
157180
if err != nil {
158-
glog.Errorf("Failed to add BGP export policies due to %s", err.Error())
181+
glog.Errorf("Error adding BGP export policies: %s", err.Error())
159182
}
160183

161184
select {
@@ -167,6 +190,43 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
167190
}
168191
}
169192

193+
func createPodEgressRule() error {
194+
iptablesCmdHandler, err := iptables.New()
195+
if err != nil {
196+
return errors.New("Failed create iptables handler:" + err.Error())
197+
}
198+
199+
err = iptablesCmdHandler.AppendUnique("nat", "POSTROUTING", podEgressArgs...)
200+
if err != nil {
201+
return errors.New("Failed to add iptable rule to masqurade outbound traffic from pods: " +
202+
err.Error() + "External connectivity will not work.")
203+
204+
}
205+
glog.Infof("Added iptables rule to masqurade outbound traffic from pods.")
206+
return nil
207+
}
208+
209+
func deletePodEgressRule() error {
210+
iptablesCmdHandler, err := iptables.New()
211+
if err != nil {
212+
return errors.New("Failed create iptables handler:" + err.Error())
213+
}
214+
215+
exists, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", podEgressArgs...)
216+
if err != nil {
217+
return errors.New("Failed to lookup iptable rule to masqurade outbound traffic from pods: " + err.Error())
218+
}
219+
if exists {
220+
err = iptablesCmdHandler.Delete("nat", "POSTROUTING", podEgressArgs...)
221+
if err != nil {
222+
return errors.New("Failed to delete iptable rule to masqurade outbound traffic from pods: " +
223+
err.Error() + ". Pod egress might still work...")
224+
}
225+
glog.Infof("Deleted iptables rule to masqurade outbound traffic from pods.")
226+
}
227+
return nil
228+
}
229+
170230
func (nrc *NetworkRoutingController) watchBgpUpdates() {
171231
watcher := nrc.bgpServer.Watch(gobgp.WatchBestPath(false))
172232
for {
@@ -339,23 +399,32 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
339399

340400
policy, err := table.NewPolicy(definition)
341401
if err != nil {
342-
return err
402+
return errors.New("Failed to create new policy: " + err.Error())
343403
}
344-
if err = nrc.bgpServer.AddPolicy(policy, false); err != nil {
345-
return err
404+
405+
err = nrc.bgpServer.ReplacePolicy(policy, false, false)
406+
if err != nil {
407+
err = nrc.bgpServer.AddPolicy(policy, false)
408+
if err != nil {
409+
return errors.New("Failed to add policy: " + err.Error())
410+
}
346411
}
412+
347413
err = nrc.bgpServer.AddPolicyAssignment("",
348414
table.POLICY_DIRECTION_EXPORT,
349415
[]*config.PolicyDefinition{&definition},
350416
table.ROUTE_TYPE_ACCEPT)
351417
if err != nil {
352-
return err
418+
return errors.New("Failed to add policy assignment: " + err.Error())
353419
}
354420

355421
// configure default BGP export policy to reject
356422
pd := make([]*config.PolicyDefinition, 0)
357423
pd = append(pd, &definition)
358-
nrc.bgpServer.ReplacePolicyAssignment("", table.POLICY_DIRECTION_EXPORT, pd, table.ROUTE_TYPE_REJECT)
424+
err = nrc.bgpServer.ReplacePolicyAssignment("", table.POLICY_DIRECTION_EXPORT, pd, table.ROUTE_TYPE_REJECT)
425+
if err != nil {
426+
return errors.New("Failed to replace policy assignment: " + err.Error())
427+
}
359428

360429
return nil
361430
}
@@ -419,10 +488,33 @@ func (nrc *NetworkRoutingController) injectRoute(path *table.Path) error {
419488
}
420489

421490
func (nrc *NetworkRoutingController) Cleanup() {
491+
err := deletePodEgressRule()
492+
if err != nil {
493+
glog.Errorf("Error deleting Pod egress iptable rule: %s", err.Error())
494+
}
495+
496+
err = deletePodSubnetIpSet()
497+
if err != nil {
498+
glog.Errorf("Error deleting Pod subnet ipset: %s", err.Error())
499+
}
422500
}
423501

424-
func (nrc *NetworkRoutingController) disableSourceDestinationCheck() {
502+
func deletePodSubnetIpSet() error {
503+
_, err := exec.LookPath("ipset")
504+
if err != nil {
505+
return errors.New("Ensure ipset package is installed: " + err.Error())
506+
}
507+
508+
podSubnetIpSet := ipset.IPSet{Name: podSubnetIpSetName, HashType: "bitmap:ip"}
509+
err = podSubnetIpSet.Destroy()
510+
if err != nil {
511+
return errors.New("Failure deleting Pod egress ipset: " + err.Error())
512+
}
425513

514+
return nil
515+
}
516+
517+
func (nrc *NetworkRoutingController) disableSourceDestinationCheck() {
426518
nodes, err := nrc.clientset.Core().Nodes().List(metav1.ListOptions{})
427519
if err != nil {
428520
glog.Errorf("Failed to list nodes from API server due to: %s. Can not perform BGP peer sync", err.Error())
@@ -463,10 +555,33 @@ func (nrc *NetworkRoutingController) disableSourceDestinationCheck() {
463555
}
464556
}
465557

558+
func (nrc *NetworkRoutingController) syncPodSubnetIpSet() error {
559+
glog.Infof("Syncing Pod subnet ipset entries.")
560+
561+
// get the current list of the nodes from API server
562+
nodes, err := nrc.clientset.Core().Nodes().List(metav1.ListOptions{})
563+
if err != nil {
564+
return errors.New("Failed to list nodes from API server: " + err.Error())
565+
}
566+
567+
// Collect active PodCIDR(s) from nodes
568+
currentPodCidrs := make([]string, 0)
569+
for _, node := range nodes.Items {
570+
currentPodCidrs = append(currentPodCidrs, node.Spec.PodCIDR)
571+
}
572+
573+
err = nrc.podSubnetsIpSet.Refresh(currentPodCidrs)
574+
if err != nil {
575+
return errors.New("Failed to update Pod subnet ipset: " + err.Error())
576+
}
577+
578+
return nil
579+
}
580+
466581
// Refresh the peer relationship rest of the nodes in the cluster. Node add/remove
467582
// events should ensure peer relationship with only currently active nodes. In case
468583
// we miss any events from API server this method which is called periodically
469-
// ensure peer relationship with removed nodes is deleted.
584+
// ensure peer relationship with removed nodes is deleted. Also update Pod subnet ipset.
470585
func (nrc *NetworkRoutingController) syncPeers() {
471586

472587
glog.Infof("Syncing BGP peers for the node.")
@@ -478,7 +593,7 @@ func (nrc *NetworkRoutingController) syncPeers() {
478593
return
479594
}
480595

481-
// establish peer with current set of nodes
596+
// establish peer and add Pod CIDRs with current set of nodes
482597
currentNodes := make([]string, 0)
483598
for _, node := range nodes.Items {
484599
nodeIP, _ := getNodeIP(&node)
@@ -518,6 +633,7 @@ func (nrc *NetworkRoutingController) syncPeers() {
518633
PeerAs: nrc.defaultNodeAsnNumber,
519634
},
520635
}
636+
521637
// TODO: check if a node is already added as neighbour in a better way than add and catch error
522638
if err := nrc.bgpServer.AddNeighbor(n); err != nil {
523639
if !strings.Contains(err.Error(), "Can't overwrite the existing peer") {
@@ -723,14 +839,33 @@ func getNodeSubnet(nodeIp net.IP) (net.IPNet, string, error) {
723839
}
724840

725841
func NewNetworkRoutingController(clientset *kubernetes.Clientset, kubeRouterConfig *options.KubeRouterConfig) (*NetworkRoutingController, error) {
842+
// TODO: Remove lookup, ipset.New already does this.
843+
_, err := exec.LookPath("ipset")
844+
if err != nil {
845+
return nil, errors.New("Ensure ipset package is installed: " + err.Error())
846+
}
726847

727848
nrc := NetworkRoutingController{}
728849

729850
nrc.bgpFullMeshMode = kubeRouterConfig.FullMeshMode
730-
nrc.clusterCIDR = kubeRouterConfig.ClusterCIDR
851+
nrc.enablePodEgress = kubeRouterConfig.EnablePodEgress
731852
nrc.syncPeriod = kubeRouterConfig.RoutesSyncPeriod
732853
nrc.clientset = clientset
733854

855+
if nrc.enablePodEgress || len(nrc.clusterCIDR) != 0 {
856+
nrc.enablePodEgress = true
857+
858+
// TODO: Add bitmap hashtype support to ipset package. It would work well here.
859+
podSubnetIpSet, err := ipset.New(podSubnetIpSetName, "hash:net", &ipset.Params{})
860+
if err != nil {
861+
return nil, fmt.Errorf("failed to create Pod subnet ipset: %s", err.Error())
862+
}
863+
864+
nrc.podSubnetsIpSet = podSubnetIpSet
865+
} else {
866+
nrc.podSubnetsIpSet = nil
867+
}
868+
734869
if len(kubeRouterConfig.ClusterAsn) != 0 {
735870
asn, err := strconv.ParseUint(kubeRouterConfig.ClusterAsn, 0, 32)
736871
if err != nil {

app/controllers/network_services_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ func deleteMasqueradeIptablesRule() error {
690690
if err != nil {
691691
return errors.New("Failed to run iptables command" + err.Error())
692692
}
693+
glog.Infof("Deleted iptables masquerade rule: %s", rule)
693694
break
694695
}
695696
}

0 commit comments

Comments
 (0)