Skip to content

Commit 86ba784

Browse files
authored
Introduces the option --override-nexthop, to override the next hop used in advertised routes (#502)
* Introduces the option --override-nexthop. Setting it to true makes the next hop advertised to peers be automatically selected as an appropriate, reachable local IP. This overrides any next hop set for the routes in the RIB. Kube-router by default sets the next hop to the `node IP`, which is not correct for nodes with multiple interfaces that use different interfaces for different external peers. Fixes #480 * add next-hop-self documentation
1 parent b76d22f commit 86ba784

File tree

5 files changed

+27
-6
lines changed

5 files changed

+27
-6
lines changed

docs/bgp.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,7 @@ Here is sample example to make GoBGP server to listen on multiple IP address
152152
kubectl annotate node ip-172-20-46-87.us-west-2.compute.internal "kube-router.io/bgp-local-addresses=172.20.56.25,192.168.1.99"
153153
```
154154

155+
## Overriding the next hop
156+
157+
By default kube-router populates the GoBGP RIB with the node IP as the next hop for the advertised pod CIDRs and service VIPs. While this works for most cases, overriding the next hop for the advertised routes is necessary when a node has multiple interfaces over which external peers are reached. In that case the next hop needs to be the local IP of the interface over which the external peer can be reached. `--override-nexthop` lets you override the next hop for the advertised routes. Setting `--override-nexthop` to true leverages the BGP next-hop-self functionality implemented in GoBGP. The next hop will then be selected automatically when advertising routes, irrespective of the next hop in the RIB.
158+

docs/user-guide.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Usage of kube-router:
5353
--metrics-port uint16 Prometheus metrics port, (Default 0, Disabled)
5454
--nodeport-bindon-all-ip For service of NodePort type create IPVS service that listens on all IP's of the node.
5555
--nodes-full-mesh Each node in the cluster will setup BGP peering with rest of the nodes. (default true)
56+
--override-nexthop Override the next-hop in bgp routes sent to peers with the local ip.
5657
--peer-router-asns uints ASN numbers of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr. (default [])
5758
--peer-router-ips ipSlice The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's. (default [])
5859
--peer-router-multihop-ttl uint8 Enable eBGP multihop supports -- sets multihop-ttl. (Relevant only if ttl >= 2)

pkg/controllers/routing/export_policies.go

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
// BGP peers
2424
// - each node is NOT allowed to advertise service VIP's (cluster ip, load balancer ip, external IP) to
2525
// iBGP peers
26+
// - an option to allow overriding the next-hop-address with the outgoing ip for bgp peers (both iBGP and external)
2627
func (nrc *NetworkRoutingController) addExportPolicies() error {
2728

2829
// we are rr server do not add export policies
@@ -96,6 +97,12 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
9697
if err != nil {
9798
nrc.bgpServer.AddDefinedSet(iBGPPeerNS)
9899
}
100+
actions := config.Actions{
101+
RouteDisposition: config.ROUTE_DISPOSITION_ACCEPT_ROUTE,
102+
}
103+
if nrc.overrideNextHop {
104+
actions.BgpActions.SetNextHop = "self"
105+
}
99106
// statement to represent the export policy to permit advertising node's pod CIDR
100107
statements = append(statements,
101108
config.Statement{
@@ -107,9 +114,7 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
107114
NeighborSet: "iBGPpeerset",
108115
},
109116
},
110-
Actions: config.Actions{
111-
RouteDisposition: config.ROUTE_DISPOSITION_ACCEPT_ROUTE,
112-
},
117+
Actions: actions,
113118
})
114119
}
115120

@@ -133,6 +138,9 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
133138
if err != nil {
134139
nrc.bgpServer.AddDefinedSet(ns)
135140
}
141+
if nrc.overrideNextHop {
142+
bgpActions.SetNextHop = "self"
143+
}
136144
// statement to represent the export policy to permit advertising cluster IP's
137145
// only to the global BGP peer or node specific BGP peer
138146
statements = append(statements, config.Statement{
@@ -150,6 +158,12 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
150158
},
151159
})
152160
if nrc.advertisePodCidr {
161+
actions := config.Actions{
162+
RouteDisposition: config.ROUTE_DISPOSITION_ACCEPT_ROUTE,
163+
}
164+
if nrc.overrideNextHop {
165+
actions.BgpActions.SetNextHop = "self"
166+
}
153167
statements = append(statements, config.Statement{
154168
Conditions: config.Conditions{
155169
MatchPrefixSet: config.MatchPrefixSet{
@@ -159,9 +173,7 @@ func (nrc *NetworkRoutingController) addExportPolicies() error {
159173
NeighborSet: "externalpeerset",
160174
},
161175
},
162-
Actions: config.Actions{
163-
RouteDisposition: config.ROUTE_DISPOSITION_ACCEPT_ROUTE,
164-
},
176+
Actions: actions,
165177
})
166178
}
167179
}

pkg/controllers/routing/network_routes_controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ type NetworkRoutingController struct {
9494
pathPrependCount uint8
9595
pathPrepend bool
9696
localAddressList []string
97+
overrideNextHop bool
9798

9899
nodeLister cache.Indexer
99100
svcLister cache.Indexer
@@ -763,6 +764,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface,
763764
nrc.peerMultihopTTL = kubeRouterConfig.PeerMultihopTtl
764765
nrc.enablePodEgress = kubeRouterConfig.EnablePodEgress
765766
nrc.syncPeriod = kubeRouterConfig.RoutesSyncPeriod
767+
nrc.overrideNextHop = kubeRouterConfig.OverrideNextHop
766768
nrc.clientset = clientset
767769
nrc.activeNodes = make(map[string]bool)
768770
nrc.bgpRRClient = false

pkg/options/options.go

100755100644
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ type KubeRouterConfig struct {
3636
MetricsPath string
3737
MetricsPort uint16
3838
NodePortBindOnAllIp bool
39+
OverrideNextHop bool
3940
PeerASNs []uint
4041
PeerMultihopTtl uint8
4142
PeerPasswords []string
@@ -133,4 +134,5 @@ func (s *KubeRouterConfig) AddFlags(fs *pflag.FlagSet) {
133134
// "Password that cluster-node BGP servers will use to authenticate one another when \"--nodes-full-mesh\" is set.")
134135
fs.StringVarP(&s.VLevel, "v", "v", "0", "log level for V logs")
135136
fs.Uint16Var(&s.HealthPort, "health-port", 20244, "Health check port, 0 = Disabled")
137+
fs.BoolVar(&s.OverrideNextHop, "override-nexthop", false, "Override the next-hop in bgp routes sent to peers with the local ip.")
136138
}

0 commit comments

Comments
 (0)