@@ -35,6 +35,9 @@ type NetworkRoutingController struct {
3535 asnNumber uint32
3636 peerAsnNumber uint32
3737}
// activeNodes is the set of node IPs (in their string form) that this
// controller has recorded as BGP peers. syncPeers and OnNodeUpdate set an
// entry to true when they peer with a node and delete the entry when the
// peering is dropped.
var activeNodes = map[string]bool{}
3841
3942func (nrc * NetworkRoutingController ) Run (stopCh <- chan struct {}, wg * sync.WaitGroup ) {
4043
@@ -61,32 +64,8 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
6164 defer t .Stop ()
6265 defer wg .Done ()
6366
64- nodes , err := nrc .clientset .Core ().Nodes ().List (metav1.ListOptions {})
65- if err != nil {
66- glog .Errorf ("Failed to list nodes: %s" , err .Error ())
67- return
68- }
69-
7067 glog .Infof ("Starting network route controller" )
7168
72- // add the current set of nodes (excluding self) as BGP peers. Nodes form full mesh
73- for _ , node := range nodes .Items {
74- nodeIP , _ := getNodeIP (& node )
75- if nodeIP .String () == nrc .nodeIP .String () {
76- continue
77- }
78-
79- n := & config.Neighbor {
80- Config : config.NeighborConfig {
81- NeighborAddress : nodeIP .String (),
82- PeerAs : nrc .asnNumber ,
83- },
84- }
85- if err := nrc .bgpServer .AddNeighbor (n ); err != nil {
86- panic (err )
87- }
88- }
89-
9069 // if the global routing peer is configured then peer with it
9170 if len (nrc .peerRouter ) != 0 {
9271 n := & config.Neighbor {
@@ -109,6 +88,9 @@ func (nrc *NetworkRoutingController) Run(stopCh <-chan struct{}, wg *sync.WaitGr
10988 default :
11089 }
11190
91+ // add the current set of nodes (excluding self) as BGP peers. Nodes form full mesh
92+ nrc .syncPeers ()
93+
11294 // advertise cluster IP for the service to be reachable via host
11395 if nrc .advertiseClusterIp {
11496 glog .Infof ("Advertising cluster ips" )
@@ -209,7 +191,108 @@ func (nrc *NetworkRoutingController) injectRoute(path *table.Path) error {
209191}
210192
211193func (nrc * NetworkRoutingController ) Cleanup () {
194+ }
195+
196+ // Refresh the peer relationship rest of the nodes in the cluster. Node add/remove
197+ // events should ensure peer relationship with only currently active nodes. In case
198+ // we miss any events from API server this method which is called periodically
199+ // ensure peer relationship with removed nodes is deleted.
200+ func (nrc * NetworkRoutingController ) syncPeers () {
201+
202+ // get the current list of the nodes from API server
203+ nodes , err := nrc .clientset .Core ().Nodes ().List (metav1.ListOptions {})
204+ if err != nil {
205+ glog .Errorf ("Failed to list nodes: %s" , err .Error ())
206+ return
207+ }
208+
209+ // establish peer with current set of nodes
210+ currentNodes := make ([]string , 0 )
211+ for _ , node := range nodes .Items {
212+ nodeIP , _ := getNodeIP (& node )
213+ if nodeIP .String () == nrc .nodeIP .String () {
214+ continue
215+ }
216+ currentNodes = append (currentNodes , nodeIP .String ())
217+ activeNodes [nodeIP .String ()] = true
218+ n := & config.Neighbor {
219+ Config : config.NeighborConfig {
220+ NeighborAddress : nodeIP .String (),
221+ PeerAs : nrc .asnNumber ,
222+ },
223+ }
224+ // TODO: check if a node is alredy added as nieighbour in a better way that add and catch error
225+ if err := nrc .bgpServer .AddNeighbor (n ); err != nil {
226+ if ! strings .Contains (err .Error (), "Can't overwrite the existing peer" ) {
227+ glog .Errorf ("Failed to add node %s as peer due to %s" , nodeIP .String (), err )
228+ }
229+ }
230+ }
231+
232+ // find the list of the node removed, from the last known list of active nodes
233+ removedNodes := make ([]string , 0 )
234+ for ip , _ := range activeNodes {
235+ stillActive := false
236+ for _ , node := range currentNodes {
237+ if ip == node {
238+ stillActive = true
239+ break
240+ }
241+ }
242+ if ! stillActive {
243+ removedNodes = append (removedNodes , ip )
244+ }
245+ }
212246
247+ // delete the neighbor for the node that is removed
248+ for _ , ip := range removedNodes {
249+ n := & config.Neighbor {
250+ Config : config.NeighborConfig {
251+ NeighborAddress : ip ,
252+ PeerAs : nrc .asnNumber ,
253+ },
254+ }
255+ if err := nrc .bgpServer .DeleteNeighbor (n ); err != nil {
256+ glog .Errorf ("Failed to remove node %s as peer due to %s" , ip , err )
257+ }
258+ delete (activeNodes , ip )
259+ }
260+ }
261+
262+ // Handle updates from Node watcher. Node watcher calls this method whenever there is
263+ // new node is added or old node is deleted. So peer up with new node and drop peering
264+ // from old node
265+ func (nrc * NetworkRoutingController ) OnNodeUpdate (nodeUpdate * watchers.NodeUpdate ) {
266+ nrc .mu .Lock ()
267+ defer nrc .mu .Unlock ()
268+
269+ node := nodeUpdate .Node
270+ nodeIP , _ := getNodeIP (node )
271+ if nodeUpdate .Op == watchers .ADD {
272+ glog .Infof ("Received node %s added update from watch API so peer with new node" , nodeIP )
273+ n := & config.Neighbor {
274+ Config : config.NeighborConfig {
275+ NeighborAddress : nodeIP .String (),
276+ PeerAs : nrc .asnNumber ,
277+ },
278+ }
279+ if err := nrc .bgpServer .AddNeighbor (n ); err != nil {
280+ glog .Errorf ("Failed to add node %s as peer due to %s" , nodeIP , err )
281+ }
282+ activeNodes [nodeIP .String ()] = true
283+ } else if nodeUpdate .Op == watchers .REMOVE {
284+ glog .Infof ("Received node %s removed update from watch API, so remove node from peer" , nodeIP )
285+ n := & config.Neighbor {
286+ Config : config.NeighborConfig {
287+ NeighborAddress : nodeIP .String (),
288+ PeerAs : nrc .asnNumber ,
289+ },
290+ }
291+ if err := nrc .bgpServer .DeleteNeighbor (n ); err != nil {
292+ glog .Errorf ("Failed to remove node %s as peer due to %s" , nodeIP , err )
293+ }
294+ delete (activeNodes , nodeIP .String ())
295+ }
213296}
214297
215298func NewNetworkRoutingController (clientset * kubernetes.Clientset , kubeRouterConfig * options.KubeRouterConfig ) (* NetworkRoutingController , error ) {
@@ -294,6 +377,7 @@ func NewNetworkRoutingController(clientset *kubernetes.Clientset, kubeRouterConf
294377 panic (err )
295378 }
296379
380+ watchers .NodeWatcher .RegisterHandler (& nrc )
297381 go nrc .watchBgpUpdates ()
298382
299383 return & nrc , nil
0 commit comments