Skip to content

Commit 97fdf81

Browse files
authored
fix: use cached ctrlruntime client in IPAM pool monitor (#2043)
Signed-off-by: Evan Baker <[email protected]>
1 parent 6325924 commit 97fdf81

File tree

2 files changed

+36
-21
lines changed

2 files changed

+36
-21
lines changed

cns/restserver/ipam.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,10 @@ func (service *HTTPRestService) handleDebugPodContext(w http.ResponseWriter, r *
485485
func (service *HTTPRestService) handleDebugRestData(w http.ResponseWriter, r *http.Request) {
486486
service.RLock()
487487
defer service.RUnlock()
488+
if service.IPAMPoolMonitor == nil {
489+
http.Error(w, "not ready", http.StatusServiceUnavailable)
490+
return
491+
}
488492
resp := GetHTTPServiceDataResponse{
489493
HTTPRestServiceData: HTTPRestServiceData{
490494
PodIPIDByPodInterfaceKey: service.PodIPIDByPodInterfaceKey,

cns/service/main.go

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,20 +1147,12 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
11471147
if err != nil {
11481148
return errors.Wrap(err, "failed to create ctrl client")
11491149
}
1150-
nnccli := nodenetworkconfig.NewClient(directcli)
1150+
directnnccli := nodenetworkconfig.NewClient(directcli)
11511151
if err != nil {
11521152
return errors.Wrap(err, "failed to create NNC client")
11531153
}
11541154
// TODO(rbtr): nodename and namespace should be in the cns config
1155-
scopedcli := nncctrl.NewScopedClient(nnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName})
1156-
1157-
clusterSubnetStateChan := make(chan v1alpha1.ClusterSubnetState)
1158-
// initialize the ipam pool monitor
1159-
poolOpts := ipampool.Options{
1160-
RefreshDelay: poolIPAMRefreshRateInMilliseconds * time.Millisecond,
1161-
}
1162-
poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, clusterSubnetStateChan, &poolOpts)
1163-
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
1155+
directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName})
11641156

11651157
logger.Printf("Reconciling initial CNS state")
11661158
// apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of 5-10 minutes we have for
@@ -1170,7 +1162,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
11701162
err = retry.Do(func() error {
11711163
attempt++
11721164
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1173-
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
1165+
err = reconcileInitialCNSState(ctx, directscopedcli, httpRestServiceImplementation, podInfoByIPProvider)
11741166
if err != nil {
11751167
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
11761168
}
@@ -1181,16 +1173,6 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
11811173
}
11821174
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
11831175

1184-
// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
1185-
// NodeNetworkConfig update by the time the Reconciler tries to send it.
1186-
go func() {
1187-
logger.Printf("Starting IPAM Pool Monitor")
1188-
if e := poolMonitor.Start(ctx); e != nil {
1189-
logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", e)
1190-
}
1191-
}()
1192-
logger.Printf("initialized and started IPAM pool monitor")
1193-
11941176
// the nodeScopedCache sets Selector options on the Manager cache which are used
11951177
// to perform *server-side* filtering of the cached objects. This is very important
11961178
// for high node/pod count clusters, as it keeps us from watching objects at the
@@ -1220,6 +1202,25 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
12201202
return errors.Wrap(err, "failed to create manager")
12211203
}
12221204

1205+
// Build the IPAM Pool monitor
1206+
clusterSubnetStateChan := make(chan v1alpha1.ClusterSubnetState)
1207+
1208+
// this cachedscopedcli is built using the Manager's cached client, which is
1209+
// NOT SAFE TO USE UNTIL THE MANAGER IS STARTED!
1210+
// This is okay because it is only used to build the IPAMPoolMonitor, which does not
1211+
// attempt to use the client until it has received a NodeNetworkConfig to update, and
1212+
// that can only happen once the Manager has started and the NodeNetworkConfig
1213+
// reconciler has pushed the Monitor a NodeNetworkConfig.
1214+
cachedscopedcli := nncctrl.NewScopedClient(nodenetworkconfig.NewClient(manager.GetClient()), types.NamespacedName{Namespace: "kube-system", Name: nodeName})
1215+
1216+
poolOpts := ipampool.Options{
1217+
RefreshDelay: poolIPAMRefreshRateInMilliseconds * time.Millisecond,
1218+
}
1219+
poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, cachedscopedcli, clusterSubnetStateChan, &poolOpts)
1220+
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
1221+
1222+
// Start building the NNC Reconciler
1223+
12231224
// get our Node so that we can xref it against the NodeNetworkConfig's to make sure that the
12241225
// NNC is not stale and represents the Node we're running on.
12251226
node, err := clientset.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
@@ -1252,6 +1253,16 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
12521253
httpRestServiceImplementation.RegisterPProfEndpoints()
12531254
}
12541255

1256+
// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
1257+
// NodeNetworkConfig update by the time the Reconciler tries to send it.
1258+
go func() {
1259+
logger.Printf("Starting IPAM Pool Monitor")
1260+
if e := poolMonitor.Start(ctx); e != nil {
1261+
logger.Errorf("[Azure CNS] Failed to start pool monitor with err: %v", e)
1262+
}
1263+
}()
1264+
logger.Printf("initialized and started IPAM pool monitor")
1265+
12551266
// Start the Manager which starts the reconcile loop.
12561267
// The Reconciler will send an initial NodeNetworkConfig update to the PoolMonitor, starting the
12571268
// Monitor's internal loop.

0 commit comments

Comments
 (0)