Skip to content

Commit 31010ab

Browse files
authored
Merge pull request #831 from YifeiZhuang/nodetop-crd-reconcile
Add nodetopologyCR deletion/reconciling
2 parents 35fe66d + eb24603 commit 31010ab

9 files changed

+982
-193
lines changed

pkg/controller/nodeipam/ipam/BUILD

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ go_library(
99
"cloud_cidr_allocator_metrics.go",
1010
"controller_legacyprovider.go",
1111
"doc.go",
12-
"multi_subnetworks_handler.go",
1312
"multinetwork_cloud_cidr_allocator.go",
13+
"node_topology_syncer.go",
1414
"range_allocator.go",
15+
"task_queue.go",
1516
"timeout.go",
1617
],
1718
importpath = "k8s.io/cloud-provider-gcp/pkg/controller/nodeipam/ipam",
@@ -64,9 +65,10 @@ go_test(
6465
srcs = [
6566
"cloud_cidr_allocator_test.go",
6667
"controller_test.go",
67-
"multi_subnetworks_handler_test.go",
6868
"multinetwork_cloud_cidr_allocator_test.go",
69+
"node_topology_syncer_test.go",
6970
"range_allocator_test.go",
71+
"task_queue_test.go",
7072
"timeout_test.go",
7173
],
7274
embed = [":ipam"],

pkg/controller/nodeipam/ipam/cidr_allocator.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ const (
7676

7777
// updateMaxRetries is the max retries for a failed node
7878
updateMaxRetries = 10
79+
80+
// nodeTopologyWorkers is the number of workers that update the nodetopology CR in parallel
81+
nodeTopologyWorkers = 30
82+
83+
// nodeTopologyReconcileInterval is the interval between periodic reconciliations of the nodetopology CR
84+
nodeTopologyReconcileInterval = 10 * time.Minute
7985
)
8086

8187
// nodePollInterval is used in listing node

pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go

Lines changed: 52 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,10 @@ type cloudCIDRAllocator struct {
8989
nodeLister corelisters.NodeLister
9090
// nodesSynced returns true if the node shared informer has been synced at least once.
9191
nodesSynced cache.InformerSynced
92-
// nodeTopologyClient will be used to read/patch the nodetopology CR.
93-
nodeTopologyClient nodetopologyclientset.Interface
9492

95-
recorder record.EventRecorder
96-
queue workqueue.RateLimitingInterface
93+
recorder record.EventRecorder
94+
queue workqueue.RateLimitingInterface
95+
nodeTopologyQueue *TaskQueue
9796

9897
stackType clusterStackType
9998
}
@@ -132,17 +131,24 @@ func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Inter
132131
stackType = stackIPv6
133132
}
134133

135-
ca := &cloudCIDRAllocator{
136-
client: client,
134+
nodeTopologySyncer := &NodeTopologySyncer{
135+
nodeTopologyClient: nodeTopologyClient,
137136
cloud: gceCloud,
138-
networksLister: nwInformer.Lister(),
139-
gnpLister: gnpInformer.Lister(),
140137
nodeLister: nodeInformer.Lister(),
141-
nodesSynced: nodeInformer.Informer().HasSynced,
142-
nodeTopologyClient: nodeTopologyClient,
143-
recorder: recorder,
144-
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: workqueueName}),
145-
stackType: stackType,
138+
}
139+
nodetopologyQueue := NewTaskQueue("nodetopologgTaskQueue", "nodetopologyCRD", nodeTopologyWorkers, nodeTopologyKeyFun, nodeTopologySyncer.sync)
140+
141+
ca := &cloudCIDRAllocator{
142+
client: client,
143+
cloud: gceCloud,
144+
networksLister: nwInformer.Lister(),
145+
gnpLister: gnpInformer.Lister(),
146+
nodeLister: nodeInformer.Lister(),
147+
nodesSynced: nodeInformer.Informer().HasSynced,
148+
recorder: recorder,
149+
queue: workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{Name: workqueueName}),
150+
nodeTopologyQueue: nodetopologyQueue,
151+
stackType: stackType,
146152
}
147153

148154
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
@@ -168,6 +174,26 @@ func NewCloudCIDRAllocator(client clientset.Interface, cloud cloudprovider.Inter
168174
}),
169175
DeleteFunc: nodeutil.CreateDeleteNodeHandler(ca.ReleaseCIDR),
170176
})
177+
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
178+
AddFunc: nodeutil.CreateAddNodeHandler(func(node *v1.Node) error {
179+
if ca.nodeTopologyQueue != nil {
180+
ca.nodeTopologyQueue.Enqueue(node)
181+
}
182+
return nil
183+
}),
184+
UpdateFunc: nodeutil.CreateUpdateNodeHandler(func(oldNode, newNode *v1.Node) error {
185+
if ca.nodeTopologyQueue != nil {
186+
nodetopologyQueue.Enqueue(newNode)
187+
}
188+
return nil
189+
}),
190+
DeleteFunc: nodeutil.CreateDeleteNodeHandler(func(node *v1.Node) error {
191+
if ca.nodeTopologyQueue != nil {
192+
nodetopologyQueue.Enqueue(node)
193+
}
194+
return nil
195+
}),
196+
})
171197

172198
nwInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
173199
AddFunc: func(originalObj interface{}) {
@@ -244,6 +270,7 @@ func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
244270
ctx, cancelFn := context.WithCancel(context.Background())
245271
defer cancelFn()
246272
defer ca.queue.ShutDown()
273+
defer ca.nodeTopologyQueue.Shutdown()
247274

248275
klog.Infof("Starting cloud CIDR allocator")
249276
defer klog.Infof("Shutting down cloud CIDR allocator")
@@ -255,6 +282,18 @@ func (ca *cloudCIDRAllocator) Run(stopCh <-chan struct{}) {
255282
for i := 0; i < cidrUpdateWorkers; i++ {
256283
go wait.UntilWithContext(ctx, ca.runWorker, time.Second)
257284
}
285+
if ca.nodeTopologyQueue != nil {
286+
ca.nodeTopologyQueue.Run()
287+
}
288+
289+
go func() {
290+
time.Sleep(nodeTopologyReconcileInterval)
291+
wait.Until(
292+
func() {
293+
ca.nodeTopologyQueue.Enqueue(nodeTopologyReconcileFakeNode)
294+
},
295+
nodeTopologyReconcileInterval, stopCh)
296+
}()
258297

259298
<-stopCh
260299
}
@@ -433,11 +472,6 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
433472
}
434473
}
435474

436-
if err := ca.updateNodeTopology(node); err != nil {
437-
// This is only required for multi subnet clusters. Log and ignore the error.
438-
klog.ErrorS(err, "Failed to update the node topology resource", "nodeName", node.Name)
439-
}
440-
441475
return err
442476
}
443477

@@ -522,7 +556,6 @@ func (ca *cloudCIDRAllocator) updateNodeCIDR(node, oldNode *v1.Node) error {
522556
func (ca *cloudCIDRAllocator) ReleaseCIDR(node *v1.Node) error {
523557
klog.V(2).Infof("Node %v PodCIDR (%v) will be released by external cloud provider (not managed by controller)",
524558
node.Name, node.Spec.PodCIDR)
525-
// TODO: Handle the nodetopology CR subnet deletion here
526559
return nil
527560
}
528561

0 commit comments

Comments
 (0)