@@ -14,18 +14,12 @@ import (
14
14
"github.com/pkg/errors"
15
15
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
16
16
controlplanev1 "github.com/talos-systems/cluster-api-control-plane-provider-talos/api/v1alpha3"
17
- "github.com/talos-systems/talos/pkg/machinery/api/machine"
18
- talosclient "github.com/talos-systems/talos/pkg/machinery/client"
19
- talosconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
20
17
corev1 "k8s.io/api/core/v1"
21
18
apierrors "k8s.io/apimachinery/pkg/api/errors"
22
19
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23
20
"k8s.io/apimachinery/pkg/runtime"
24
- "k8s.io/apimachinery/pkg/types"
25
21
kerrors "k8s.io/apimachinery/pkg/util/errors"
26
22
"k8s.io/apiserver/pkg/storage/names"
27
- "k8s.io/client-go/kubernetes"
28
- "k8s.io/client-go/tools/clientcmd"
29
23
"k8s.io/utils/pointer"
30
24
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
31
25
"sigs.k8s.io/cluster-api/controllers/external"
@@ -323,25 +317,7 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
323
317
324
318
r .Log .Info ("Found control plane machines" , "machines" , len (machines ))
325
319
326
- kubeconfigSecret := & corev1.Secret {}
327
-
328
- err = r .Client .Get (ctx ,
329
- types.NamespacedName {
330
- Namespace : cluster .Namespace ,
331
- Name : cluster .Name + "-kubeconfig" ,
332
- },
333
- kubeconfigSecret ,
334
- )
335
- if err != nil {
336
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
337
- }
338
-
339
- config , err := clientcmd .RESTConfigFromKubeConfig (kubeconfigSecret .Data ["value" ])
340
- if err != nil {
341
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
342
- }
343
-
344
- clientset , err := kubernetes .NewForConfig (config )
320
+ clientset , err := r .kubeconfigForCluster (ctx , cluster )
345
321
if err != nil {
346
322
return ctrl.Result {RequeueAfter : 20 * time .Second }, err
347
323
}
@@ -353,6 +329,11 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
353
329
354
330
node , err := clientset .CoreV1 ().Nodes ().Get (machine .Status .NodeRef .Name , metav1.GetOptions {})
355
331
if err != nil {
332
+ // It's possible for the node to already be deleted in the workload cluster, so we just
333
+ // requeue if that's the case instead of throwing a scary error.
334
+ if apierrors .IsNotFound (err ) {
335
+ return ctrl.Result {RequeueAfter : 20 * time .Second }, nil
336
+ }
356
337
return ctrl.Result {RequeueAfter : 20 * time .Second }, err
357
338
}
358
339
@@ -375,82 +356,16 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
375
356
return ctrl.Result {RequeueAfter : 20 * time .Second }, fmt .Errorf ("%q machine does not have a nodeRef" , oldest .Name )
376
357
}
377
358
378
- var address string
379
-
380
- node , err := clientset .CoreV1 ().Nodes ().Get (oldest .Status .NodeRef .Name , metav1.GetOptions {})
381
- if err != nil {
382
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
383
- }
384
-
385
- for _ , addr := range node .Status .Addresses {
386
- if addr .Type == corev1 .NodeInternalIP {
387
- address = addr .Address
388
- break
389
- }
390
- }
391
-
392
- if address == "" {
393
- return ctrl.Result {RequeueAfter : 20 * time .Second }, fmt .Errorf ("no address was found for node %q" , node .Name )
394
- }
395
-
396
- var (
397
- cfgs cabptv1.TalosConfigList
398
- found * cabptv1.TalosConfig
399
- )
400
-
401
- err = r .Client .List (ctx , & cfgs )
402
- if err != nil {
403
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
404
- }
405
-
406
- for _ , cfg := range cfgs .Items {
407
- for _ , ref := range cfg .OwnerReferences {
408
- if ref .Kind == "Machine" && ref .Name == oldest .Name {
409
- found = & cfg
410
- break
411
- }
412
- }
413
- }
414
-
415
- if found == nil {
416
- return ctrl.Result {RequeueAfter : 20 * time .Second }, fmt .Errorf ("failed to find TalosConfig for %q" , oldest .Name )
417
- }
359
+ node := oldest .Status .NodeRef
418
360
419
- t , err := talosconfig . FromString ( found . Status . TalosConfig )
361
+ c , err := r . talosconfigForMachine ( ctx , clientset , oldest )
420
362
if err != nil {
421
363
return ctrl.Result {RequeueAfter : 20 * time .Second }, err
422
364
}
423
365
424
- c , err := talosclient . New (ctx , talosclient . WithEndpoints ( address ), talosclient . WithConfig ( t ) )
366
+ err = r . gracefulEtcdLeave (ctx , c , cluster , oldest )
425
367
if err != nil {
426
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
427
- }
428
-
429
- r .Log .Info ("Verifying etcd status" , "machine" , oldest .Name , "node" , node .Name , "address" , address )
430
-
431
- svcs , err := c .ServiceInfo (ctx , "etcd" )
432
- if err != nil {
433
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
434
- }
435
-
436
- for _ , svc := range svcs {
437
- if svc .Service .State != "Finished" {
438
- r .Log .Info ("Forfeiting leadership" , "machine" , oldest .Status .NodeRef .Name )
439
-
440
- _ , err = c .EtcdForfeitLeadership (ctx , & machine.EtcdForfeitLeadershipRequest {})
441
- if err != nil {
442
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
443
- }
444
-
445
- r .Log .Info ("Leaving etcd" , "machine" , oldest .Name , "node" , node .Name , "address" , address )
446
-
447
- err = c .EtcdLeaveCluster (ctx , & machine.EtcdLeaveClusterRequest {})
448
- if err != nil {
449
- return ctrl.Result {RequeueAfter : 20 * time .Second }, err
450
- }
451
- }
452
-
453
- break
368
+ return ctrl.Result {}, err
454
369
}
455
370
456
371
r .Log .Info ("Deleting machine" , "machine" , oldest .Name , "node" , node .Name )
@@ -463,7 +378,7 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
463
378
// NB: We shutdown the node here so that a loadbalancer will drop the backend.
464
379
// The Kubernetes API server is configured to talk to etcd on localhost, but
465
380
// at this point etcd has been stopped.
466
- r .Log .Info ("Shutting down node" , "machine" , oldest .Name , "node" , node .Name , "address" , address )
381
+ r .Log .Info ("Shutting down node" , "machine" , oldest .Name , "node" , node .Name )
467
382
468
383
err = c .Shutdown (ctx )
469
384
if err != nil {
@@ -649,25 +564,7 @@ func (r *TalosControlPlaneReconciler) updateStatus(ctx context.Context, tcp *con
649
564
return nil
650
565
}
651
566
652
- kubeconfigSecret := & corev1.Secret {}
653
-
654
- err = r .Client .Get (ctx ,
655
- types.NamespacedName {
656
- Namespace : cluster .Namespace ,
657
- Name : cluster .Name + "-kubeconfig" ,
658
- },
659
- kubeconfigSecret ,
660
- )
661
- if err != nil {
662
- return err
663
- }
664
-
665
- config , err := clientcmd .RESTConfigFromKubeConfig (kubeconfigSecret .Data ["value" ])
666
- if err != nil {
667
- return err
668
- }
669
-
670
- clientset , err := kubernetes .NewForConfig (config )
567
+ clientset , err := r .kubeconfigForCluster (ctx , util .ObjectKey (cluster ))
671
568
if err != nil {
672
569
return err
673
570
}
0 commit comments