Skip to content

Commit 182f656

Browse files
rsmitty authored and talos-bot committed
chore: breakout common functions
This PR breaks out etcd and configfile functions from the main controller code. It's mostly just a readability and DRY change as I was getting confused when dealing with it. I'll be building on the etcd file with my next PR. Signed-off-by: Spencer Smith <[email protected]>
1 parent f7191a0 commit 182f656

File tree

3 files changed

+165
-115
lines changed

3 files changed

+165
-115
lines changed

controllers/configs.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
package controllers
6+
7+
import (
8+
"context"
9+
"fmt"
10+
11+
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
12+
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
13+
talosconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
14+
corev1 "k8s.io/api/core/v1"
15+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16+
"k8s.io/apimachinery/pkg/types"
17+
"k8s.io/client-go/kubernetes"
18+
"k8s.io/client-go/tools/clientcmd"
19+
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
20+
"sigs.k8s.io/controller-runtime/pkg/client"
21+
)
22+
23+
// kubeconfigForCluster will fetch a kubeconfig secret based on cluster name/namespace,
24+
// use it to create a clientset, and return it.
25+
func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context, cluster client.ObjectKey) (*kubernetes.Clientset, error) {
26+
kubeconfigSecret := &corev1.Secret{}
27+
28+
err := r.Client.Get(ctx,
29+
types.NamespacedName{
30+
Namespace: cluster.Namespace,
31+
Name: cluster.Name + "-kubeconfig",
32+
},
33+
kubeconfigSecret,
34+
)
35+
if err != nil {
36+
return nil, err
37+
}
38+
39+
config, err := clientcmd.RESTConfigFromKubeConfig(kubeconfigSecret.Data["value"])
40+
if err != nil {
41+
return nil, err
42+
}
43+
44+
clientset, err := kubernetes.NewForConfig(config)
45+
if err != nil {
46+
return nil, err
47+
}
48+
49+
return clientset, nil
50+
}
51+
52+
// talosconfigForMachine will generate a talosconfig that uses *all* found addresses as the endpoints.
53+
func (r *TalosControlPlaneReconciler) talosconfigForMachine(ctx context.Context, clientset *kubernetes.Clientset, machine capiv1.Machine) (*talosclient.Client, error) {
54+
if machine.Status.NodeRef == nil {
55+
return nil, fmt.Errorf("%q machine does not have a nodeRef", machine.Name)
56+
}
57+
58+
node, err := clientset.CoreV1().Nodes().Get(machine.Status.NodeRef.Name, metav1.GetOptions{})
59+
if err != nil {
60+
return nil, err
61+
}
62+
63+
addrList := []string{}
64+
for _, addr := range node.Status.Addresses {
65+
addrList = append(addrList, addr.Address)
66+
}
67+
68+
if len(addrList) == 0 {
69+
return nil, fmt.Errorf("no addresses were found for node %q", node.Name)
70+
}
71+
72+
var (
73+
cfgs cabptv1.TalosConfigList
74+
found *cabptv1.TalosConfig
75+
)
76+
77+
err = r.Client.List(ctx, &cfgs)
78+
if err != nil {
79+
return nil, err
80+
}
81+
82+
for _, cfg := range cfgs.Items {
83+
for _, ref := range cfg.OwnerReferences {
84+
if ref.Kind == "Machine" && ref.Name == machine.Name {
85+
found = &cfg
86+
break
87+
}
88+
}
89+
}
90+
91+
if found == nil {
92+
return nil, fmt.Errorf("failed to find TalosConfig for %q", machine.Name)
93+
}
94+
95+
t, err := talosconfig.FromString(found.Status.TalosConfig)
96+
if err != nil {
97+
return nil, err
98+
}
99+
100+
c, err := talosclient.New(ctx, talosclient.WithEndpoints(addrList...), talosclient.WithConfig(t))
101+
if err != nil {
102+
return nil, err
103+
}
104+
105+
return c, nil
106+
}

controllers/etcd.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
package controllers
6+
7+
import (
8+
"context"
9+
10+
"github.com/talos-systems/talos/pkg/machinery/api/machine"
11+
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
12+
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
13+
"sigs.k8s.io/controller-runtime/pkg/client"
14+
)
15+
16+
// gracefulEtcdLeave removes a given machine from the etcd cluster by forfeiting leadership
17+
// and issuing a "leave" request from the machine itself.
18+
func (r *TalosControlPlaneReconciler) gracefulEtcdLeave(ctx context.Context, c *talosclient.Client, cluster client.ObjectKey, machineToLeave capiv1.Machine) error {
19+
r.Log.Info("Verifying etcd status", "machine", machineToLeave.Name, "node", machineToLeave.Status.NodeRef.Name)
20+
21+
svcs, err := c.ServiceInfo(ctx, "etcd")
22+
if err != nil {
23+
return err
24+
}
25+
26+
for _, svc := range svcs {
27+
if svc.Service.State != "Finished" {
28+
r.Log.Info("Forfeiting leadership", "machine", machineToLeave.Status.NodeRef.Name)
29+
30+
_, err = c.EtcdForfeitLeadership(ctx, &machine.EtcdForfeitLeadershipRequest{})
31+
if err != nil {
32+
return err
33+
}
34+
35+
r.Log.Info("Leaving etcd", "machine", machineToLeave.Name, "node", machineToLeave.Status.NodeRef.Name)
36+
37+
err = c.EtcdLeaveCluster(ctx, &machine.EtcdLeaveClusterRequest{})
38+
if err != nil {
39+
return err
40+
}
41+
}
42+
43+
break
44+
}
45+
46+
return nil
47+
}

controllers/taloscontrolplane_controller.go

Lines changed: 12 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,12 @@ import (
1414
"github.com/pkg/errors"
1515
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
1616
controlplanev1 "github.com/talos-systems/cluster-api-control-plane-provider-talos/api/v1alpha3"
17-
"github.com/talos-systems/talos/pkg/machinery/api/machine"
18-
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
19-
talosconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
2017
corev1 "k8s.io/api/core/v1"
2118
apierrors "k8s.io/apimachinery/pkg/api/errors"
2219
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2320
"k8s.io/apimachinery/pkg/runtime"
24-
"k8s.io/apimachinery/pkg/types"
2521
kerrors "k8s.io/apimachinery/pkg/util/errors"
2622
"k8s.io/apiserver/pkg/storage/names"
27-
"k8s.io/client-go/kubernetes"
28-
"k8s.io/client-go/tools/clientcmd"
2923
"k8s.io/utils/pointer"
3024
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
3125
"sigs.k8s.io/cluster-api/controllers/external"
@@ -323,25 +317,7 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
323317

324318
r.Log.Info("Found control plane machines", "machines", len(machines))
325319

326-
kubeconfigSecret := &corev1.Secret{}
327-
328-
err = r.Client.Get(ctx,
329-
types.NamespacedName{
330-
Namespace: cluster.Namespace,
331-
Name: cluster.Name + "-kubeconfig",
332-
},
333-
kubeconfigSecret,
334-
)
335-
if err != nil {
336-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
337-
}
338-
339-
config, err := clientcmd.RESTConfigFromKubeConfig(kubeconfigSecret.Data["value"])
340-
if err != nil {
341-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
342-
}
343-
344-
clientset, err := kubernetes.NewForConfig(config)
320+
clientset, err := r.kubeconfigForCluster(ctx, cluster)
345321
if err != nil {
346322
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
347323
}
@@ -353,6 +329,11 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
353329

354330
node, err := clientset.CoreV1().Nodes().Get(machine.Status.NodeRef.Name, metav1.GetOptions{})
355331
if err != nil {
332+
// It's possible for the node to already be deleted in the workload cluster, so we just
333+
// requeue if that's the case instead of throwing a scary error.
334+
if apierrors.IsNotFound(err) {
335+
return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
336+
}
356337
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
357338
}
358339

@@ -375,82 +356,16 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
375356
return ctrl.Result{RequeueAfter: 20 * time.Second}, fmt.Errorf("%q machine does not have a nodeRef", oldest.Name)
376357
}
377358

378-
var address string
379-
380-
node, err := clientset.CoreV1().Nodes().Get(oldest.Status.NodeRef.Name, metav1.GetOptions{})
381-
if err != nil {
382-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
383-
}
384-
385-
for _, addr := range node.Status.Addresses {
386-
if addr.Type == corev1.NodeInternalIP {
387-
address = addr.Address
388-
break
389-
}
390-
}
391-
392-
if address == "" {
393-
return ctrl.Result{RequeueAfter: 20 * time.Second}, fmt.Errorf("no address was found for node %q", node.Name)
394-
}
395-
396-
var (
397-
cfgs cabptv1.TalosConfigList
398-
found *cabptv1.TalosConfig
399-
)
400-
401-
err = r.Client.List(ctx, &cfgs)
402-
if err != nil {
403-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
404-
}
405-
406-
for _, cfg := range cfgs.Items {
407-
for _, ref := range cfg.OwnerReferences {
408-
if ref.Kind == "Machine" && ref.Name == oldest.Name {
409-
found = &cfg
410-
break
411-
}
412-
}
413-
}
414-
415-
if found == nil {
416-
return ctrl.Result{RequeueAfter: 20 * time.Second}, fmt.Errorf("failed to find TalosConfig for %q", oldest.Name)
417-
}
359+
node := oldest.Status.NodeRef
418360

419-
t, err := talosconfig.FromString(found.Status.TalosConfig)
361+
c, err := r.talosconfigForMachine(ctx, clientset, oldest)
420362
if err != nil {
421363
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
422364
}
423365

424-
c, err := talosclient.New(ctx, talosclient.WithEndpoints(address), talosclient.WithConfig(t))
366+
err = r.gracefulEtcdLeave(ctx, c, cluster, oldest)
425367
if err != nil {
426-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
427-
}
428-
429-
r.Log.Info("Verifying etcd status", "machine", oldest.Name, "node", node.Name, "address", address)
430-
431-
svcs, err := c.ServiceInfo(ctx, "etcd")
432-
if err != nil {
433-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
434-
}
435-
436-
for _, svc := range svcs {
437-
if svc.Service.State != "Finished" {
438-
r.Log.Info("Forfeiting leadership", "machine", oldest.Status.NodeRef.Name)
439-
440-
_, err = c.EtcdForfeitLeadership(ctx, &machine.EtcdForfeitLeadershipRequest{})
441-
if err != nil {
442-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
443-
}
444-
445-
r.Log.Info("Leaving etcd", "machine", oldest.Name, "node", node.Name, "address", address)
446-
447-
err = c.EtcdLeaveCluster(ctx, &machine.EtcdLeaveClusterRequest{})
448-
if err != nil {
449-
return ctrl.Result{RequeueAfter: 20 * time.Second}, err
450-
}
451-
}
452-
453-
break
368+
return ctrl.Result{}, err
454369
}
455370

456371
r.Log.Info("Deleting machine", "machine", oldest.Name, "node", node.Name)
@@ -463,7 +378,7 @@ func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context,
463378
// NB: We shutdown the node here so that a loadbalancer will drop the backend.
464379
// The Kubernetes API server is configured to talk to etcd on localhost, but
465380
// at this point etcd has been stopped.
466-
r.Log.Info("Shutting down node", "machine", oldest.Name, "node", node.Name, "address", address)
381+
r.Log.Info("Shutting down node", "machine", oldest.Name, "node", node.Name)
467382

468383
err = c.Shutdown(ctx)
469384
if err != nil {
@@ -649,25 +564,7 @@ func (r *TalosControlPlaneReconciler) updateStatus(ctx context.Context, tcp *con
649564
return nil
650565
}
651566

652-
kubeconfigSecret := &corev1.Secret{}
653-
654-
err = r.Client.Get(ctx,
655-
types.NamespacedName{
656-
Namespace: cluster.Namespace,
657-
Name: cluster.Name + "-kubeconfig",
658-
},
659-
kubeconfigSecret,
660-
)
661-
if err != nil {
662-
return err
663-
}
664-
665-
config, err := clientcmd.RESTConfigFromKubeConfig(kubeconfigSecret.Data["value"])
666-
if err != nil {
667-
return err
668-
}
669-
670-
clientset, err := kubernetes.NewForConfig(config)
567+
clientset, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
671568
if err != nil {
672569
return err
673570
}

0 commit comments

Comments
 (0)