Skip to content

Commit 8a73e6a

Browse files
committed
feat: get rid of init nodes and use bootstrap API to setup cluster
That required a couple of changes in the manager flow: - no longer get node IPs using `kubectl get nodes`, but use machine addresses instead. - add `MachinesBootstrapped` condition that means that `bootstrap` was called on one of the machines. - add `Bootstrapped` flag to the `TalosControlPlaneStatus` to track bootstrap call. Signed-off-by: Artem Chernyshev <[email protected]>
1 parent 205f4be commit 8a73e6a

File tree

13 files changed

+162
-86
lines changed

13 files changed

+162
-86
lines changed

.drone.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ steps:
8181
from_secret: aws_secret_access_key
8282
CI: true
8383
REGISTRY: registry.dev.talos-systems.io
84+
GITHUB_TOKEN:
85+
from_secret: github_token
8486
commands:
8587
- make integration-test
8688
volumes:

api/v1alpha3/conditions.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ const (
1515
MachinesReadyCondition clusterv1.ConditionType = "MachinesReady"
1616
)
1717

18+
const (
19+
// MachinesBootstrapped is tracking control planes bootstrap status.
20+
MachinesBootstrapped clusterv1.ConditionType = "MachinesBootstrapped"
21+
22+
// WaitingForMachinesReason (Severity=Info) documents that a TalosControlPlane bootstrap is waiting
23+
// for all control plane nodes to be created.
24+
WaitingForMachinesReason = "WaitingForMachines"
25+
)
26+
1827
const (
1928
// AvailableCondition documents that the first control plane instance has completed Talos boot sequence
2029
// and so the control plane is available and an API server instance is ready for processing requests.
@@ -23,6 +32,10 @@ const (
2332
// WaitingForTalosBootReason (Severity=Info) documents a TalosControlPlane object waiting for the first
2433
// control plane instance to complete Talos boot sequence.
2534
WaitingForTalosBootReason = "WaitingForTalosBoot"
35+
36+
// InvalidControlPlaneConfigReason (Severity=Error) documents that controlplane config is invalid and the provider
37+
// can not proceed with the bootstrap.
38+
InvalidControlPlaneConfigReason = "InvalidControlPlaneConfig"
2639
)
2740

2841
const (

api/v1alpha3/taloscontrolplane_types.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ const (
1616
)
1717

1818
type ControlPlaneConfig struct {
19-
InitConfig cabptv1.TalosConfigSpec `json:"init"`
19+
// Deprecated: starting from cacppt v0.4.0 provider doesn't use init configs.
20+
InitConfig cabptv1.TalosConfigSpec `json:"init,omitempty"`
2021
ControlPlaneConfig cabptv1.TalosConfigSpec `json:"controlplane"`
2122
}
2223

@@ -79,6 +80,11 @@ type TalosControlPlaneStatus struct {
7980
// +optional
8081
Ready bool `json:"ready"`
8182

83+
// Bootstrapped denotes whether any nodes received bootstrap request
84+
// which is required to start etcd and Kubernetes components in Talos.
85+
// +optional
86+
Bootstrapped bool `json:"bootstrapped,omitempty"`
87+
8288
// FailureReason indicates that there is a terminal problem reconciling the
8389
// state, and will be set to a token value suitable for
8490
// programmatic interpretation.

config/crd/bases/controlplane.cluster.x-k8s.io_taloscontrolplanes.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ spec:
8787
- generateType
8888
type: object
8989
init:
90-
description: TalosConfigSpec defines the desired state of TalosConfig
90+
description: 'Deprecated: starting from cacppt v0.4.0 provider doesn''t use init configs.'
9191
properties:
9292
configPatches:
9393
items:
@@ -114,7 +114,6 @@ spec:
114114
type: object
115115
required:
116116
- controlplane
117-
- init
118117
type: object
119118
infrastructureTemplate:
120119
description: InfrastructureTemplate is a required reference to a custom resource offered by an infrastructure provider.
@@ -158,6 +157,9 @@ spec:
158157
status:
159158
description: TalosControlPlaneStatus defines the observed state of TalosControlPlane
160159
properties:
160+
bootstrapped:
161+
description: Bootstrapped denotes whether any nodes received bootstrap request which is required to start etcd and Kubernetes components in Talos.
162+
type: boolean
161163
conditions:
162164
description: Conditions defines current service state of the TalosControlPlane.
163165
items:

controllers/configs.go

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
1515
talosconfig "github.com/talos-systems/talos/pkg/machinery/client/config"
1616
corev1 "k8s.io/api/core/v1"
17-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1817
"k8s.io/apimachinery/pkg/types"
1918
"k8s.io/client-go/kubernetes"
2019
"k8s.io/client-go/tools/clientcmd"
@@ -76,7 +75,7 @@ func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context,
7675
}
7776

7877
// talosconfigForMachines will generate a talosconfig that uses *all* found addresses as the endpoints.
79-
func (r *TalosControlPlaneReconciler) talosconfigForMachines(ctx context.Context, clientset *kubernetes.Clientset, machines ...clusterv1.Machine) (*talosclient.Client, error) {
78+
func (r *TalosControlPlaneReconciler) talosconfigForMachines(ctx context.Context, machines ...clusterv1.Machine) (*talosclient.Client, error) {
8079
if len(machines) == 0 {
8180
return nil, fmt.Errorf("at least one machine should be provided")
8281
}
@@ -86,24 +85,14 @@ func (r *TalosControlPlaneReconciler) talosconfigForMachines(ctx context.Context
8685
var t *talosconfig.Config
8786

8887
for _, machine := range machines {
89-
if machine.Status.NodeRef == nil {
90-
return nil, fmt.Errorf("%q machine does not have a nodeRef", machine.Name)
91-
}
92-
93-
// grab all addresses as endpoints
94-
node, err := clientset.CoreV1().Nodes().Get(ctx, machine.Status.NodeRef.Name, metav1.GetOptions{})
95-
if err != nil {
96-
return nil, err
97-
}
98-
99-
for _, addr := range node.Status.Addresses {
100-
if addr.Type == corev1.NodeExternalIP || addr.Type == corev1.NodeInternalIP {
88+
for _, addr := range machine.Status.Addresses {
89+
if addr.Type == clusterv1.MachineExternalIP || addr.Type == clusterv1.MachineInternalIP {
10190
addrList = append(addrList, addr.Address)
10291
}
10392
}
10493

10594
if len(addrList) == 0 {
106-
return nil, fmt.Errorf("no addresses were found for node %q", node.Name)
95+
return nil, fmt.Errorf("no addresses were found for node %q", machine.Name)
10796
}
10897

10998
if t == nil {
@@ -113,7 +102,7 @@ func (r *TalosControlPlaneReconciler) talosconfigForMachines(ctx context.Context
113102
)
114103

115104
// find talosconfig in the machine's namespace
116-
err = r.Client.List(ctx, &cfgs, client.InNamespace(machine.Namespace))
105+
err := r.Client.List(ctx, &cfgs, client.InNamespace(machine.Namespace))
117106
if err != nil {
118107
return nil, err
119108
}

controllers/consts.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
package controllers
6+
7+
import "time"
8+
9+
const (
10+
// deleteRequeueAfter is how long to wait before checking again to see if
11+
// all control plane machines have been deleted.
12+
deleteRequeueAfter = 30 * time.Second
13+
14+
// preflightFailedRequeueAfter is how long to wait before trying to scale
15+
// up/down if some preflight check for those operations has failed.
16+
preflightFailedRequeueAfter = 15 * time.Second
17+
18+
// dependentCertRequeueAfter is how long to wait before checking again to see if
19+
// dependent certificates have been created.
20+
dependentCertRequeueAfter = 30 * time.Second
21+
)

controllers/etcd.go

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func (r *TalosControlPlaneReconciler) etcdHealthcheck(ctx context.Context, clust
3232
}
3333
}
3434

35-
c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, machines...)
35+
c, err := r.talosconfigForMachines(ctx, machines...)
3636
if err != nil {
3737
return err
3838
}
@@ -182,14 +182,7 @@ func (r *TalosControlPlaneReconciler) auditEtcd(ctx context.Context, cluster cli
182182
return fmt.Errorf("no CP machine which is not being deleted and has node ref")
183183
}
184184

185-
kubeclient, err := r.kubeconfigForCluster(ctx, cluster)
186-
if err != nil {
187-
return err
188-
}
189-
190-
defer kubeclient.Close() //nolint:errcheck
191-
192-
c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, designatedCPMachine)
185+
c, err := r.talosconfigForMachines(ctx, designatedCPMachine)
193186
if err != nil {
194187
return err
195188
}

controllers/health.go

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"google.golang.org/grpc/codes"
1717
"google.golang.org/grpc/status"
1818
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
19-
"sigs.k8s.io/cluster-api/util"
2019
)
2120

2221
type errServiceUnhealthy struct {
@@ -29,14 +28,7 @@ func (e *errServiceUnhealthy) Error() string {
2928
}
3029

3130
func (r *TalosControlPlaneReconciler) nodesHealthcheck(ctx context.Context, cluster *clusterv1.Cluster, machines []clusterv1.Machine) error {
32-
kubeclient, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
33-
if err != nil {
34-
return err
35-
}
36-
37-
defer kubeclient.Close() //nolint:errcheck
38-
39-
client, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, machines...)
31+
client, err := r.talosconfigForMachines(ctx, machines...)
4032
if err != nil {
4133
return err
4234
}
@@ -63,14 +55,7 @@ func (r *TalosControlPlaneReconciler) nodesHealthcheck(ctx context.Context, clus
6355
}
6456

6557
func (r *TalosControlPlaneReconciler) ensureNodesBooted(ctx context.Context, cluster *clusterv1.Cluster, machines []clusterv1.Machine) error {
66-
kubeclient, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
67-
if err != nil {
68-
return err
69-
}
70-
71-
defer kubeclient.Close() //nolint:errcheck
72-
73-
client, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, machines...)
58+
client, err := r.talosconfigForMachines(ctx, machines...)
7459
if err != nil {
7560
return err
7661
}

0 commit comments

Comments
 (0)