Skip to content

Commit 1662815

Browse files
committed
feat: catch up with Kubeadm in terms of conditions
This is the first set of supported conditions: `Resized`, `ControlPlaneComponentsHealthy`, `EtcdClusterHealthy`, `Available`, `MachinesReady`, `MachinesCreated`. Signed-off-by: Artem Chernyshev <[email protected]>
1 parent 43eb75b commit 1662815

File tree

12 files changed

+544
-153
lines changed

12 files changed

+544
-153
lines changed

.drone.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ type: kubernetes
55

66
services:
77
- name: docker
8-
image: docker:20.10-dind
8+
image: ghcr.io/smira/docker:20.10-dind-hacked
99
entrypoint: [dockerd]
1010
privileged: true
1111
volumes:

api/v1alpha3/conditions.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
// +kubebuilder:object:generate=true
6+
// +groupName=controlplane.cluster.x-k8s.io
7+
package v1alpha3
8+
9+
import clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
10+
11+
// Conditions and condition Reasons for the TalosControlPlane object
12+
13+
const (
14+
// MachinesReadyCondition reports an aggregate of current status of the machines controlled by the TalosControlPlane.
15+
MachinesReadyCondition clusterv1.ConditionType = "MachinesReady"
16+
)
17+
18+
const (
19+
// AvailableCondition documents that the first control plane instance has completed the Talos boot sequence
20+
// and so the control plane is available and an API server instance is ready for processing requests.
21+
AvailableCondition clusterv1.ConditionType = "Available"
22+
23+
// WaitingForTalosBootReason (Severity=Info) documents a TalosControlPlane object waiting for the first
24+
// control plane instance to complete the Talos boot sequence.
25+
WaitingForTalosBootReason = "WaitingForTalosBoot"
26+
)
27+
28+
const (
29+
// ResizedCondition documents a TalosControlPlane that is resizing the set of controlled machines.
30+
ResizedCondition clusterv1.ConditionType = "Resized"
31+
32+
// ScalingUpReason (Severity=Info) documents a TalosControlPlane that is increasing the number of replicas.
33+
ScalingUpReason = "ScalingUp"
34+
35+
// ScalingDownReason (Severity=Info) documents a TalosControlPlane that is decreasing the number of replicas.
36+
ScalingDownReason = "ScalingDown"
37+
)
38+
39+
const (
40+
// ControlPlaneComponentsHealthyCondition reports the overall status of control plane components
41+
// implemented as static pods generated by Talos including kube-apiserver, kube-controller-manager,
42+
// kube-scheduler and etcd.
43+
ControlPlaneComponentsHealthyCondition clusterv1.ConditionType = "ControlPlaneComponentsHealthy"
44+
45+
// ControlPlaneComponentsUnhealthyReason (Severity=Error) documents a control plane component that is not healthy.
46+
ControlPlaneComponentsUnhealthyReason = "ControlPlaneComponentsUnhealthy"
47+
48+
// ControlPlaneComponentsInspectionFailedReason documents a failure in inspecting the control plane component status.
49+
ControlPlaneComponentsInspectionFailedReason = "ControlPlaneComponentsInspectionFailed"
50+
)
51+
52+
const (
53+
// EtcdClusterHealthyCondition documents the overall etcd cluster's health.
54+
EtcdClusterHealthyCondition clusterv1.ConditionType = "EtcdClusterHealthyCondition"
55+
56+
// EtcdClusterUnhealthyReason (Severity=Error) is set when the etcd cluster is unhealthy.
57+
EtcdClusterUnhealthyReason = "EtcdClusterUnhealthy"
58+
)
59+
60+
const (
61+
// MachinesCreatedCondition documents that the machines controlled by the TalosControlPlane are created.
62+
// When this condition is false, it indicates that there was an error when cloning the infrastructure/bootstrap template or
63+
// when generating the machine object.
64+
MachinesCreatedCondition clusterv1.ConditionType = "MachinesCreated"
65+
66+
// InfrastructureTemplateCloningFailedReason (Severity=Error) documents a TalosControlPlane failing to
67+
// clone the infrastructure template.
68+
InfrastructureTemplateCloningFailedReason = "InfrastructureTemplateCloningFailed"
69+
70+
// BootstrapTemplateCloningFailedReason (Severity=Error) documents a TalosControlPlane failing to
71+
// clone the bootstrap template.
72+
BootstrapTemplateCloningFailedReason = "BootstrapTemplateCloningFailed"
73+
74+
// MachineGenerationFailedReason (Severity=Error) documents a TalosControlPlane failing to
75+
// generate a machine object.
76+
MachineGenerationFailedReason = "MachineGenerationFailed"
77+
)

api/v1alpha3/taloscontrolplane_types.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
99
corev1 "k8s.io/api/core/v1"
1010
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
1112
)
1213

1314
const (
@@ -88,6 +89,14 @@ type TalosControlPlaneStatus struct {
8889
// state, and will be set to a descriptive error message.
8990
// +optional
9091
FailureMessage *string `json:"failureMessage,omitempty"`
92+
93+
// ObservedGeneration is the latest generation observed by the controller.
94+
// +optional
95+
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
96+
97+
// Conditions defines the current service state of the TalosControlPlane.
98+
// +optional
99+
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
91100
}
92101

93102
// +kubebuilder:object:root=true
@@ -110,6 +119,16 @@ type TalosControlPlane struct {
110119
Status TalosControlPlaneStatus `json:"status,omitempty"`
111120
}
112121

122+
// GetConditions returns the set of conditions for this object.
123+
func (in *TalosControlPlane) GetConditions() clusterv1.Conditions {
124+
return in.Status.Conditions
125+
}
126+
127+
// SetConditions sets the conditions on this object.
128+
func (in *TalosControlPlane) SetConditions(conditions clusterv1.Conditions) {
129+
in.Status.Conditions = conditions
130+
}
131+
113132
// +kubebuilder:object:root=true
114133

115134
// TalosControlPlaneList contains a list of TalosControlPlane

api/v1alpha3/zz_generated.deepcopy.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/controlplane.cluster.x-k8s.io_taloscontrolplanes.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,35 @@ spec:
158158
status:
159159
description: TalosControlPlaneStatus defines the observed state of TalosControlPlane
160160
properties:
161+
conditions:
162+
description: Conditions defines the current service state of the TalosControlPlane.
163+
items:
164+
description: Condition defines an observation of a Cluster API resource operational state.
165+
properties:
166+
lastTransitionTime:
167+
description: Last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
168+
format: date-time
169+
type: string
170+
message:
171+
description: A human readable message indicating details about the transition. This field may be empty.
172+
type: string
173+
reason:
174+
description: The reason for the condition's last transition in CamelCase. The specific API may choose whether or not this field is considered a guaranteed API. This field may not be empty.
175+
type: string
176+
severity:
177+
description: Severity provides an explicit classification of Reason code, so the users or machines can immediately understand the current situation and act accordingly. The Severity field MUST be set only when Status=False.
178+
type: string
179+
status:
180+
description: Status of the condition, one of True, False, Unknown.
181+
type: string
182+
type:
183+
description: Type of condition in CamelCase or in foo.example.com/CamelCase. Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be useful (see .node.status.conditions), the ability to deconflict is important.
184+
type: string
185+
required:
186+
- status
187+
- type
188+
type: object
189+
type: array
161190
failureMessage:
162191
description: ErrorMessage indicates that there is a terminal problem reconciling the state, and will be set to a descriptive error message.
163192
type: string
@@ -167,6 +196,10 @@ spec:
167196
initialized:
168197
description: Initialized denotes whether or not the control plane has the uploaded talos-config configmap.
169198
type: boolean
199+
observedGeneration:
200+
description: ObservedGeneration is the latest generation observed by the controller.
201+
format: int64
202+
type: integer
170203
ready:
171204
description: Ready denotes that the TalosControlPlane API Server is ready to receive requests.
172205
type: boolean

controllers/configs.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ package controllers
77
import (
88
"context"
99
"fmt"
10+
"net"
11+
"time"
1012

1113
cabptv1 "github.com/talos-systems/cluster-api-bootstrap-provider-talos/api/v1alpha3"
1214
talosclient "github.com/talos-systems/talos/pkg/machinery/client"
@@ -16,13 +18,31 @@ import (
1618
"k8s.io/apimachinery/pkg/types"
1719
"k8s.io/client-go/kubernetes"
1820
"k8s.io/client-go/tools/clientcmd"
21+
"k8s.io/client-go/util/connrotation"
1922
capiv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
2023
"sigs.k8s.io/controller-runtime/pkg/client"
2124
)
2225

26+
type kubernetesClient struct {
27+
*kubernetes.Clientset
28+
29+
dialer *connrotation.Dialer
30+
}
31+
32+
// Close closes all connections opened through the kubernetes client's dialer.
33+
func (k *kubernetesClient) Close() error {
34+
k.dialer.CloseAll()
35+
36+
return nil
37+
}
38+
39+
func newDialer() *connrotation.Dialer {
40+
return connrotation.NewDialer((&net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second}).DialContext)
41+
}
42+
2343
// kubeconfigForCluster will fetch a kubeconfig secret based on cluster name/namespace,
2444
// use it to create a clientset, and return it wrapped in a kubernetesClient (callers close it when done).
25-
func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context, cluster client.ObjectKey) (*kubernetes.Clientset, error) {
45+
func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context, cluster client.ObjectKey) (*kubernetesClient, error) {
2646
kubeconfigSecret := &corev1.Secret{}
2747

2848
err := r.Client.Get(ctx,
@@ -41,12 +61,18 @@ func (r *TalosControlPlaneReconciler) kubeconfigForCluster(ctx context.Context,
4161
return nil, err
4262
}
4363

64+
dialer := newDialer()
65+
config.Dial = dialer.DialContext
66+
4467
clientset, err := kubernetes.NewForConfig(config)
4568
if err != nil {
4669
return nil, err
4770
}
4871

49-
return clientset, nil
72+
return &kubernetesClient{
73+
Clientset: clientset,
74+
dialer: dialer,
75+
}, nil
5076
}
5177

5278
// talosconfigForMachine will generate a talosconfig that uses *all* found addresses as the endpoints.

controllers/etcd.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ import (
1717
)
1818

1919
func (r *TalosControlPlaneReconciler) etcdHealthcheck(ctx context.Context, cluster *capiv1.Cluster, ownedMachines []capiv1.Machine) error {
20-
clientset, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
20+
kubeclient, err := r.kubeconfigForCluster(ctx, util.ObjectKey(cluster))
2121
if err != nil {
2222
return err
2323
}
2424

25+
defer kubeclient.Close() //nolint:errcheck
26+
2527
machines := []capiv1.Machine{}
2628

2729
for _, machine := range ownedMachines {
@@ -30,11 +32,13 @@ func (r *TalosControlPlaneReconciler) etcdHealthcheck(ctx context.Context, clust
3032
}
3133
}
3234

33-
c, err := r.talosconfigForMachines(ctx, clientset, machines...)
35+
c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, machines...)
3436
if err != nil {
3537
return err
3638
}
3739

40+
defer c.Close() //nolint:errcheck
41+
3842
service := "etcd"
3943

4044
params := make([]interface{}, 0, len(machines)*2)
@@ -178,16 +182,20 @@ func (r *TalosControlPlaneReconciler) auditEtcd(ctx context.Context, cluster cli
178182
return fmt.Errorf("no CP machine which is not being deleted and has node ref")
179183
}
180184

181-
clientset, err := r.kubeconfigForCluster(ctx, cluster)
185+
kubeclient, err := r.kubeconfigForCluster(ctx, cluster)
182186
if err != nil {
183187
return err
184188
}
185189

186-
c, err := r.talosconfigForMachines(ctx, clientset, designatedCPMachine)
190+
defer kubeclient.Close() //nolint:errcheck
191+
192+
c, err := r.talosconfigForMachines(ctx, kubeclient.Clientset, designatedCPMachine)
187193
if err != nil {
188194
return err
189195
}
190196

197+
defer c.Close() //nolint:errcheck
198+
191199
response, err := c.EtcdMemberList(ctx, &machine.EtcdMemberListRequest{})
192200
if err != nil {
193201
return fmt.Errorf("error getting etcd members via %q (endpoints %v): %w", designatedCPMachine.Name, c.GetConfigContext().Endpoints, err)

0 commit comments

Comments
 (0)