Skip to content

Commit 6ce4942

Browse files
authored
Merge pull request #8959 from sbueringer/pr-improve-kcp-logging
🌱 KCP: Log the reason of a Machine rollout
2 parents 89bbf6a + 2e4f6d9 commit 6ce4942

File tree

4 files changed

+199
-126
lines changed

4 files changed

+199
-126
lines changed

controlplane/kubeadm/internal/control_plane.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,22 +167,34 @@ func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.Kubead
167167
}
168168

169169
// MachinesNeedingRollout returns the machines that need to be rolled out, together with
// a map of machine name -> reason why that machine needs to be rolled out.
170-
func (c *ControlPlane) MachinesNeedingRollout() collections.Machines {
170+
func (c *ControlPlane) MachinesNeedingRollout() (collections.Machines, map[string]string) {
171171
// Ignore machines to be deleted.
172172
machines := c.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
173173

174174
// Return machines if they are scheduled for rollout or if they have an outdated configuration.
175-
return machines.Filter(
176-
NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP),
177-
)
175+
machinesNeedingRollout := make(collections.Machines, len(machines))
176+
rolloutReasons := map[string]string{}
177+
for _, m := range machines {
178+
reason, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
179+
if needsRollout {
180+
machinesNeedingRollout.Insert(m)
181+
// Record the reason under the machine's name so the caller can report it.
rolloutReasons[m.Name] = reason
182+
}
183+
}
184+
return machinesNeedingRollout, rolloutReasons
178185
}
179186

180187
// UpToDateMachines returns the machines that are up to date with the control
181188
// plane's configuration and therefore do not require rollout.
// Note: unlike MachinesNeedingRollout, this iterates over all of c.Machines and does not
// skip machines that have a deletion timestamp.
182189
func (c *ControlPlane) UpToDateMachines() collections.Machines {
183-
return c.Machines.Filter(
184-
collections.Not(NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP)),
185-
)
190+
upToDateMachines := make(collections.Machines, len(c.Machines))
191+
for _, m := range c.Machines {
192+
// Only the verdict is needed here; the rollout reason is discarded.
_, needsRollout := NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP, m)
193+
if !needsRollout {
194+
upToDateMachines.Insert(m)
195+
}
196+
}
197+
return upToDateMachines
186198
}
187199

188200
// getInfraResources fetches the external infrastructure resource for each machine in the collection and returns a map of machine.Name -> infraResource.

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"time"
2324

2425
"github.com/blang/semver"
@@ -393,12 +394,16 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, controlPl
393394
}
394395

395396
// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
396-
needRollout := controlPlane.MachinesNeedingRollout()
397+
machinesNeedingRollout, rolloutReasons := controlPlane.MachinesNeedingRollout()
397398
switch {
398-
case len(needRollout) > 0:
399-
log.Info("Rolling out Control Plane machines", "needRollout", needRollout.Names())
400-
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), len(controlPlane.Machines)-len(needRollout))
401-
return r.upgradeControlPlane(ctx, controlPlane, needRollout)
399+
case len(machinesNeedingRollout) > 0:
400+
var reasons []string
401+
for _, rolloutReason := range rolloutReasons {
402+
reasons = append(reasons, rolloutReason)
403+
}
404+
log.Info(fmt.Sprintf("Rolling out Control Plane machines: %s", strings.Join(reasons, ",")), "machinesNeedingRollout", machinesNeedingRollout.Names())
405+
conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(machinesNeedingRollout), len(controlPlane.Machines)-len(machinesNeedingRollout))
406+
return r.upgradeControlPlane(ctx, controlPlane, machinesNeedingRollout)
402407
default:
403408
// make sure last upgrade operation is marked as completed.
404409
// NOTE: we check that the condition already exists in order to avoid setting this condition at the first

controlplane/kubeadm/internal/filters.go

Lines changed: 108 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ package internal
1818

1919
import (
2020
"encoding/json"
21+
"fmt"
2122
"reflect"
23+
"strings"
2224

2325
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2426
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -29,101 +31,137 @@ import (
2931
"sigs.k8s.io/cluster-api/util/collections"
3032
)
3133

32-
// MatchesMachineSpec returns a filter to find all machines that matches with KCP config and do not require any rollout.
34+
// matchesMachineSpec checks if a Machine matches the KCP's Kubernetes version, KubeadmConfigSpec and infrastructure template.
35+
// If it doesn't, it returns false and the reasons why, joined into a single comma-separated string.
3336
// Kubernetes version, infrastructure template, and KubeadmConfig fields need to be equivalent.
3437
// Note: We don't need to compare the entire MachineSpec to determine if a Machine needs to be rolled out,
3538
// because all the fields in the MachineSpec, except for version, the infrastructureRef and bootstrap.ConfigRef, are either:
3639
// - mutated in-place (ex: NodeDrainTimeout)
3740
// - not dictated by KCP (ex: ProviderID)
3841
// - not relevant for the rollout decision (ex: failureDomain).
39-
func MatchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) func(machine *clusterv1.Machine) bool {
40-
return collections.And(
41-
collections.MatchesKubernetesVersion(kcp.Spec.Version),
42-
MatchesKubeadmBootstrapConfig(machineConfigs, kcp),
43-
MatchesTemplateClonedFrom(infraConfigs, kcp),
44-
)
42+
func matchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
43+
mismatchReasons := []string{}
44+
45+
if !collections.MatchesKubernetesVersion(kcp.Spec.Version)(machine) {
46+
machineVersion := ""
47+
// The machine or its version may be nil; an empty version is reported in that case.
if machine != nil && machine.Spec.Version != nil {
48+
machineVersion = *machine.Spec.Version
49+
}
50+
mismatchReasons = append(mismatchReasons, fmt.Sprintf("Machine version %q is not equal to KCP version %q", machineVersion, kcp.Spec.Version))
51+
}
52+
53+
if reason, matches := matchesKubeadmBootstrapConfig(machineConfigs, kcp, machine); !matches {
54+
mismatchReasons = append(mismatchReasons, reason)
55+
}
56+
57+
if reason, matches := matchesTemplateClonedFrom(infraConfigs, kcp, machine); !matches {
58+
mismatchReasons = append(mismatchReasons, reason)
59+
}
60+
61+
// All three checks always run, so the result lists every mismatch and not just the first one.
if len(mismatchReasons) > 0 {
62+
return strings.Join(mismatchReasons, ","), false
63+
}
64+
65+
return "", true
4566
}
4667

47-
// NeedsRollout returns a filter to determine if a machine needs rollout.
48-
func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *controlplanev1.RolloutBefore, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) func(machine *clusterv1.Machine) bool {
49-
return collections.Or(
50-
// Machines whose certificates are about to expire.
51-
collections.ShouldRolloutBefore(reconciliationTime, rolloutBefore),
52-
// Machines that are scheduled for rollout (KCP.Spec.RolloutAfter set, the RolloutAfter deadline is expired, and the machine was created before the deadline).
53-
collections.ShouldRolloutAfter(reconciliationTime, rolloutAfter),
54-
// Machines that do not match with KCP config.
55-
collections.Not(MatchesMachineSpec(infraConfigs, machineConfigs, kcp)),
56-
)
68+
// NeedsRollout checks if a Machine needs to be rolled out and returns the reason why.
// The reason is prefixed with the Machine name and lists, comma-separated, every check that
// triggered; it is empty when no rollout is needed.
69+
func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore *controlplanev1.RolloutBefore, infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
70+
rolloutReasons := []string{}
71+
72+
// Machines whose certificates are about to expire.
73+
if collections.ShouldRolloutBefore(reconciliationTime, rolloutBefore)(machine) {
74+
rolloutReasons = append(rolloutReasons, "certificates will expire soon, rolloutBefore expired")
75+
}
76+
77+
// Machines that are scheduled for rollout (KCP.Spec.RolloutAfter set,
78+
// the RolloutAfter deadline is expired, and the machine was created before the deadline).
79+
if collections.ShouldRolloutAfter(reconciliationTime, rolloutAfter)(machine) {
80+
rolloutReasons = append(rolloutReasons, "rolloutAfter expired")
81+
}
82+
83+
// Machines that do not match the KCP config.
84+
if mismatchReason, matches := matchesMachineSpec(infraConfigs, machineConfigs, kcp, machine); !matches {
85+
rolloutReasons = append(rolloutReasons, mismatchReason)
86+
}
87+
88+
// NOTE(review): machine.Name is dereferenced unconditionally below, while matchesMachineSpec
// and its helpers explicitly handle a nil machine and report a mismatch for it; a nil machine
// would therefore reach this line and panic. Confirm callers never pass nil, or guard the name.
if len(rolloutReasons) > 0 {
89+
return fmt.Sprintf("Machine %s needs rollout: %s", machine.Name, strings.Join(rolloutReasons, ",")), true
90+
}
91+
92+
return "", false
5793
}
5894

59-
// MatchesTemplateClonedFrom returns a filter to find all machines that have a corresponding infrastructure machine that
60-
// matches a given KCP infra template.
95+
// matchesTemplateClonedFrom checks if a Machine has a corresponding infrastructure machine that
96+
// matches a given KCP infra template and, if it doesn't match, returns the reason why.
6197
// Note: Differences in the labels and annotations on the infrastructure machine are not considered as matching
6298
// criteria, because changes to labels and annotations are propagated in-place to the infrastructure machines.
6399
// TODO: This function will be renamed in a follow-up PR to something better. (ex: MatchesInfraMachine).
64-
func MatchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructured, kcp *controlplanev1.KubeadmControlPlane) collections.Func {
65-
return func(machine *clusterv1.Machine) bool {
66-
if machine == nil {
67-
return false
68-
}
69-
infraObj, found := infraConfigs[machine.Name]
70-
if !found {
71-
// Return true here because failing to get infrastructure machine should not be considered as unmatching.
72-
return true
73-
}
74-
75-
clonedFromName, ok1 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation]
76-
clonedFromGroupKind, ok2 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation]
77-
if !ok1 || !ok2 {
78-
// All kcp cloned infra machines should have this annotation.
79-
// Missing the annotation may be due to older version machines or adopted machines.
80-
// Should not be considered as mismatch.
81-
return true
82-
}
100+
func matchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructured, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
101+
if machine == nil {
102+
return "Machine cannot be compared with KCP.spec.machineTemplate.infrastructureRef: Machine is nil", false
103+
}
104+
infraObj, found := infraConfigs[machine.Name]
105+
if !found {
106+
// Return true here because failing to get the infrastructure machine should not be considered a mismatch.
107+
return "", true
108+
}
83109

84-
// Check if the machine's infrastructure reference has been created from the current KCP infrastructure template.
85-
if clonedFromName != kcp.Spec.MachineTemplate.InfrastructureRef.Name ||
86-
clonedFromGroupKind != kcp.Spec.MachineTemplate.InfrastructureRef.GroupVersionKind().GroupKind().String() {
87-
return false
88-
}
110+
clonedFromName, ok1 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation]
111+
clonedFromGroupKind, ok2 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation]
112+
if !ok1 || !ok2 {
113+
// All KCP-cloned infra machines should have both annotations.
114+
// A missing annotation may be due to older version machines or adopted machines.
115+
// This should not be considered a mismatch.
116+
return "", true
117+
}
89118

90-
return true
119+
// Check if the machine's infrastructure reference has been created from the current KCP infrastructure template.
120+
if clonedFromName != kcp.Spec.MachineTemplate.InfrastructureRef.Name ||
121+
clonedFromGroupKind != kcp.Spec.MachineTemplate.InfrastructureRef.GroupVersionKind().GroupKind().String() {
122+
return fmt.Sprintf("Infrastructure template on KCP rotated from %s %s to %s %s",
123+
clonedFromGroupKind, clonedFromName,
124+
kcp.Spec.MachineTemplate.InfrastructureRef.GroupVersionKind().GroupKind().String(), kcp.Spec.MachineTemplate.InfrastructureRef.Name), false
91125
}
126+
127+
return "", true
92128
}
93129

94-
// MatchesKubeadmBootstrapConfig checks if machine's KubeadmConfigSpec is equivalent with KCP's KubeadmConfigSpec.
130+
// matchesKubeadmBootstrapConfig checks if machine's KubeadmConfigSpec is equivalent to KCP's KubeadmConfigSpec
// and, if it is not, returns the reason why.
95131
// Note: Differences in the labels and annotations on the KubeadmConfig are not considered as matching
96132
// criteria, because changes to labels and annotations are propagated in-place to KubeadmConfig.
97-
func MatchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) collections.Func {
98-
return func(machine *clusterv1.Machine) bool {
99-
if machine == nil {
100-
return false
101-
}
133+
func matchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane, machine *clusterv1.Machine) (string, bool) {
134+
if machine == nil {
135+
return "Machine KubeadmConfig cannot be compared: Machine is nil", false
136+
}
102137

103-
// Check if KCP and machine ClusterConfiguration matches, if not return
104-
if match := matchClusterConfiguration(kcp, machine); !match {
105-
return false
106-
}
138+
// Check if the KCP and machine ClusterConfiguration match; if not, report the mismatch.
139+
if !matchClusterConfiguration(kcp, machine) {
140+
return "Machine ClusterConfiguration is outdated", false
141+
}
107142

108-
bootstrapRef := machine.Spec.Bootstrap.ConfigRef
109-
if bootstrapRef == nil {
110-
// Missing bootstrap reference should not be considered as unmatching.
111-
// This is a safety precaution to avoid selecting machines that are broken, which in the future should be remediated separately.
112-
return true
113-
}
143+
bootstrapRef := machine.Spec.Bootstrap.ConfigRef
144+
if bootstrapRef == nil {
145+
// A missing bootstrap reference should not be considered a mismatch.
146+
// This is a safety precaution to avoid selecting machines that are broken, which in the future should be remediated separately.
147+
return "", true
148+
}
114149

115-
machineConfig, found := machineConfigs[machine.Name]
116-
if !found {
117-
// Return true here because failing to get KubeadmConfig should not be considered as unmatching.
118-
// This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving.
119-
return true
120-
}
150+
machineConfig, found := machineConfigs[machine.Name]
151+
if !found {
152+
// Return true here because failing to get the KubeadmConfig should not be considered a mismatch.
153+
// This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving.
154+
return "", true
155+
}
121156

122-
// Check if KCP and machine InitConfiguration or JoinConfiguration matches
123-
// NOTE: only one between init configuration and join configuration is set on a machine, depending
124-
// on the fact that the machine was the initial control plane node or a joining control plane node.
125-
return matchInitOrJoinConfiguration(machineConfig, kcp)
157+
// Check if the KCP and machine InitConfiguration or JoinConfiguration match.
158+
// NOTE: only one of init configuration and join configuration is set on a machine, depending
159+
// on whether the machine was the initial control plane node or a joining control plane node.
160+
if !matchInitOrJoinConfiguration(machineConfig, kcp) {
161+
return "Machine InitConfiguration or JoinConfiguration are outdated", false
126162
}
163+
164+
return "", true
127165
}
128166

129167
// matchClusterConfiguration verifies if KCP and machine ClusterConfiguration matches.

0 commit comments

Comments
 (0)