Skip to content

Commit 542ac03

Browse files
authored
Merge pull request #7498 from killianmuldoon/mhc/improve-validation
🐛 Improve MHC validation for topology-managed MHC
2 parents 80e7c65 + 667b272 commit 542ac03

File tree

5 files changed

+245
-79
lines changed

5 files changed

+245
-79
lines changed

api/v1beta1/machinehealthcheck_webhook.go

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,37 +136,52 @@ func (m *MachineHealthCheck) validate(old *MachineHealthCheck) error {
136136
)
137137
}
138138

139+
allErrs = append(allErrs, m.ValidateCommonFields(specPath)...)
140+
141+
if len(allErrs) == 0 {
142+
return nil
143+
}
144+
return apierrors.NewInvalid(GroupVersion.WithKind("MachineHealthCheck").GroupKind(), m.Name, allErrs)
145+
}
146+
147+
// ValidateCommonFields validates UnhealthyConditions NodeStartupTimeout, MaxUnhealthy, and RemediationTemplate of the MHC.
148+
// These are the fields in common with other types which define MachineHealthChecks such as MachineHealthCheckClass and MachineHealthCheckTopology.
149+
func (m *MachineHealthCheck) ValidateCommonFields(fldPath *field.Path) field.ErrorList {
150+
var allErrs field.ErrorList
151+
139152
if m.Spec.NodeStartupTimeout != nil &&
140153
m.Spec.NodeStartupTimeout.Seconds() != disabledNodeStartupTimeout.Seconds() &&
141154
m.Spec.NodeStartupTimeout.Seconds() < minNodeStartupTimeout.Seconds() {
142155
allErrs = append(
143156
allErrs,
144-
field.Invalid(specPath.Child("nodeStartupTimeout"), m.Spec.NodeStartupTimeout.Seconds(), "must be at least 30s"),
157+
field.Invalid(fldPath.Child("nodeStartupTimeout"), m.Spec.NodeStartupTimeout.String(), "must be at least 30s"),
145158
)
146159
}
147-
148160
if m.Spec.MaxUnhealthy != nil {
149161
if _, err := intstr.GetScaledValueFromIntOrPercent(m.Spec.MaxUnhealthy, 0, false); err != nil {
150162
allErrs = append(
151163
allErrs,
152-
field.Invalid(specPath.Child("maxUnhealthy"), m.Spec.MaxUnhealthy, fmt.Sprintf("must be either an int or a percentage: %v", err.Error())),
164+
field.Invalid(fldPath.Child("maxUnhealthy"), m.Spec.MaxUnhealthy, fmt.Sprintf("must be either an int or a percentage: %v", err.Error())),
153165
)
154166
}
155167
}
156-
157168
if m.Spec.RemediationTemplate != nil && m.Spec.RemediationTemplate.Namespace != m.Namespace {
158169
allErrs = append(
159170
allErrs,
160171
field.Invalid(
161-
specPath.Child("remediationTemplate", "namespace"),
172+
fldPath.Child("remediationTemplate", "namespace"),
162173
m.Spec.RemediationTemplate.Namespace,
163174
"must match metadata.namespace",
164175
),
165176
)
166177
}
167178

168-
if len(allErrs) == 0 {
169-
return nil
179+
if len(m.Spec.UnhealthyConditions) == 0 {
180+
allErrs = append(allErrs, field.Forbidden(
181+
fldPath.Child("unhealthyConditions"),
182+
"must have at least one entry",
183+
))
170184
}
171-
return apierrors.NewInvalid(GroupVersion.WithKind("MachineHealthCheck").GroupKind(), m.Name, allErrs)
185+
186+
return allErrs
172187
}

api/v1beta1/machinehealthcheck_webhook_test.go

Lines changed: 110 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ func TestMachineHealthCheckDefault(t *testing.T) {
3939
MatchLabels: map[string]string{"foo": "bar"},
4040
},
4141
RemediationTemplate: &corev1.ObjectReference{},
42+
UnhealthyConditions: []UnhealthyCondition{
43+
{
44+
Type: corev1.NodeReady,
45+
Status: corev1.ConditionFalse,
46+
},
47+
},
4248
},
4349
}
4450
t.Run("for MachineHealthCheck", utildefaulting.DefaultValidateTest(mhc))
@@ -77,6 +83,12 @@ func TestMachineHealthCheckLabelSelectorAsSelectorValidation(t *testing.T) {
7783
Selector: metav1.LabelSelector{
7884
MatchLabels: tt.selectors,
7985
},
86+
UnhealthyConditions: []UnhealthyCondition{
87+
{
88+
Type: corev1.NodeReady,
89+
Status: corev1.ConditionFalse,
90+
},
91+
},
8092
},
8193
}
8294
if tt.expectErr {
@@ -123,6 +135,12 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) {
123135
"test": "test",
124136
},
125137
},
138+
UnhealthyConditions: []UnhealthyCondition{
139+
{
140+
Type: corev1.NodeReady,
141+
Status: corev1.ConditionFalse,
142+
},
143+
},
126144
},
127145
}
128146
oldMHC := &MachineHealthCheck{
@@ -133,6 +151,12 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) {
133151
"test": "test",
134152
},
135153
},
154+
UnhealthyConditions: []UnhealthyCondition{
155+
{
156+
Type: corev1.NodeReady,
157+
Status: corev1.ConditionFalse,
158+
},
159+
},
136160
},
137161
}
138162

@@ -145,6 +169,58 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) {
145169
}
146170
}
147171

172+
func TestMachineHealthCheckUnhealthyConditions(t *testing.T) {
173+
tests := []struct {
174+
name string
175+
unhealthConditions []UnhealthyCondition
176+
expectErr bool
177+
}{
178+
{
179+
name: "pass with correctly defined unhealthyConditions",
180+
unhealthConditions: []UnhealthyCondition{
181+
{
182+
Type: corev1.NodeReady,
183+
Status: corev1.ConditionFalse,
184+
},
185+
},
186+
expectErr: false,
187+
},
188+
{
189+
name: "fail if the UnhealthCondition array is nil",
190+
unhealthConditions: nil,
191+
expectErr: true,
192+
},
193+
{
194+
name: "fail if the UnhealthCondition array is empty",
195+
unhealthConditions: []UnhealthyCondition{},
196+
expectErr: true,
197+
},
198+
}
199+
200+
for _, tt := range tests {
201+
t.Run(tt.name, func(t *testing.T) {
202+
g := NewWithT(t)
203+
mhc := &MachineHealthCheck{
204+
Spec: MachineHealthCheckSpec{
205+
Selector: metav1.LabelSelector{
206+
MatchLabels: map[string]string{
207+
"test": "test",
208+
},
209+
},
210+
UnhealthyConditions: tt.unhealthConditions,
211+
},
212+
}
213+
if tt.expectErr {
214+
g.Expect(mhc.ValidateCreate()).NotTo(Succeed())
215+
g.Expect(mhc.ValidateUpdate(mhc)).NotTo(Succeed())
216+
} else {
217+
g.Expect(mhc.ValidateCreate()).To(Succeed())
218+
g.Expect(mhc.ValidateUpdate(mhc)).To(Succeed())
219+
}
220+
})
221+
}
222+
}
223+
148224
func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) {
149225
zero := metav1.Duration{Duration: 0}
150226
twentyNineSeconds := metav1.Duration{Duration: 29 * time.Second}
@@ -200,6 +276,12 @@ func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) {
200276
"test": "test",
201277
},
202278
},
279+
UnhealthyConditions: []UnhealthyCondition{
280+
{
281+
Type: corev1.NodeReady,
282+
Status: corev1.ConditionFalse,
283+
},
284+
},
203285
},
204286
}
205287

@@ -253,6 +335,12 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) {
253335
"test": "test",
254336
},
255337
},
338+
UnhealthyConditions: []UnhealthyCondition{
339+
{
340+
Type: corev1.NodeReady,
341+
Status: corev1.ConditionFalse,
342+
},
343+
},
256344
},
257345
}
258346

@@ -268,7 +356,16 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) {
268356

269357
func TestMachineHealthCheckSelectorValidation(t *testing.T) {
270358
g := NewWithT(t)
271-
mhc := &MachineHealthCheck{}
359+
mhc := &MachineHealthCheck{
360+
Spec: MachineHealthCheckSpec{
361+
UnhealthyConditions: []UnhealthyCondition{
362+
{
363+
Type: corev1.NodeReady,
364+
Status: corev1.ConditionFalse,
365+
},
366+
},
367+
},
368+
}
272369
err := mhc.validate(nil)
273370
g.Expect(err).ToNot(BeNil())
274371
g.Expect(err.Error()).To(ContainSubstring("selector must not be empty"))
@@ -285,6 +382,12 @@ func TestMachineHealthCheckClusterNameSelectorValidation(t *testing.T) {
285382
"baz": "qux",
286383
},
287384
},
385+
UnhealthyConditions: []UnhealthyCondition{
386+
{
387+
Type: corev1.NodeReady,
388+
Status: corev1.ConditionFalse,
389+
},
390+
},
288391
},
289392
}
290393
err := mhc.validate(nil)
@@ -305,6 +408,12 @@ func TestMachineHealthCheckRemediationTemplateNamespaceValidation(t *testing.T)
305408
Spec: MachineHealthCheckSpec{
306409
Selector: metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
307410
RemediationTemplate: &corev1.ObjectReference{Namespace: "foo"},
411+
UnhealthyConditions: []UnhealthyCondition{
412+
{
413+
Type: corev1.NodeReady,
414+
Status: corev1.ConditionFalse,
415+
},
416+
},
308417
},
309418
}
310419
invalid := valid.DeepCopy()

internal/webhooks/cluster.go

Lines changed: 51 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -366,64 +366,64 @@ func (webhook *Cluster) getClusterClassForCluster(ctx context.Context, cluster *
366366
func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clusterv1.ClusterClass) field.ErrorList {
367367
var allErrs field.ErrorList
368368

369-
// Validate ControlPlane MachineHealthCheck if defined.
370-
if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil && !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
371-
// Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure.
372-
if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil {
373-
allErrs = append(allErrs, field.Forbidden(
374-
field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck"),
375-
"can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass",
376-
))
377-
}
378-
// Ensure ControlPlane MachineHealthCheck defines UnhealthyConditions.
379-
if len(cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.UnhealthyConditions) == 0 {
380-
allErrs = append(allErrs, field.Forbidden(
381-
field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck", "unhealthyConditions"),
382-
"must have at least one value",
383-
))
369+
if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil {
370+
fldPath := field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck")
371+
372+
// Validate ControlPlane MachineHealthCheck if defined.
373+
if !cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
374+
// Ensure ControlPlane does not define a MachineHealthCheck if the ClusterClass does not define MachineInfrastructure.
375+
if clusterClass.Spec.ControlPlane.MachineInfrastructure == nil {
376+
allErrs = append(allErrs, field.Forbidden(
377+
fldPath,
378+
"can be set only if spec.controlPlane.machineInfrastructure is set in ClusterClass",
379+
))
380+
}
381+
allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace,
382+
&cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass)...)
384383
}
385-
}
386384

387-
// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
388-
// available either in the Cluster topology or in the ClusterClass.
389-
// (One of these definitions will be used in the controller to create the MachineHealthCheck)
390-
if cluster.Spec.Topology.ControlPlane.MachineHealthCheck != nil &&
391-
cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil &&
392-
*cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable &&
393-
cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() &&
394-
clusterClass.Spec.ControlPlane.MachineHealthCheck == nil {
395-
allErrs = append(allErrs, field.Forbidden(
396-
field.NewPath("spec", "topology", "controlPlane", "machineHealthCheck", "enable"),
397-
fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable),
398-
))
385+
// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
386+
// available either in the Cluster topology or in the ClusterClass.
387+
// (One of these definitions will be used in the controller to create the MachineHealthCheck)
388+
389+
// Check if the machineHealthCheck is explicitly enabled in the ControlPlaneTopology.
390+
if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable != nil && *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable {
391+
// Ensure the MHC is defined in at least one of the ControlPlaneTopology of the Cluster or the ControlPlaneClass of the ClusterClass.
392+
if cluster.Spec.Topology.ControlPlane.MachineHealthCheck.MachineHealthCheckClass.IsZero() && clusterClass.Spec.ControlPlane.MachineHealthCheck == nil {
393+
allErrs = append(allErrs, field.Forbidden(
394+
fldPath.Child("enable"),
395+
fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *cluster.Spec.Topology.ControlPlane.MachineHealthCheck.Enable),
396+
))
397+
}
398+
}
399399
}
400400

401401
if cluster.Spec.Topology.Workers != nil {
402402
for i, md := range cluster.Spec.Topology.Workers.MachineDeployments {
403-
// If MachineHealthCheck is defined ensure it defines UnhealthyConditions.
404-
if md.MachineHealthCheck != nil && !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
405-
if len(md.MachineHealthCheck.MachineHealthCheckClass.UnhealthyConditions) == 0 {
406-
allErrs = append(allErrs, field.Forbidden(
407-
field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i).Child("unhealthyConditions"),
408-
"must have at least one value",
409-
))
403+
if md.MachineHealthCheck != nil {
404+
fldPath := field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i)
405+
406+
// Validate the MachineDeployment MachineHealthCheck if defined.
407+
if !md.MachineHealthCheck.MachineHealthCheckClass.IsZero() {
408+
allErrs = append(allErrs, validateMachineHealthCheckClass(fldPath, cluster.Namespace,
409+
&md.MachineHealthCheck.MachineHealthCheckClass)...)
410410
}
411-
}
412411

413-
// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
414-
// available either in the Cluster topology or in the ClusterClass.
415-
// (One of these definitions will be used in the controller to create the MachineHealthCheck)
416-
mdClass := machineDeploymentClassOfName(clusterClass, md.Class)
417-
if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations.
418-
if md.MachineHealthCheck != nil &&
419-
md.MachineHealthCheck.Enable != nil &&
420-
*md.MachineHealthCheck.Enable &&
421-
md.MachineHealthCheck.MachineHealthCheckClass.IsZero() &&
422-
mdClass.MachineHealthCheck == nil {
423-
allErrs = append(allErrs, field.Forbidden(
424-
field.NewPath("spec", "topology", "workers", "machineDeployments", "machineHealthCheck").Index(i).Child("enable"),
425-
fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable),
426-
))
412+
// If MachineHealthCheck is explicitly enabled then make sure that a MachineHealthCheck definition is
413+
// available either in the Cluster topology or in the ClusterClass.
414+
// (One of these definitions will be used in the controller to create the MachineHealthCheck)
415+
mdClass := machineDeploymentClassOfName(clusterClass, md.Class)
416+
if mdClass != nil { // Note: we skip handling the nil case here as it is already handled in previous validations.
417+
// Check if the machineHealthCheck is explicitly enabled in the machineDeploymentTopology.
418+
if md.MachineHealthCheck.Enable != nil && *md.MachineHealthCheck.Enable {
419+
// Ensure the MHC is defined in at least one of the MachineDeploymentTopology of the Cluster or the MachineDeploymentClass of the ClusterClass.
420+
if md.MachineHealthCheck.MachineHealthCheckClass.IsZero() && mdClass.MachineHealthCheck == nil {
421+
allErrs = append(allErrs, field.Forbidden(
422+
fldPath.Child("enable"),
423+
fmt.Sprintf("cannot be set to %t as MachineHealthCheck definition is not available in the Cluster topology or the ClusterClass", *md.MachineHealthCheck.Enable),
424+
))
425+
}
426+
}
427427
}
428428
}
429429
}
@@ -433,7 +433,7 @@ func validateMachineHealthChecks(cluster *clusterv1.Cluster, clusterClass *clust
433433
}
434434

435435
// machineDeploymentClassOfName find a MachineDeploymentClass of the given name in the provided ClusterClass.
436-
// Returns nill if can not find one.
436+
// Returns nil if it can not find one.
437437
// TODO: Check if there is already a helper function that can do this.
438438
func machineDeploymentClassOfName(clusterClass *clusterv1.ClusterClass, name string) *clusterv1.MachineDeploymentClass {
439439
for _, mdClass := range clusterClass.Spec.Workers.MachineDeployments {

0 commit comments

Comments
 (0)