Skip to content

Commit c145c99

Browse files
fix(preflight): ensure MDs without overrides are also checked (#1216)
We were not adding preflight checks for worker machine deployments that did not have variable overrides set. Note: this is stacked on top of #1215 mostly for unit tests. Adding a do not merge label until #1215 is merged. **How has this been tested?** 1. Misconfigured storage container (doesn't actually exist) on cluster in machine details or on cluster in failure domain (e.g. `ncn-dev-sandbox-gpu` doesn't have `k8s` storage container) 2. Create failure domain ``` apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: NutanixFailureDomain metadata: name: fd-2 namespace: default spec: prismElementCluster: type: name name: ncn-dev-sandbox-gpu subnets: - type: name name: subnet-2 ``` 3. Create a cluster without the changes in this PR ``` apiVersion: cluster.x-k8s.io/v1beta1 kind: Cluster metadata: ... spec: ... topology: class: nutanix-quick-start controlPlane: metadata: {} replicas: 3 variables: ... - name: workerConfig value: nutanix: machineDetails: bootType: uefi cluster: name: ncn-dev-sandbox-gpu type: name imageLookup: baseOS: rocky-9.6 format: nkp-{{.BaseOS}}-release-{{.K8sVersion}}-* memorySize: 4Gi subnets: - name: vlan173 type: name systemDiskSize: 40Gi vcpuSockets: 2 vcpusPerSocket: 1 version: 1.33.1 workers: machineDeployments: - class: default-worker name: md-variable-override variables: overrides: - name: workerConfig value: nutanix: machineDetails: ... cluster: name: ncn-dev-sandbox-gpu type: name - class: default-worker failureDomain: fd-2 name: md-variable-override-failure-domain variables: overrides: - name: workerConfig value: nutanix: machineDetails: ... cluster: name: ncn-dev-sandbox-gpu type: name - class: default-worker name: md-no-overrides - class: default-worker failureDomain: fd-2 name: md-no-overrides-failure-domain ``` 4. Observer pre-flight failure on only 2 machine deployments ``` The request is invalid: * $.spec.topology.workers.machineDeployments[[email protected]=="md-variable-override-failure-domain"].failureDomain: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. * $.spec.topology.workers.machineDeployments[[email protected]=="md-variable-override"].variables[[email protected]=workerConfig].value.nutanix.machineDetails: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. ``` 6. Create the same cluster with changes in this PR and observe pre-flight failure on all 4 machine deployments ``` The request is invalid: * $.spec.topology.workers.machineDeployments[[email protected]=="md-variable-override-failure-domain"].failureDomain: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. * $.spec.topology.workers.machineDeployments[[email protected]=="md-no-overrides"].variables[[email protected]=workerConfig].value.nutanix.machineDetails: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. * $.spec.topology.workers.machineDeployments[[email protected]=="md-no-overrides-failure-domain"].failureDomain: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. * $.spec.topology.workers.machineDeployments[[email protected]=="md-variable-override"].variables[[email protected]=workerConfig].value.nutanix.machineDetails: Found no Storage Containers with name "k8s" on Cluster "ncn-dev-sandbox-gpu". Create a Storage Container with this name on Cluster "ncn-dev-sandbox-gpu", and then retry. ```
1 parent e6aaf8d commit c145c99

File tree

2 files changed

+198
-14
lines changed

2 files changed

+198
-14
lines changed

pkg/webhook/preflight/nutanix/specs.go

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"context"
88
"fmt"
99

10+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
11+
1012
carenv1 "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/v1alpha1"
1113
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/variables"
1214
"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/webhook/preflight"
@@ -77,12 +79,41 @@ func newConfigurationCheck(
7779
failureDomainByMachineDeploymentName[md.Name] = *md.FailureDomain
7880
}
7981

80-
if md.Variables == nil {
82+
var workerConfigVar *clusterv1.ClusterVariable
83+
var workerConfigFieldPath string
84+
if md.Variables != nil {
85+
workerConfigVar = variables.GetClusterVariableByName(
86+
carenv1.WorkerConfigVariableName,
87+
md.Variables.Overrides,
88+
)
89+
if workerConfigVar != nil {
90+
workerConfigFieldPath = fmt.Sprintf(
91+
"$.spec.topology.workers.machineDeployments[[email protected]==%q].variables[[email protected]=='%s'].value.nutanix",
92+
md.Name,
93+
carenv1.WorkerConfigVariableName,
94+
)
95+
}
96+
}
97+
if workerConfigVar == nil {
98+
workerConfigVar = variables.GetClusterVariableByName(
99+
carenv1.WorkerConfigVariableName,
100+
cd.cluster.Spec.Topology.Variables,
101+
)
102+
if workerConfigVar != nil {
103+
workerConfigFieldPath = fmt.Sprintf(
104+
"$.spec.topology.variables[[email protected]=='%s'].value.nutanix",
105+
carenv1.WorkerConfigVariableName,
106+
)
107+
}
108+
}
109+
110+
if workerConfigVar == nil {
81111
continue
82112
}
113+
83114
nutanixWorkerNodeConfigSpec := &carenv1.NutanixWorkerNodeConfigSpec{}
84115
err := variables.UnmarshalClusterVariable(
85-
variables.GetClusterVariableByName(carenv1.WorkerConfigVariableName, md.Variables.Overrides),
116+
workerConfigVar,
86117
nutanixWorkerNodeConfigSpec,
87118
)
88119
if err != nil {
@@ -92,24 +123,22 @@ func newConfigurationCheck(
92123
configurationCheck.result.Causes = append(configurationCheck.result.Causes,
93124
preflight.Cause{
94125
Message: fmt.Sprintf(
95-
"Failed to unmarshal topology machineDeployment variable %q: %s. Review the Cluster.", ///nolint:lll // Message is long.
126+
"Failed to unmarshal variable %q: %s. Review the Cluster.", ///nolint:lll // Message is long.
96127
carenv1.WorkerConfigVariableName,
97128
err,
98129
),
99-
//nolint:lll // The field is long.
100-
Field: fmt.Sprintf(
101-
"$.spec.topology.workers.machineDeployments[[email protected]==%q].variables[[email protected]=workerConfig].value.nutanix.machineDetails",
102-
md.Name,
103-
),
130+
Field: workerConfigFieldPath,
104131
},
105132
)
106133
}
134+
107135
// Save the NutanixWorkerNodeConfigSpec only if it contains Nutanix configuration.
108136
if nutanixWorkerNodeConfigSpec.Nutanix != nil {
109137
nutanixWorkerNodeConfigSpecByMachineDeploymentName[md.Name] = nutanixWorkerNodeConfigSpec
110138
}
111139
}
112140
}
141+
113142
// Save the NutanixWorkerNodeConfigSpecByMachineDeploymentName only if it contains at least one Nutanix configuration.
114143
if len(nutanixWorkerNodeConfigSpecByMachineDeploymentName) > 0 {
115144
cd.nutanixWorkerNodeConfigSpecByMachineDeploymentName = nutanixWorkerNodeConfigSpecByMachineDeploymentName

pkg/webhook/preflight/nutanix/specs_test.go

Lines changed: 161 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/go-logr/logr/testr"
1111
"github.com/stretchr/testify/assert"
1212
v1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
13+
"k8s.io/utils/ptr"
1314
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1415

1516
carenv1 "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/api/v1alpha1"
@@ -58,7 +59,7 @@ func TestNewConfigurationCheck(t *testing.T) {
5859
Name: carenv1.ClusterConfigVariableName,
5960
Value: v1.JSON{
6061
Raw: []byte(
61-
`{"controlPlane": {"nutanix": {"prismElement": {"address": "pe.example.com"}}}}`,
62+
`{"controlPlane": {"nutanix": {"cluster": {"name": "cluster-from-variable"}}}}`,
6263
),
6364
},
6465
},
@@ -96,7 +97,7 @@ func TestNewConfigurationCheck(t *testing.T) {
9697
Name: carenv1.WorkerConfigVariableName,
9798
Value: v1.JSON{
9899
Raw: []byte(
99-
`{"nutanix": {"prismElement": {"address": "pe.example.com"}}}`,
100+
`{"nutanix": {"cluster": {"name": "cluster-from-override"}}}`,
100101
),
101102
},
102103
},
@@ -183,8 +184,8 @@ func TestNewConfigurationCheck(t *testing.T) {
183184
InternalError: true,
184185
Causes: []preflight.Cause{
185186
{
186-
Message: "Failed to unmarshal topology machineDeployment variable \"workerConfig\": failed to unmarshal json: invalid character 'i' looking for beginning of object key string. Review the Cluster.", ///nolint:lll // The message is long.
187-
Field: "$.spec.topology.workers.machineDeployments[[email protected]==\"md-0\"].variables[[email protected]=workerConfig].value.nutanix.machineDetails", ///nolint:lll // The field is long.
187+
Message: "Failed to unmarshal variable \"workerConfig\": failed to unmarshal json: invalid character 'i' looking for beginning of object key string. Review the Cluster.", ///nolint:lll // The message is long.
188+
Field: "$.spec.topology.workers.machineDeployments[[email protected]==\"md-0\"].variables[[email protected]=='workerConfig'].value.nutanix", ///nolint:lll // The field is long.
188189
},
189190
},
190191
},
@@ -238,7 +239,7 @@ func TestNewConfigurationCheck(t *testing.T) {
238239
Name: carenv1.WorkerConfigVariableName,
239240
Value: v1.JSON{
240241
Raw: []byte(
241-
`{"nutanix": {"prismElement": {"address": "pe1.example.com"}}}`,
242+
`{"nutanix": {"cluster": {"name": "cluster-from-override"}}}`,
242243
),
243244
},
244245
},
@@ -253,7 +254,7 @@ func TestNewConfigurationCheck(t *testing.T) {
253254
Name: carenv1.WorkerConfigVariableName,
254255
Value: v1.JSON{
255256
Raw: []byte(
256-
`{"nutanix": {"prismElement": {"address": "pe2.example.com"}}}`,
257+
`{"nutanix": {"cluster": {"name": "cluster-from-override"}}}`,
257258
),
258259
},
259260
},
@@ -272,6 +273,89 @@ func TestNewConfigurationCheck(t *testing.T) {
272273
expectedWorkerNodeConfigSpecMapNotEmpty: true,
273274
expectedWorkerNodeConfigSpecMapEntryCount: 2,
274275
},
276+
{
277+
name: "worker config from cluster variables",
278+
cluster: &clusterv1.Cluster{
279+
Spec: clusterv1.ClusterSpec{
280+
Topology: &clusterv1.Topology{
281+
Variables: []clusterv1.ClusterVariable{
282+
{
283+
Name: carenv1.ClusterConfigVariableName,
284+
Value: v1.JSON{
285+
Raw: []byte(`{}`),
286+
},
287+
},
288+
{
289+
Name: carenv1.WorkerConfigVariableName,
290+
Value: v1.JSON{
291+
Raw: []byte(
292+
`{"nutanix": {"cluster": {"name": "cluster-from-variable"}}}`,
293+
),
294+
},
295+
},
296+
},
297+
Workers: &clusterv1.WorkersTopology{
298+
MachineDeployments: []clusterv1.MachineDeploymentTopology{
299+
{
300+
Name: "md-0",
301+
},
302+
},
303+
},
304+
},
305+
},
306+
},
307+
expectedResult: preflight.CheckResult{
308+
Allowed: true,
309+
},
310+
expectedNutanixClusterConfigSpec: false,
311+
expectedWorkerNodeConfigSpecMapNotEmpty: true,
312+
expectedWorkerNodeConfigSpecMapEntryCount: 1,
313+
},
314+
{
315+
name: "worker config with failure domain",
316+
cluster: &clusterv1.Cluster{
317+
Spec: clusterv1.ClusterSpec{
318+
Topology: &clusterv1.Topology{
319+
Variables: []clusterv1.ClusterVariable{
320+
{
321+
Name: carenv1.ClusterConfigVariableName,
322+
Value: v1.JSON{
323+
Raw: []byte(
324+
`{"nutanix": {"failureDomains": ["fd-1", "fd-2", "fd-3"]}}`,
325+
),
326+
},
327+
},
328+
},
329+
Workers: &clusterv1.WorkersTopology{
330+
MachineDeployments: []clusterv1.MachineDeploymentTopology{
331+
{
332+
Name: "md-0",
333+
Variables: &clusterv1.MachineDeploymentVariables{
334+
Overrides: []clusterv1.ClusterVariable{
335+
{
336+
Name: carenv1.WorkerConfigVariableName,
337+
Value: v1.JSON{
338+
Raw: []byte(
339+
`{"nutanix": {"cluster": {"name": "worker-cluster"}, "subnets": [{"name": "worker-subnet"}]}}`,
340+
),
341+
},
342+
},
343+
},
344+
},
345+
FailureDomain: ptr.To("fd-1"),
346+
},
347+
},
348+
},
349+
},
350+
},
351+
},
352+
expectedResult: preflight.CheckResult{
353+
Allowed: true,
354+
},
355+
expectedNutanixClusterConfigSpec: true,
356+
expectedWorkerNodeConfigSpecMapNotEmpty: true,
357+
expectedWorkerNodeConfigSpecMapEntryCount: 1,
358+
},
275359
{
276360
name: "worker config without nutanix field",
277361
cluster: &clusterv1.Cluster{
@@ -342,6 +426,77 @@ func TestNewConfigurationCheck(t *testing.T) {
342426
expectedWorkerNodeConfigSpecMapNotEmpty: false,
343427
expectedWorkerNodeConfigSpecMapEntryCount: 0,
344428
},
429+
{
430+
name: "mixed worker scenarios - with/without overrides and with/without failure domains",
431+
cluster: &clusterv1.Cluster{
432+
Spec: clusterv1.ClusterSpec{
433+
Topology: &clusterv1.Topology{
434+
Variables: []clusterv1.ClusterVariable{
435+
{
436+
Name: carenv1.ClusterConfigVariableName,
437+
Value: v1.JSON{
438+
Raw: []byte(`{"nutanix": {"failureDomains": ["fd-1", "fd-2", "fd-3"]}}`),
439+
},
440+
},
441+
{
442+
Name: carenv1.WorkerConfigVariableName,
443+
Value: v1.JSON{
444+
Raw: []byte(`{"nutanix": {"cluster": {"name": "cluster-from-variable"}}}`),
445+
},
446+
},
447+
},
448+
Workers: &clusterv1.WorkersTopology{
449+
MachineDeployments: []clusterv1.MachineDeploymentTopology{
450+
{
451+
Name: "md-with-overrides",
452+
Variables: &clusterv1.MachineDeploymentVariables{
453+
Overrides: []clusterv1.ClusterVariable{
454+
{
455+
Name: carenv1.WorkerConfigVariableName,
456+
Value: v1.JSON{
457+
Raw: []byte(
458+
`{"nutanix": {"cluster": {"name": "cluster-from-override"}}}`,
459+
),
460+
},
461+
},
462+
},
463+
},
464+
},
465+
{
466+
Name: "md-without-overrides",
467+
},
468+
{
469+
Name: "md-with-overrides-and-fd",
470+
FailureDomain: ptr.To("fd-1"),
471+
Variables: &clusterv1.MachineDeploymentVariables{
472+
Overrides: []clusterv1.ClusterVariable{
473+
{
474+
Name: carenv1.WorkerConfigVariableName,
475+
Value: v1.JSON{
476+
Raw: []byte(
477+
`{"nutanix": {"cluster": {"name": "cluster-from-override"}}}`,
478+
),
479+
},
480+
},
481+
},
482+
},
483+
},
484+
{
485+
Name: "md-without-overrides-and-fd",
486+
FailureDomain: ptr.To("fd-1"),
487+
},
488+
},
489+
},
490+
},
491+
},
492+
},
493+
expectedResult: preflight.CheckResult{
494+
Allowed: true,
495+
},
496+
expectedNutanixClusterConfigSpec: true,
497+
expectedWorkerNodeConfigSpecMapNotEmpty: true,
498+
expectedWorkerNodeConfigSpecMapEntryCount: 4,
499+
},
345500
}
346501

347502
for _, tt := range tests {

0 commit comments

Comments
 (0)