Skip to content

Commit d9226f2

Browse files
Make Cluster webhook less strict for out of date ClusterClasses
1 parent cf6f77a commit d9226f2

File tree

3 files changed

+162
-30
lines changed

3 files changed

+162
-30
lines changed

cmd/clusterctl/client/cluster/mover.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import (
3636

3737
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3838
logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
39-
"sigs.k8s.io/cluster-api/util/annotations"
4039
"sigs.k8s.io/cluster-api/util/conditions"
4140
"sigs.k8s.io/cluster-api/util/patch"
4241
"sigs.k8s.io/cluster-api/util/yaml"
@@ -639,7 +638,7 @@ func pauseClusterClass(proxy Proxy, n *node, pause bool) error {
639638
// Update the annotation to the desired state
640639
ccAnnotations := clusterClass.GetAnnotations()
641640
if ccAnnotations == nil {
642-
ccAnnotations = make(map[string]string)
641+
ccAnnotations = map[string]string{}
643642
}
644643
if pause {
645644
// Set the pause annotation.
@@ -649,12 +648,8 @@ func pauseClusterClass(proxy Proxy, n *node, pause bool) error {
649648
delete(ccAnnotations, clusterv1.PausedAnnotation)
650649
}
651650

652-
// If the ClusterClass is already at desired state return early.
653-
if !annotations.AddAnnotations(clusterClass, ccAnnotations) {
654-
return nil
655-
}
656-
657-
// Update the cluster class with the new annotations.
651+
// Update the ClusterClass with the new annotations.
652+
clusterClass.SetAnnotations(ccAnnotations)
658653
if err := patchHelper.Patch(ctx, clusterClass); err != nil {
659654
return errors.Wrapf(err, "error patching ClusterClass %s/%s", n.identity.Namespace, n.identity.Name)
660655
}

internal/webhooks/cluster.go

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ type Cluster struct {
6262
var _ webhook.CustomDefaulter = &Cluster{}
6363
var _ webhook.CustomValidator = &Cluster{}
6464

65+
var errClusterClassNotReconciled = errors.New("ClusterClass is not up to date")
66+
6567
// Default satisfies the defaulting webhook interface.
6668
func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
6769
// We gather all defaulting errors and return them together.
@@ -88,11 +90,11 @@ func (webhook *Cluster) Default(ctx context.Context, obj runtime.Object) error {
8890
}
8991
clusterClass, err := webhook.pollClusterClassForCluster(ctx, cluster)
9092
if err != nil {
91-
// If the ClusterClass can't be found ignore the error.
92-
if apierrors.IsNotFound(err) {
93+
// If the ClusterClass can't be found or is not up to date ignore the error.
94+
if apierrors.IsNotFound(err) || errors.Is(err, errClusterClassNotReconciled) {
9395
return nil
9496
}
95-
return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. Valid ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class))
97+
return apierrors.NewInternalError(errors.Wrapf(err, "Cluster %s can't be defaulted. ClusterClass %s can not be retrieved", cluster.Name, cluster.Spec.Topology.Class))
9698
}
9799

98100
allErrs = append(allErrs, DefaultVariables(cluster, clusterClass)...)
@@ -242,27 +244,28 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
242244
}
243245

244246
// Get the ClusterClass referenced in the Cluster.
245-
clusterClass, clusterClassGetErr := webhook.pollClusterClassForCluster(ctx, newCluster)
246-
if clusterClassGetErr != nil && !apierrors.IsNotFound(clusterClassGetErr) {
247-
// If the error is anything other than "Not Found" return all errors at this point.
247+
clusterClass, clusterClassPollErr := webhook.pollClusterClassForCluster(ctx, newCluster)
248+
if clusterClassPollErr != nil &&
249+
// If the error is anything other than "NotFound" or "NotReconciled" return all errors at this point.
250+
!(apierrors.IsNotFound(clusterClassPollErr) || errors.Is(clusterClassPollErr, errClusterClassNotReconciled)) {
248251
allErrs = append(
249252
allErrs, field.InternalError(
250253
fldPath.Child("class"),
251-
clusterClassGetErr))
254+
clusterClassPollErr))
252255
return allErrs
253256
}
254-
if clusterClassGetErr == nil {
257+
if clusterClassPollErr == nil {
255258
// If there's no error validate the Cluster based on the ClusterClass.
256259
allErrs = append(allErrs, ValidateClusterForClusterClass(newCluster, clusterClass, fldPath)...)
257260
}
258261
if oldCluster != nil { // On update
259262
// The ClusterClass must exist to proceed with update validation. Return an error if the ClusterClass was
260263
// not found.
261-
if clusterClassGetErr != nil {
264+
if apierrors.IsNotFound(clusterClassPollErr) {
262265
allErrs = append(
263266
allErrs, field.InternalError(
264267
fldPath.Child("class"),
265-
clusterClassGetErr))
268+
clusterClassPollErr))
266269
return allErrs
267270
}
268271

@@ -341,7 +344,7 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu
341344
allErrs = append(
342345
allErrs, field.Forbidden(
343346
fldPath.Child("class"),
344-
fmt.Sprintf("ClusterClass with name %q could not be found, change from class %[1]q to class %q cannot be validated",
347+
fmt.Sprintf("valid ClusterClass with name %q could not be found, change from class %[1]q to class %q cannot be validated",
345348
oldCluster.Spec.Topology.Class, newCluster.Spec.Topology.Class)))
346349

347350
// Return early with errors if the ClusterClass can't be retrieved.
@@ -529,24 +532,35 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl
529532
// pollClusterClassForCluster will retry getting the ClusterClass referenced in the Cluster for two seconds.
530533
func (webhook *Cluster) pollClusterClassForCluster(ctx context.Context, cluster *clusterv1.Cluster) (*clusterv1.ClusterClass, error) {
531534
clusterClass := &clusterv1.ClusterClass{}
532-
var clusterClassGetErr error
535+
var clusterClassPollErr error
536+
// TODO: Add a webhook warning if the ClusterClass is not up to date or not found.
533537
_ = util.PollImmediate(200*time.Millisecond, 2*time.Second, func() (bool, error) {
534-
if clusterClassGetErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassGetErr != nil {
538+
if clusterClassPollErr = webhook.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Spec.Topology.Class}, clusterClass); clusterClassPollErr != nil {
535539
return false, nil //nolint:nilerr
536540
}
537541

538-
// Return an error if the ClusterClass has not successfully reconciled because variables aren't correctly
539-
// reconciled.
540-
// TODO: Decide whether to check generation here. This requires creating templates before creating the Cluster and
541-
// may interfere with the way clusterctl move works.
542-
if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) ||
543-
conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) {
544-
clusterClassGetErr = errors.New("ClusterClass is not up to date. If this persists check ClusterClass status")
545-
return false, nil
542+
if clusterClassPollErr = clusterClassIsReconciled(clusterClass); clusterClassPollErr != nil {
543+
return false, nil //nolint:nilerr
546544
}
545+
clusterClassPollErr = nil
547546
return true, nil
548547
})
549-
return clusterClass, clusterClassGetErr
548+
return clusterClass, clusterClassPollErr
549+
}
550+
551+
// clusterClassIsReconciled returns errClusterClassNotReconciled if the ClusterClass has not successfully reconciled or if the
552+
// ClusterClass variables have not been successfully reconciled.
553+
func clusterClassIsReconciled(clusterClass *clusterv1.ClusterClass) error {
554+
// If the clusterClass metadata generation does not match the status observed generation, the ClusterClass has not been successfully reconciled.
555+
if clusterClass.Generation != clusterClass.Status.ObservedGeneration {
556+
return errClusterClassNotReconciled
557+
}
558+
// If the clusterClass does not have ClusterClassVariablesReconciled==True, the ClusterClass has not been successfully reconciled.
559+
if !conditions.Has(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) ||
560+
conditions.IsFalse(clusterClass, clusterv1.ClusterClassVariablesReconciledCondition) {
561+
return errClusterClassNotReconciled
562+
}
563+
return nil
550564
}
551565

552566
// TODO: This function will not be needed once per-definition defaulting and validation is implemented.

internal/webhooks/cluster_test.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1833,6 +1833,129 @@ func TestMovingBetweenManagedAndUnmanaged(t *testing.T) {
18331833
}
18341834
}
18351835

1836+
// TestClusterClassPollingErrors tests when a Cluster can be reconciled given different reconcile states of the ClusterClass.
1837+
func TestClusterClassPollingErrors(t *testing.T) {
1838+
defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.ClusterTopology, true)()
1839+
g := NewWithT(t)
1840+
ref := &corev1.ObjectReference{
1841+
APIVersion: "group.test.io/foo",
1842+
Kind: "barTemplate",
1843+
Name: "baz",
1844+
Namespace: "default",
1845+
}
1846+
1847+
topology := builder.ClusterTopology().WithClass("class1").WithVersion("v1.24.3").Build()
1848+
secondTopology := builder.ClusterTopology().WithClass("class2").WithVersion("v1.24.3").Build()
1849+
notFoundTopology := builder.ClusterTopology().WithClass("doesnotexist").WithVersion("v1.24.3").Build()
1850+
1851+
baseClusterClass := builder.ClusterClass(metav1.NamespaceDefault, "class1").
1852+
WithInfrastructureClusterTemplate(refToUnstructured(ref)).
1853+
WithControlPlaneTemplate(refToUnstructured(ref)).
1854+
WithControlPlaneInfrastructureMachineTemplate(refToUnstructured(ref))
1855+
1856+
// ccFullyReconciled is a ClusterClass with a matching generation and observed generation, and VariablesReconciled=True.
1857+
ccFullyReconciled := baseClusterClass.DeepCopy().Build()
1858+
ccFullyReconciled.Generation = 1
1859+
ccFullyReconciled.Status.ObservedGeneration = 1
1860+
conditions.MarkTrue(ccFullyReconciled, clusterv1.ClusterClassVariablesReconciledCondition)
1861+
1862+
// secondFullyReconciled is a second ClusterClass with a matching generation and observed generation, and VariablesReconciled=True.
1863+
secondFullyReconciled := ccFullyReconciled.DeepCopy()
1864+
secondFullyReconciled.SetName("class2")
1865+
1866+
// ccGenerationMismatch is a ClusterClass with a mismatched generation and observed generation, but VariablesReconciledCondition=True.
1867+
ccGenerationMismatch := baseClusterClass.DeepCopy().Build()
1868+
ccGenerationMismatch.Generation = 999
1869+
ccGenerationMismatch.Status.ObservedGeneration = 1
1870+
conditions.MarkTrue(ccGenerationMismatch, clusterv1.ClusterClassVariablesReconciledCondition)
1871+
1872+
// ccVariablesReconciledFalse with VariablesReconciled=False.
1873+
ccVariablesReconciledFalse := baseClusterClass.DeepCopy().Build()
1874+
conditions.MarkFalse(ccGenerationMismatch, clusterv1.ClusterClassVariablesReconciledCondition, "", clusterv1.ConditionSeverityError, "")
1875+
1876+
tests := []struct {
1877+
name string
1878+
cluster *clusterv1.Cluster
1879+
oldCluster *clusterv1.Cluster
1880+
clusterClasses []*clusterv1.ClusterClass
1881+
wantErr bool
1882+
}{
1883+
{
1884+
name: "Pass on create if ClusterClass is fully reconciled",
1885+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1886+
clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled},
1887+
wantErr: false,
1888+
},
1889+
{
1890+
name: "Pass on create if ClusterClass generation does not match observedGeneration",
1891+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1892+
clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch},
1893+
wantErr: false,
1894+
},
1895+
{
1896+
name: "Pass on create if ClusterClass generation matches observedGeneration but VariablesReconciled=False",
1897+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1898+
clusterClasses: []*clusterv1.ClusterClass{ccVariablesReconciledFalse},
1899+
wantErr: false,
1900+
},
1901+
{
1902+
name: "Pass on create if ClusterClass is not found",
1903+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(),
1904+
clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled},
1905+
wantErr: false,
1906+
},
1907+
{
1908+
name: "Pass on update if oldCluster ClusterClass is fully reconciled",
1909+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(),
1910+
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1911+
clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled, secondFullyReconciled},
1912+
wantErr: false,
1913+
},
1914+
{
1915+
name: "Fail on update if oldCluster ClusterClass generation does not match observedGeneration",
1916+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(secondTopology).Build(),
1917+
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1918+
clusterClasses: []*clusterv1.ClusterClass{ccGenerationMismatch, secondFullyReconciled},
1919+
wantErr: true,
1920+
},
1921+
{
1922+
name: "Fail on update if old Cluster ClusterClass is not found",
1923+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1924+
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(),
1925+
clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled},
1926+
wantErr: true,
1927+
},
1928+
{
1929+
name: "Fail on update if new Cluster ClusterClass is not found",
1930+
cluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(notFoundTopology).Build(),
1931+
oldCluster: builder.Cluster(metav1.NamespaceDefault, "cluster1").WithTopology(topology).Build(),
1932+
clusterClasses: []*clusterv1.ClusterClass{ccFullyReconciled},
1933+
wantErr: true,
1934+
},
1935+
}
1936+
1937+
for _, tt := range tests {
1938+
t.Run(tt.name, func(t *testing.T) {
1939+
// Sets up a reconcile with a fakeClient for the test case.
1940+
objs := []client.Object{}
1941+
for _, cc := range tt.clusterClasses {
1942+
objs = append(objs, cc)
1943+
}
1944+
c := &Cluster{Client: fake.NewClientBuilder().
1945+
WithObjects(objs...).
1946+
WithScheme(fakeScheme).
1947+
Build()}
1948+
1949+
// Checks the return error.
1950+
if tt.wantErr {
1951+
g.Expect(c.validate(ctx, tt.oldCluster, tt.cluster)).NotTo(Succeed())
1952+
} else {
1953+
g.Expect(c.validate(ctx, tt.oldCluster, tt.cluster)).To(Succeed())
1954+
}
1955+
})
1956+
}
1957+
}
1958+
18361959
func refToUnstructured(ref *corev1.ObjectReference) *unstructured.Unstructured {
18371960
gvk := ref.GetObjectKind().GroupVersionKind()
18381961
output := &unstructured.Unstructured{}

0 commit comments

Comments
 (0)