Skip to content

Commit d754ad5

Browse files
committed
clusterctl: always run crd migration if possible to reduce conversion webhook usage
If objects are still stored in the old version, every time an object is requested the kube-apiserver will call the conversion webhook. Instead of lots of conversion webhook calls we once do the migration if possible. Also increases the timeout used for the migrations: Before running CRD migration clusterctl usually upgrades cert-manager which may lead to updating Certificates and rolling out new certificates to our controllers. There is a chance that this can take up to 90s in which conversion, validating or mutatingwebhooks may be unavailable. Because the migration gets run more aggresively during upgrades this also increases the relevant timeouts.
1 parent 3e4c196 commit d754ad5

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

cmd/clusterctl/client/cluster/crd_migration.go

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"k8s.io/apimachinery/pkg/runtime/schema"
3030
"k8s.io/apimachinery/pkg/util/rand"
3131
"k8s.io/apimachinery/pkg/util/sets"
32+
"k8s.io/apimachinery/pkg/util/wait"
3233
"sigs.k8s.io/controller-runtime/pkg/client"
3334

3435
"sigs.k8s.io/cluster-api/cmd/clusterctl/internal/scheme"
@@ -115,9 +116,9 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes
115116
}
116117

117118
currentStatusStoredVersions := sets.Set[string]{}.Insert(currentCRD.Status.StoredVersions...)
118-
// If the new CRD still contains all current stored versions, nothing to do
119-
// as no previous storage version will be dropped.
120-
if servedVersions.HasAll(currentStatusStoredVersions.UnsortedList()...) {
119+
// If the old CRD only contains its current storageVersion as storedVersion,
120+
// nothing to do as all objects are already on the current storageVersion.
121+
if currentStatusStoredVersions.Len() == 1 && currentCRD.Status.StoredVersions[0] == currentStorageVersion {
121122
log.V(2).Info("CRD migration check passed", "name", newCRD.Name)
122123
return false, nil
123124
}
@@ -157,7 +158,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens
157158

158159
var i int
159160
for {
160-
if err := retryWithExponentialBackoff(ctx, newReadBackoff(), func(ctx context.Context) error {
161+
if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error {
161162
return m.Client.List(ctx, list, client.Continue(list.GetContinue()))
162163
}); err != nil {
163164
return errors.Wrapf(err, "failed to list %q", list.GetKind())
@@ -167,7 +168,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens
167168
obj := list.Items[i]
168169

169170
log.V(5).Info("Migrating", logf.UnstructuredToValues(obj)...)
170-
if err := retryWithExponentialBackoff(ctx, newWriteBackoff(), func(ctx context.Context) error {
171+
if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error {
171172
return handleMigrateErr(m.Client.Update(ctx, &obj))
172173
}); err != nil {
173174
return errors.Wrapf(err, "failed to migrate %s/%s", obj.GetNamespace(), obj.GetName())
@@ -230,3 +231,20 @@ func storageVersionForCRD(crd *apiextensionsv1.CustomResourceDefinition) (string
230231
}
231232
return "", errors.Errorf("could not find storage version for CRD %q", crd.Name)
232233
}
234+
235+
// newCRDMigrationBackoff creates a new API Machinery backoff parameter set suitable for use with crd migration operations.
236+
// Clusterctl upgrades cert-manager right before doing CRD migration. This may lead to rollout of new certificates.
237+
// The time between new certificate creation + injection into objects (CRD, Webhooks) and the new secrets getting propagated
238+
// to the controller can be 60-90s, because the kubelet only periodically syncs secret contents to pods.
239+
// During this timespan conversion, validating- or mutationwebhooks may be unavailable and cause a failure.
240+
func newCRDMigrationBackoff() wait.Backoff {
241+
// Return a exponential backoff configuration which returns durations for a total time of ~1m30s.
242+
// Example: 0, .25s, .6s, 1.2, 2.1s, 3.4s, 5.5s, 8s, 12s, 19s, 28s, 43s, 64s, 97s
243+
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
244+
return wait.Backoff{
245+
Duration: 500 * time.Millisecond,
246+
Factor: 1.5,
247+
Steps: 9,
248+
Jitter: 0.1,
249+
}
250+
}

cmd/clusterctl/client/cluster/crd_migration_test.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,28 @@ func Test_CRDMigrator(t *testing.T) {
111111
},
112112
wantIsMigrated: false,
113113
},
114+
{
115+
name: "No-op if new CRD adds a new versions and unserves the old",
116+
currentCRD: &apiextensionsv1.CustomResourceDefinition{
117+
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
118+
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
119+
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
120+
{Name: "v1alpha1", Storage: true, Served: true},
121+
},
122+
},
123+
Status: apiextensionsv1.CustomResourceDefinitionStatus{StoredVersions: []string{"v1alpha1"}},
124+
},
125+
newCRD: &apiextensionsv1.CustomResourceDefinition{
126+
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
127+
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
128+
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
129+
{Name: "v1beta1", Storage: true, Served: true}, // v1beta1 is being added
130+
{Name: "v1alpha1", Served: false}, // v1alpha1 still exists but is not served anymore
131+
},
132+
},
133+
},
134+
wantIsMigrated: false,
135+
},
114136
{
115137
name: "Fails if new CRD drops current storage version",
116138
currentCRD: &apiextensionsv1.CustomResourceDefinition{
@@ -185,7 +207,7 @@ func Test_CRDMigrator(t *testing.T) {
185207
Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"},
186208
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
187209
{Name: "v1", Storage: true, Served: true}, // v1 is being added
188-
{Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration)
210+
{Name: "v1beta1", Served: true}, // v1beta1 still there
189211
// v1alpha1 is being dropped
190212
},
191213
},
@@ -246,8 +268,8 @@ func Test_CRDMigrator(t *testing.T) {
246268
Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"},
247269
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
248270
{Name: "v1", Storage: true, Served: true}, // v1 is being added
249-
{Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration)
250-
{Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served (required for migration)
271+
{Name: "v1beta1", Served: true}, // v1beta1 still there (required for future migration)
272+
{Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served
251273
},
252274
},
253275
},

0 commit comments

Comments
 (0)