Skip to content

Commit adb6819

Browse files
authored
Merge pull request #7355 from sbueringer/pr-detect-cert-expiry-from-apiserver
🌱 Detect certificate expiry from kube-apiserver serving cert
2 parents d59b9f3 + a7a4740 commit adb6819

File tree

10 files changed

+372
-87
lines changed

10 files changed

+372
-87
lines changed

bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,8 @@ const (
6666
const (
6767
// DefaultTokenTTL is the default TTL used for tokens.
6868
DefaultTokenTTL = 15 * time.Minute
69-
70-
// This hard-coded duration matches the hard-coded value used by kubeadm certificate generation.
71-
certificateExpiryDuration = 365 * 24 * time.Hour
7269
)
7370

74-
// now returns the current time.
75-
// This is defined as a variable so that it can be overridden in unit tests.
76-
var now = time.Now
77-
7871
// InitLocker is a lock that is used around kubeadm init.
7972
type InitLocker interface {
8073
Lock(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) bool
@@ -511,10 +504,6 @@ func (r *KubeadmConfigReconciler) handleClusterNotInitialized(ctx context.Contex
511504
return ctrl.Result{}, err
512505
}
513506

514-
// Update the certificate expiration time in the config.
515-
// This annotation will be used by KCP to trigger control plane machines rollout before the certificate generated on the machine are going to expire.
516-
r.addCertificateExpiryAnnotation(scope.Config)
517-
518507
return ctrl.Result{}, nil
519508
}
520509

@@ -716,9 +705,6 @@ func (r *KubeadmConfigReconciler) joinControlplane(ctx context.Context, scope *S
716705
return ctrl.Result{}, err
717706
}
718707

719-
// Update the certificate expiration time in the config.
720-
r.addCertificateExpiryAnnotation(scope.Config)
721-
722708
return ctrl.Result{}, nil
723709
}
724710

@@ -1036,19 +1022,3 @@ func (r *KubeadmConfigReconciler) storeBootstrapData(ctx context.Context, scope
10361022
conditions.MarkTrue(scope.Config, bootstrapv1.DataSecretAvailableCondition)
10371023
return nil
10381024
}
1039-
1040-
// addCertificateExpiryAnnotation sets the certificate expiration time as an
1041-
// annotation on KubeadmConfig, if it doesn't exist already.
1042-
// NOTE: the certificate expiry date stored in the annotation will be slightly different from the one
1043-
// actually used in the certificates - that depends on the exact time kubeadm runs on the machine-,
1044-
// but this approximation is acceptable given that it happens before the actual expiration date.
1045-
func (r *KubeadmConfigReconciler) addCertificateExpiryAnnotation(config *bootstrapv1.KubeadmConfig) {
1046-
annotations := config.GetAnnotations()
1047-
if annotations == nil {
1048-
annotations = map[string]string{}
1049-
}
1050-
if _, ok := annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]; !ok {
1051-
annotations[clusterv1.MachineCertificatesExpiryDateAnnotation] = now().Add(certificateExpiryDuration).Format(time.RFC3339)
1052-
config.SetAnnotations(annotations)
1053-
}
1054-
}

bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller_test.go

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2069,48 +2069,6 @@ func TestKubeadmConfigReconciler_ResolveUsers(t *testing.T) {
20692069
}
20702070
}
20712071

2072-
func TestKubeadmConfigReconciler_ReconcileCertificateExpiryTime(t *testing.T) {
2073-
fakeNow, _ := time.Parse(time.RFC3339, "2022-01-01T00:00:00Z")
2074-
now = func() time.Time {
2075-
return fakeNow
2076-
}
2077-
oneYearFromNow := "2023-01-01T00:00:00Z"
2078-
time2 := "2023-10-01T00:00:00Z"
2079-
2080-
tests := []struct {
2081-
name string
2082-
cfg *bootstrapv1.KubeadmConfig
2083-
wantTime string
2084-
}{
2085-
{
2086-
name: "set the expiry time to one year from now if the expiry time is not set",
2087-
cfg: &bootstrapv1.KubeadmConfig{},
2088-
wantTime: oneYearFromNow,
2089-
},
2090-
{
2091-
name: "do not change the expiry time if it is already set",
2092-
cfg: &bootstrapv1.KubeadmConfig{
2093-
ObjectMeta: metav1.ObjectMeta{
2094-
Annotations: map[string]string{
2095-
clusterv1.MachineCertificatesExpiryDateAnnotation: time2,
2096-
},
2097-
},
2098-
},
2099-
wantTime: time2,
2100-
},
2101-
}
2102-
2103-
for _, tt := range tests {
2104-
t.Run(tt.name, func(t *testing.T) {
2105-
g := NewWithT(t)
2106-
k := &KubeadmConfigReconciler{}
2107-
k.addCertificateExpiryAnnotation(tt.cfg)
2108-
annotations := tt.cfg.GetAnnotations()
2109-
g.Expect(annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]).To(Equal(tt.wantTime))
2110-
})
2111-
}
2112-
}
2113-
21142072
// test utils.
21152073

21162074
// newWorkerMachineForCluster returns a Machine with the passed Cluster's information and a pre-configured name.

controlplane/kubeadm/internal/cluster.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ func (m *Management) GetWorkloadCluster(ctx context.Context, clusterKey client.O
160160
}
161161
tlsConfig.InsecureSkipVerify = true
162162
return &Workload{
163+
restConfig: restConfig,
163164
Client: c,
164165
CoreDNSMigrator: &CoreDNSMigrator{},
165166
etcdClientGenerator: NewEtcdClientGenerator(restConfig, tlsConfig, m.EtcdDialTimeout),

controlplane/kubeadm/internal/control_plane.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,12 @@ func (c *ControlPlane) HasDeletingMachine() bool {
244244
return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0
245245
}
246246

247+
// GetKubeadmConfig returns the KubeadmConfig of a given machine.
248+
func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) {
249+
kubeadmConfig, ok := c.kubeadmConfigs[machineName]
250+
return kubeadmConfig, ok
251+
}
252+
247253
// MachinesNeedingRollout return a list of machines that need to be rolled out.
248254
func (c *ControlPlane) MachinesNeedingRollout() collections.Machines {
249255
// Ignore machines to be deleted.

controlplane/kubeadm/internal/controllers/controller.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,11 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
333333
return result, err
334334
}
335335

336+
// Reconcile certificate expiry for machines that don't have the expiry annotation on KubeadmConfig yet.
337+
if result, err := r.reconcileCertificateExpiries(ctx, controlPlane); err != nil || !result.IsZero() {
338+
return result, err
339+
}
340+
336341
// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
337342
needRollout := controlPlane.MachinesNeedingRollout()
338343
switch {
@@ -578,6 +583,71 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
578583
return ctrl.Result{}, nil
579584
}
580585

586+
func (r *KubeadmControlPlaneReconciler) reconcileCertificateExpiries(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
587+
log := ctrl.LoggerFrom(ctx)
588+
589+
// Return if there are no KCP-owned control-plane machines.
590+
if controlPlane.Machines.Len() == 0 {
591+
return ctrl.Result{}, nil
592+
}
593+
594+
// Ignore machines which are being deleted.
595+
machines := controlPlane.Machines.Filter(collections.Not(collections.HasDeletionTimestamp))
596+
597+
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
598+
if err != nil {
599+
return ctrl.Result{}, errors.Wrap(err, "failed to reconcile certificate expiries: cannot get remote client to workload cluster")
600+
}
601+
602+
for _, m := range machines {
603+
log = log.WithValues("Machine", klog.KObj(m))
604+
605+
kubeadmConfig, ok := controlPlane.GetKubeadmConfig(m.Name)
606+
if !ok {
607+
// Skip if the Machine doesn't have a KubeadmConfig.
608+
continue
609+
}
610+
611+
annotations := kubeadmConfig.GetAnnotations()
612+
if _, ok := annotations[clusterv1.MachineCertificatesExpiryDateAnnotation]; ok {
613+
// Skip if annotation is already set.
614+
continue
615+
}
616+
617+
if m.Status.NodeRef == nil {
618+
// Skip if the Machine is still provisioning.
619+
continue
620+
}
621+
nodeName := m.Status.NodeRef.Name
622+
log = log.WithValues("Node", klog.KRef("", nodeName))
623+
624+
log.V(3).Info("Reconciling certificate expiry")
625+
certificateExpiry, err := workloadCluster.GetAPIServerCertificateExpiry(ctx, kubeadmConfig, nodeName)
626+
if err != nil {
627+
return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s", m.Name)
628+
}
629+
expiry := certificateExpiry.Format(time.RFC3339)
630+
631+
log.V(2).Info(fmt.Sprintf("Setting certificate expiry to %s", expiry))
632+
patchHelper, err := patch.NewHelper(kubeadmConfig, r.Client)
633+
if err != nil {
634+
return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s: failed to create PatchHelper for KubeadmConfig/%s", m.Name, kubeadmConfig.Name)
635+
}
636+
637+
if annotations == nil {
638+
annotations = map[string]string{}
639+
}
640+
annotations[clusterv1.MachineCertificatesExpiryDateAnnotation] = expiry
641+
kubeadmConfig.SetAnnotations(annotations)
642+
643+
if err := patchHelper.Patch(ctx, kubeadmConfig); err != nil {
644+
return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile certificate expiry for Machine/%s: failed to patch KubeadmConfig/%s", m.Name, kubeadmConfig.Name)
645+
}
646+
}
647+
648+
return ctrl.Result{}, nil
649+
}
650+
581651
func (r *KubeadmControlPlaneReconciler) adoptMachines(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, machines collections.Machines, cluster *clusterv1.Cluster) error {
582652
// We do an uncached full quorum read against the KCP to avoid re-adopting Machines the garbage collector just intentionally orphaned
583653
// See https://github.com/kubernetes/kubernetes/issues/42639

0 commit comments

Comments
 (0)