Skip to content

Commit 338c089

Browse files
committed
[registry_ctrl] Avoid changing deleted nodes
It has been observed that the registry controller can copy containerd configurations to a Windows BYOH instance even after the Node has started the deletion process. This commit hardens the reconcile logic against this timing issue by re-fetching each node immediately before processing it. While this does not completely fix the issue, it avoids introducing a mutex for manipulating a given node. Given the low severity of the issue, I believe it does not warrant the complications that a mutex would add.
1 parent fe52184 commit 338c089

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

controllers/registry_controller.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
config "github.com/openshift/api/config/v1"
2424
core "k8s.io/api/core/v1"
25+
k8sapierrors "k8s.io/apimachinery/pkg/api/errors"
2526
"k8s.io/apimachinery/pkg/types"
2627
"k8s.io/client-go/kubernetes"
2728
ctrl "sigs.k8s.io/controller-runtime"
@@ -94,7 +95,17 @@ func (r *registryReconciler) Reconcile(ctx context.Context, req ctrl.Request) (r
9495
if err != nil {
9596
return ctrl.Result{}, fmt.Errorf("unable to create signer from private key secret: %w", err)
9697
}
97-
for _, node := range nodes.Items {
98+
for _, n := range nodes.Items {
99+
// Ensure the node status is up to date, going through this list could take a long time depending on the
100+
// number of Windows nodes in the cluster
101+
var node core.Node
102+
if err := r.client.Get(ctx, types.NamespacedName{Name: n.GetName()}, &node); err != nil {
103+
if k8sapierrors.IsNotFound(err) {
104+
continue
105+
}
106+
return ctrl.Result{}, fmt.Errorf("unable to get node %s: %w", n.GetName(), err)
107+
}
108+
98109
winInstance, err := r.instanceFromNode(ctx, &node)
99110
if err != nil {
100111
return ctrl.Result{}, fmt.Errorf("unable to create instance object from node: %w", err)

0 commit comments

Comments
 (0)