Skip to content

Commit 38b30b7

Browse files
authored
fix: fix Managed Cluster - Node Recycling test (#488)
* reuse client * import apierror * add comment on refresh client * move comment
1 parent eb5f768 commit 38b30b7

File tree

1 file changed

+65
-23
lines changed

1 file changed

+65
-23
lines changed

test/e2e/managed_cluster_test.go

Lines changed: 65 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"github.com/oracle/oci-go-sdk/v65/common"
3636
oke "github.com/oracle/oci-go-sdk/v65/containerengine"
3737
corev1 "k8s.io/api/core/v1"
38+
apierrors "k8s.io/apimachinery/pkg/api/errors"
3839
"k8s.io/apimachinery/pkg/types"
3940
"k8s.io/apimachinery/pkg/util/wait"
4041
"k8s.io/klog/v2"
@@ -107,16 +108,16 @@ var _ = Describe("Managed Workload cluster creation", func() {
107108
}
108109

109110
cleanInput := cleanupInput{
110-
SpecName: specName,
111-
Cluster: result.Cluster,
112-
ClusterProxy: bootstrapClusterProxy,
113-
Namespace: namespace,
111+
SpecName: specName,
112+
Cluster: result.Cluster,
113+
ClusterProxy: bootstrapClusterProxy,
114+
Namespace: namespace,
114115
ClusterctlConfigPath: clusterctlConfigPath,
115-
CancelWatches: cancelWatches,
116-
IntervalsGetter: e2eConfig.GetIntervals,
117-
SkipCleanup: skipCleanup,
118-
AdditionalCleanup: additionalCleanup,
119-
ArtifactFolder: artifactFolder,
116+
CancelWatches: cancelWatches,
117+
IntervalsGetter: e2eConfig.GetIntervals,
118+
SkipCleanup: skipCleanup,
119+
AdditionalCleanup: additionalCleanup,
120+
ArtifactFolder: artifactFolder,
120121
}
121122
dumpSpecResourcesAndCleanup(ctx, cleanInput)
122123
})
@@ -506,33 +507,74 @@ func validateMachinePoolMachines(ctx context.Context, cluster *clusterv1.Cluster
506507
}
507508

508509
// getMachinePoolInstanceVersions returns the Kubernetes versions of the machine pool instances.
509-
// This method was forked because we need to lookup the kubeconfig with each call
510-
// as the tokens are refreshed in case of OKE
511-
func getMachinePoolInstanceVersions(ctx context.Context, clusterProxy framework.ClusterProxy, cluster *clusterv1.Cluster, machinePool *expv1.MachinePool) []string {
510+
// For managed clusters like OKE, we need to lookup the kubeconfig to access the workload cluster.
511+
// This function handles kubeconfig rotation by refreshing the client only when necessary.
512+
func getMachinePoolInstanceVersions(
513+
ctx context.Context,
514+
clusterProxy framework.ClusterProxy,
515+
cluster *clusterv1.Cluster,
516+
machinePool *expv1.MachinePool,
517+
) []string {
512518
Expect(ctx).NotTo(BeNil(), "ctx is required for getMachinePoolInstanceVersions")
513519

514520
instances := machinePool.Status.NodeRefs
515521
versions := make([]string, len(instances))
522+
if len(instances) == 0 {
523+
return versions
524+
}
525+
526+
getWorkloadClient := func(ctx context.Context) (client.Client, error) {
527+
wc := clusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)
528+
if wc == nil {
529+
return nil, errors.New("workload cluster is nil")
530+
}
531+
c := wc.GetClient()
532+
if c == nil {
533+
return nil, errors.New("workload client is nil")
534+
}
535+
return c, nil
536+
}
537+
538+
var workloadClient client.Client
539+
516540
for i, instance := range instances {
541+
nodeName := instance.Name
517542
node := &corev1.Node{}
518-
var nodeGetError error
543+
var lastErr error
544+
519545
err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 10*time.Second, true, func(ctx context.Context) (bool, error) {
520-
nodeGetError = clusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name).
521-
GetClient().Get(ctx, client.ObjectKey{Name: instance.Name}, node)
522-
if nodeGetError != nil {
523-
return false, nil //nolint:nilerr
546+
if workloadClient == nil {
547+
workloadClient, lastErr = getWorkloadClient(ctx)
548+
if lastErr != nil {
549+
lastErr = errors.Wrap(lastErr, "failed to get workload client")
550+
return false, nil
551+
}
524552
}
525-
return true, nil
553+
554+
lastErr = workloadClient.Get(ctx, client.ObjectKey{Name: nodeName}, node)
555+
if lastErr == nil {
556+
return true, nil
557+
}
558+
559+
if apierrors.IsUnauthorized(lastErr) ||
560+
strings.Contains(strings.ToLower(lastErr.Error()), "unauthorized") {
561+
// Reset the workload client to force a fresh client on the next iteration when error occurs
562+
workloadClient = nil
563+
return false, nil
564+
}
565+
566+
return false, nil
526567
})
568+
527569
if err != nil {
528570
versions[i] = "unknown"
529-
if nodeGetError != nil {
530-
// Dump the instance name and error here so that we can log it as part of the version array later on.
531-
versions[i] = fmt.Sprintf("%s error: %s", instance.Name, errors.Wrap(err, nodeGetError.Error()))
571+
if lastErr != nil {
572+
versions[i] = fmt.Sprintf("%s error: %v", nodeName, lastErr)
532573
}
533-
} else {
534-
versions[i] = node.Status.NodeInfo.KubeletVersion
574+
continue
535575
}
576+
577+
versions[i] = node.Status.NodeInfo.KubeletVersion
536578
}
537579

538580
return versions

0 commit comments

Comments
 (0)