From 6aa6f0217ef5e290be7fb163dc534750dc4d2826 Mon Sep 17 00:00:00 2001 From: Xuan Liu Date: Thu, 20 Nov 2025 15:19:17 -0500 Subject: [PATCH 1/2] Enable IMDS to avoid specifying OCI_REGION_METADATA --- .../cloudprovider/oci/instancepools/oci_cloud_provider.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go index 727148aa9442..232f47d0bcb5 100644 --- a/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/oci/instancepools/oci_cloud_provider.go @@ -5,6 +5,7 @@ Copyright 2021-2023 Oracle and/or its affiliates. package instancepools import ( + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/common" "strings" "github.com/pkg/errors" @@ -150,6 +151,7 @@ func (ocp *OciCloudProvider) Refresh() error { // BuildOCI constructs the OciCloudProvider object that implements the could provider interface (InstancePoolManager). 
func BuildOCI(opts *coreoptions.AutoscalerOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { + common.EnableInstanceMetadataServiceLookup() ocidType, err := ocicommon.GetAllPoolTypes(opts.NodeGroups) if err != nil { klog.Fatalf("Failed to get pool type: %v", err) From 462beda664eaca07c46dff5824577b4949367fe5 Mon Sep 17 00:00:00 2001 From: Xuan Liu Date: Thu, 20 Nov 2025 15:21:58 -0500 Subject: [PATCH 2/2] Autoscaler should not try to monitor or scale virtual nodes and virtual node pools --- .../cloudprovider/oci/nodepools/consts/annotations.go | 3 +++ .../cloudprovider/oci/nodepools/oci_cloud_provider.go | 6 ++++++ .../cloudprovider/oci/nodepools/oci_manager.go | 3 +++ .../cloudprovider/oci/nodepools/oci_manager_test.go | 10 ++++++++++ 4 files changed, 22 insertions(+) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/consts/annotations.go b/cluster-autoscaler/cloudprovider/oci/nodepools/consts/annotations.go index 7b335a6353ea..8a00cd137ed6 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/consts/annotations.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/consts/annotations.go @@ -28,4 +28,7 @@ const ( // EphemeralStorageSize is the freeform tag key that would be used to determine the ephemeral-storage size of the node EphemeralStorageSize = "cluster-autoscaler/node-ephemeral-storage" + + // OciVirtualNodeResourceIdent is the string identifier in the ocid that indicates the resource is a virtual node pool + OciVirtualNodeResourceIdent = "virtualnode" ) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_cloud_provider.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_cloud_provider.go index ae3f70549151..c06aeab83fda 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_cloud_provider.go @@ -10,9 +10,11 @@ import ( "k8s.io/apimachinery/pkg/api/resource" 
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider" ocicommon "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/common" + npconsts "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts" caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" klog "k8s.io/klog/v2" + "strings" ) // OciCloudProvider creates a cloud provider object that is compatible with node pools @@ -86,6 +88,10 @@ func (ocp *OciCloudProvider) HasInstance(node *apiv1.Node) (bool, error) { if err != nil { return true, err } + // Properly handle virtual nodes and missing node pool IDs to prevent crashes + if np == nil || np.Id() == "" || strings.Contains(instance.InstanceID, npconsts.OciVirtualNodeResourceIdent) { + return false, cloudprovider.ErrNotImplemented + } nodes, err := ocp.manager.GetNodePoolNodes(np) if err != nil { return true, err diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go index a18ff920b0d4..d41b589415ab 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager.go @@ -630,6 +630,9 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance // GetNodePoolForInstance returns NodePool to which the given instance belongs. 
func (m *ociManagerImpl) GetNodePoolForInstance(instance ocicommon.OciRef) (NodePool, error) { + if strings.Contains(instance.InstanceID, npconsts.OciVirtualNodeResourceIdent) { + return nil, nil + } if instance.NodePoolID == "" { klog.V(4).Infof("node pool id missing from reference: %+v", instance) diff --git a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go index e7e243d827e0..ab3340a6d8ca 100644 --- a/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go +++ b/cluster-autoscaler/cloudprovider/oci/nodepools/oci_manager_test.go @@ -97,6 +97,16 @@ func TestGetNodePoolForInstance(t *testing.T) { if np.Id() != "ocid2" { t.Fatalf("got unexpected ocid %q ; wanted \"ocid2\"", np.Id()) } + + // now verify that a virtual node instance id resolves to no node pool (nil) and no error + np, err = manager.GetNodePoolForInstance(ocicommon.OciRef{InstanceID: "virtualnode"}) + if err != nil { + t.Fatalf("unexpected error: %+v", err) + } + + if np != nil { + t.Fatalf("got unexpected ocid %q ; wanted nil", np.Id()) + } } func TestGetNodePoolNodes(t *testing.T) {