Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions internal/testutil/fixture/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,42 @@ func (f *KubernetesServiceFixture) WithIngressIPs(ips []string) *KubernetesServi
return f
}

func (f *KubernetesServiceFixture) WithOnlyTCPPorts() *KubernetesServiceFixture {
f.svc.Spec.Ports = []v1.ServicePort{
{
Name: "http",
Protocol: v1.ProtocolTCP,
Port: 80,
NodePort: 50080,
},
{
Name: "dns-tcp",
Protocol: v1.ProtocolTCP,
Port: 53,
NodePort: 50053,
},
{
Name: "https",
Protocol: v1.ProtocolTCP,
Port: 443,
NodePort: 50443,
},
}
return f
}

func (f *KubernetesServiceFixture) WithOnlyUDPPorts() *KubernetesServiceFixture {
f.svc.Spec.Ports = []v1.ServicePort{
{
Name: "dns-udp",
Protocol: v1.ProtocolUDP,
Port: 53,
NodePort: 50053,
},
}
return f
}

func (f *KubernetesServiceFixture) Build() v1.Service {
return f.svc
}
12 changes: 9 additions & 3 deletions pkg/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,11 @@ const (
LoadBalancerSKUBasic = "basic"
// LoadBalancerSKUStandard is the load balancer standard SKU
LoadBalancerSKUStandard = "standard"
// LoadBalancerSKUService is the load balancer service SKU
LoadBalancerSKUService = "service"

// PodLabelServiceEgressGateway is the label used on the pod
PodLabelServiceEgressGateway = "kubernetes.azure.com/service-egress-gateway"

// ServiceAnnotationLoadBalancerInternal is the annotation used on the service
ServiceAnnotationLoadBalancerInternal = "service.beta.kubernetes.io/azure-load-balancer-internal"
Expand Down Expand Up @@ -364,6 +369,8 @@ const (
BackendPoolIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/backendAddressPools/%s"
// LoadBalancerProbeIDTemplate is the template of the load balancer probe
LoadBalancerProbeIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/loadBalancers/%s/probes/%s"
// NatGatewayIDTemplate is the template of the nat gateway
NatGatewayIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/natGateways/%s"

// InternalLoadBalancerNameSuffix is load balancer suffix
InternalLoadBalancerNameSuffix = "-internal"
Expand All @@ -381,9 +388,8 @@ const (
LoadBalancerBackendPoolConfigurationTypeNodeIPConfiguration = "nodeIPConfiguration"
// LoadBalancerBackendPoolConfigurationTypeNodeIP is the lb backend pool config type node ip
LoadBalancerBackendPoolConfigurationTypeNodeIP = "nodeIP"
// LoadBalancerBackendPoolConfigurationTypePODIP is the lb backend pool config type pod ip
// TODO (nilo19): support pod IP in the future
LoadBalancerBackendPoolConfigurationTypePODIP = "podIP"
// LoadBalancerBackendPoolConfigurationTypePodIP is the lb backend pool config type pod ip
LoadBalancerBackendPoolConfigurationTypePodIP = "podIP"
)

// error messages
Expand Down
18 changes: 14 additions & 4 deletions pkg/provider/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,18 @@ func (az *Cloud) InitializeCloudFromConfig(ctx context.Context, config *config.C
return fmt.Errorf("InitializeCloudFromConfig: cannot initialize from nil config")
}

// Use a single flag to determine if the service gateway is enabled.
// All 3 conditions must be true:
// 1. ServiceGatewayEnabled is true
// 2. lb sku is service
// 3. backendPoolType is PodIP
if az.ServiceGatewayEnabled && az.IsLBBackendPoolTypePodIPAndUseServiceLoadBalancer() {
klog.V(2).Info("InitializeCloudFromConfig: Service Gateway is enabled, using PodIP backend pool type with Service Load Balancer")
az.ServiceGatewayEnabled = true
} else {
az.ServiceGatewayEnabled = false
}

if config.RouteTableResourceGroup == "" {
config.RouteTableResourceGroup = config.ResourceGroup
}
Expand Down Expand Up @@ -298,15 +310,13 @@ func (az *Cloud) InitializeCloudFromConfig(ctx context.Context, config *config.C
}
}

if config.LoadBalancerBackendPoolConfigurationType == "" ||
// TODO(nilo19): support pod IP mode in the future
strings.EqualFold(config.LoadBalancerBackendPoolConfigurationType, consts.LoadBalancerBackendPoolConfigurationTypePODIP) {
if config.LoadBalancerBackendPoolConfigurationType == "" {
config.LoadBalancerBackendPoolConfigurationType = consts.LoadBalancerBackendPoolConfigurationTypeNodeIPConfiguration
} else {
supportedLoadBalancerBackendPoolConfigurationTypes := utilsets.NewString(
strings.ToLower(consts.LoadBalancerBackendPoolConfigurationTypeNodeIPConfiguration),
strings.ToLower(consts.LoadBalancerBackendPoolConfigurationTypeNodeIP),
strings.ToLower(consts.LoadBalancerBackendPoolConfigurationTypePODIP))
strings.ToLower(consts.LoadBalancerBackendPoolConfigurationTypePodIP))
if !supportedLoadBalancerBackendPoolConfigurationTypes.Has(strings.ToLower(config.LoadBalancerBackendPoolConfigurationType)) {
return fmt.Errorf("loadBalancerBackendPoolConfigurationType %s is not supported, supported values are %v", config.LoadBalancerBackendPoolConfigurationType, supportedLoadBalancerBackendPoolConfigurationTypes.UnsortedList())
}
Expand Down
27 changes: 27 additions & 0 deletions pkg/provider/azure_fakes.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package provider

import (
"net/netip"

"go.uber.org/mock/gomock"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
Expand All @@ -29,6 +31,7 @@ import (
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/diskclient/mock_diskclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/interfaceclient/mock_interfaceclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/loadbalancerclient/mock_loadbalancerclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/managedclusterclient/mock_managedclusterclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/mock_azclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/privateendpointclient/mock_privateendpointclient"
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/privatelinkserviceclient/mock_privatelinkserviceclient"
Expand Down Expand Up @@ -138,6 +141,9 @@ func GetTestCloud(ctrl *gomock.Controller) (az *Cloud) {
virtualMachinesClient := mock_virtualmachineclient.NewMockInterface(ctrl)
clientFactory.EXPECT().GetVirtualMachineClient().Return(virtualMachinesClient).AnyTimes()

managedClusterClient := mock_managedclusterclient.NewMockInterface(ctrl)
clientFactory.EXPECT().GetManagedClusterClient().Return(managedClusterClient).AnyTimes()

securtyGrouptrack2Client := mock_securitygroupclient.NewMockInterface(ctrl)
clientFactory.EXPECT().GetSecurityGroupClient().Return(securtyGrouptrack2Client).AnyTimes()
mockPrivateDNSClient := mock_privatezoneclient.NewMockInterface(ctrl)
Expand Down Expand Up @@ -186,3 +192,24 @@ func GetTestCloudWithExtendedLocation(ctrl *gomock.Controller) (az *Cloud) {
az.Config.ExtendedLocationType = "EdgeZone"
return az
}

// GetTestCloudWithContainerLoadBalancer returns a fake azure cloud for unit tests in Azure supporting container load balancer.
func GetTestCloudWithContainerLoadBalancer(ctrl *gomock.Controller) (az *Cloud) {
az = GetTestCloud(ctrl)
az.LoadBalancerBackendPoolConfigurationType = consts.LoadBalancerBackendPoolConfigurationTypePodIP
az.LoadBalancerSKU = consts.LoadBalancerSKUService
az.ServiceGatewayEnabled = true
return az
}

func GetTestCloudWithContainerLoadBalancerAndPrefixCidr(ctrl *gomock.Controller, isIPv6 bool) (az *Cloud) {
az = GetTestCloudWithContainerLoadBalancer(ctrl)
if !isIPv6 {
prefix, _ := netip.ParsePrefix("10.0.0.1/32")
az.PodCidrsIPv4 = []netip.Prefix{prefix}
} else {
prefix, _ := netip.ParsePrefix("2001:db8::/64")
az.PodCidrsIPv6 = []netip.Prefix{prefix}
}
return az
}
101 changes: 101 additions & 0 deletions pkg/provider/azure_natgateway_repo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package provider

import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
)

func (az *Cloud) getNatGateway(ctx context.Context, natGatewayResourceGroup string, natGatewayName string) (*armnetwork.NatGateway, error) {
klog.Infof("NatGatewayClient.Get(%s) in resource group %s: start", natGatewayName, natGatewayResourceGroup)
result, err := az.NetworkClientFactory.GetNatGatewayClient().Get(ctx, natGatewayResourceGroup, natGatewayName, nil)
if err != nil {
klog.Errorf("NatGatewayClient.Get(%s) in resource group %s failed: %v", natGatewayName, natGatewayResourceGroup, err)
return nil, err
}
klog.V(10).Infof("NatGatewayClient.Get(%s) in resource group %s: success", natGatewayName, natGatewayResourceGroup)
klog.Infof("NatGatewayClient.Get(%s) in resource group %s: end, error: nil", natGatewayName, natGatewayResourceGroup)
return result, nil
}

// CreateOrUpdateLB invokes az.NetworkClientFactory.GetLoadBalancerClient().CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateNatGateway(ctx context.Context, natGatewayResourceGroup string, natGateway armnetwork.NatGateway) error {
natGatewayName := ptr.Deref(natGateway.Name, "")
klog.Infof("NatGatewayClient.CreateOrUpdate(%s): start", natGatewayName)

// Endless retry loop with 5-second intervals
for {
_, err := az.NetworkClientFactory.GetNatGatewayClient().CreateOrUpdate(ctx, natGatewayResourceGroup, natGatewayName, natGateway)
if err == nil {
klog.V(10).Infof("NatGatewayClient.CreateOrUpdate(%s): success", natGatewayName)
klog.Infof("NatGatewayClient.CreateOrUpdate(%s): end, error: nil", natGatewayName)
return nil
}

natGatewayJSON, _ := json.Marshal(natGateway)
klog.Warningf("NatGatewayClient.CreateOrUpdate(%s) failed: %v, NatGateway request: %s", natGatewayName, err, string(natGatewayJSON))

// Check if context is canceled
select {
case <-ctx.Done():
klog.V(3).Infof("createOrUpdateNatGateway: context canceled, stopping retry")
return fmt.Errorf("context canceled: %w", ctx.Err())
default:
// Continue with retry
}

// Wait 5 seconds before retrying
klog.V(3).Infof("createOrUpdateNatGateway: retrying in 5 seconds for NAT Gateway %s", natGatewayName)
time.Sleep(5 * time.Second)
}
}

func (az *Cloud) deleteNatGateway(ctx context.Context, natGatewayResourceGroup string, natGatewayName string) error {
klog.Infof("NatGatewayClient.Delete(%s) in resource group %s: start", natGatewayName, natGatewayResourceGroup)

// Endless retry loop with 5-second intervals
for {
err := az.NetworkClientFactory.GetNatGatewayClient().Delete(ctx, natGatewayResourceGroup, natGatewayName)
if err == nil {
klog.V(10).Infof("NatGatewayClient.Delete(%s) in resource group %s: success", natGatewayName, natGatewayResourceGroup)
klog.Infof("NatGatewayClient.Delete(%s) in resource group %s: end, error: nil", natGatewayName, natGatewayResourceGroup)
return nil
}

klog.Errorf("NatGatewayClient.Delete(%s) in resource group %s failed: %v", natGatewayName, natGatewayResourceGroup, err)

// Check if context is canceled
select {
case <-ctx.Done():
klog.V(3).Infof("deleteNatGateway: context canceled, stopping retry")
return fmt.Errorf("context canceled: %w", ctx.Err())
default:
// Continue with retry
}

// Wait 5 seconds before retrying
klog.V(3).Infof("deleteNatGateway: retrying in 5 seconds for NAT Gateway %s", natGatewayName)
time.Sleep(5 * time.Second)
}
}
52 changes: 52 additions & 0 deletions pkg/provider/azure_publicip_repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,32 @@ import (
"sigs.k8s.io/cloud-provider-azure/pkg/util/deepcopy"
)

func (az *Cloud) CreateOrUpdatePIPOutbound(ctx context.Context, pipResourceGroup string, pip *armnetwork.PublicIPAddress) error {
klog.Infof("CreateOrUpdatePIPOutbound(%s): start", ptr.Deref(pip.Name, ""))

// Endless retry loop with 5-second intervals
for {
// Call the existing CreateOrUpdatePIP function
err := az.CreateOrUpdatePIP(nil, pipResourceGroup, pip)
if err == nil {
return nil
}

// Check if context is canceled
select {
case <-ctx.Done():
klog.V(3).Infof("CreateOrUpdatePIPOutbound: context canceled, stopping retry")
return fmt.Errorf("context canceled: %w", ctx.Err())
default:
// Continue with retry
}

// Wait 5 seconds before retrying
klog.V(3).Infof("CreateOrUpdatePIPOutbound: retrying in 5 seconds for PIP %s", ptr.Deref(pip.Name, ""))
time.Sleep(5 * time.Second)
}
}

// CreateOrUpdatePIP invokes az.NetworkClientFactory.GetPublicIPAddressClient().CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string, pip *armnetwork.PublicIPAddress) error {
ctx, cancel := getContextWithCancel()
Expand Down Expand Up @@ -74,6 +100,32 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string,
return rerr
}

func (az *Cloud) DeletePublicIPOutbound(ctx context.Context, pipResourceGroup string, pipName string) error {
klog.Infof("DeletePublicIPOutbound(%s): start", pipName)

// Endless retry loop with 5-second intervals
for {
// Call the existing DeletePublicIP function
err := az.DeletePublicIP(nil, pipResourceGroup, pipName)
if err == nil {
return nil
}

// Check if context is canceled
select {
case <-ctx.Done():
klog.V(3).Infof("DeletePublicIPOutbound: context canceled, stopping retry")
return fmt.Errorf("context canceled: %w", ctx.Err())
default:
// Continue with retry
}

// Wait 5 seconds before retrying
klog.V(3).Infof("DeletePublicIPOutbound: retrying in 5 seconds for PIP %s", pipName)
time.Sleep(5 * time.Second)
}
}

// DeletePublicIP invokes az.NetworkClientFactory.GetPublicIPAddressClient().Delete with exponential backoff retry
func (az *Cloud) DeletePublicIP(service *v1.Service, pipResourceGroup string, pipName string) error {
ctx, cancel := getContextWithCancel()
Expand Down
16 changes: 15 additions & 1 deletion pkg/provider/azure_standard.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,28 @@ func getServiceName(service *v1.Service) string {
return fmt.Sprintf("%s/%s", service.Namespace, service.Name)
}

// This returns a unique identifier for the Service used to tag some resources.
func getServiceUID(service *v1.Service) string {
return strings.ToLower(string(service.UID))
}

// This returns a prefix for loadbalancer/security rules.
func (az *Cloud) getRulePrefix(service *v1.Service) string {
return az.GetLoadBalancerName(context.TODO(), "", service)
}

func (az *Cloud) getPublicIPName(clusterName string, service *v1.Service, isIPv6 bool) (string, error) {
isDualStack := isServiceDualStack(service)
pipName := fmt.Sprintf("%s-%s", clusterName, az.GetLoadBalancerName(context.TODO(), clusterName, service))

var pipName string
if az.ServiceGatewayEnabled {
// Base name: <serviceUID>-pip
pipName = fmt.Sprintf("%s-pip", getServiceUID(service))
} else {
// Legacy scheme: tied to clusterName — per-cluster naming.
pipName = fmt.Sprintf("%s-%s", clusterName, az.GetLoadBalancerName(context.TODO(), clusterName, service))
}

if id := getServicePIPPrefixID(service, isIPv6); id != "" {
id, err := getLastSegment(id, "/")
if err == nil {
Expand Down
Loading