Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions cloud/services/networkinterfaces/ipam.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
Copyright 2024 The Kubernetes Authors.
Portions Copyright © Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package networkinterfaces

import (
"context"

"github.com/Azure/go-autorest/autorest/to"
"github.com/go-logr/logr"
"github.com/microsoft/moc-sdk-for-go/services/network"
"go.uber.org/multierr"

ipam "dev.azure.com/msazure/msk8s/_git/azstackhci-operator.git/pkg/ipam"
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/scope"
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/telemetry"
)

// CAPHTelemetryWriter implements ipam.IPAMTelemetryWriter for CAPH.
type CAPHTelemetryWriter struct {
vmScope *scope.VirtualMachineScope
}

// WriteIPAMOperationLog implements ipam.IPAMTelemetryWriter.
func (w *CAPHTelemetryWriter) WriteIPAMOperationLog(logger logr.Logger, operation ipam.IPAMOperation, claimName string, params map[string]string, err error) {
var telemetryOp telemetry.Operation
switch operation {
case ipam.OperationCreate, ipam.OperationSync:
telemetryOp = telemetry.Create
case ipam.OperationDelete:
telemetryOp = telemetry.Delete
case ipam.OperationGet:
telemetryOp = telemetry.Get
default:
telemetryOp = telemetry.Create
}

telemetry.WriteMocOperationLog(
logger,
telemetryOp,
w.vmScope.GetCustomResourceTypeWithName(),
telemetry.IPAddressClaim,
telemetry.GenerateMocResourceName(w.vmScope.GetResourceGroup(), claimName),
params,
err,
)
}

// IPAMService wraps ipam.IPAMService for CAPH-specific functionality.
type IPAMService struct {
*ipam.IPAMService
}

// NewIPAMService creates a new IPAM service instance.
func NewIPAMService(vmscope *scope.VirtualMachineScope) *IPAMService {
logger := vmscope.GetLogger()

config := ipam.IPAMServiceConfig{
Client: vmscope.Client(),
Logger: logger,
Namespace: vmscope.Namespace(),
VnetName: vmscope.VnetName(),
CloudFqdn: vmscope.CloudAgentFqdn,
Authorizer: vmscope.Authorizer,
TelemetryWriter: &CAPHTelemetryWriter{vmScope: vmscope},
ClusterName: vmscope.ClusterName(),
CreatorID: ipam.IPClaimCreatorCAPH,
Owner: vmscope.AzureStackHCIVirtualMachine,
}

return &IPAMService{
IPAMService: ipam.NewIPAMService(config),
}
}

// AllocateNicIPClaim allocates IPClaims for all IP configurations on a NIC.
func (s *IPAMService) AllocateNicIPClaim(ctx context.Context, mocNic network.Interface, staticIPAddress string) error {
var errs error
for index, ipconfig := range *mocNic.IPConfigurations {
claimName := ipam.GenerateNICIPClaimName(*mocNic.Name, index)
if allocatedIP, err := s.AllocateIP(ctx, claimName, staticIPAddress); err != nil {
errs = multierr.Append(errs, err)
} else {
ipconfig.InterfaceIPConfigurationPropertiesFormat.PrivateIPAddress = to.StringPtr(allocatedIP)
}
}
return errs
}

// SyncNicIPClaim syncs IPClaims for all IP configurations on a NIC.
func (s *IPAMService) SyncNicIPClaim(ctx context.Context, mocNic network.Interface) error {
var errs error
for index, ipconfig := range *mocNic.IPConfigurations {
claimName := ipam.GenerateNICIPClaimName(*mocNic.Name, index)
if err := s.SyncIPClaim(ctx, claimName, *(ipconfig.InterfaceIPConfigurationPropertiesFormat.PrivateIPAddress)); err != nil {
errs = multierr.Append(errs, err)
}
}
return errs
}

// DeleteNicIPClaim deletes IPClaims for all IP configurations on a NIC.
func (s *IPAMService) DeleteNicIPClaim(ctx context.Context, nicSpec *Spec) error {
var errs error
if len(nicSpec.IPConfigurations) == 0 {
claimName := ipam.GenerateNICIPClaimName(nicSpec.Name, 0)
if errs = s.DeleteIPClaim(ctx, claimName); errs != nil {
return errs
}
return nil
}

for index := range nicSpec.IPConfigurations {
claimName := ipam.GenerateNICIPClaimName(nicSpec.Name, index)
if err := s.DeleteIPClaim(ctx, claimName); err != nil {
errs = multierr.Append(errs, err)
}
}
return errs
}
112 changes: 106 additions & 6 deletions cloud/services/networkinterfaces/networkinterfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ package networkinterfaces

import (
"context"
"time"

"github.com/Azure/go-autorest/autorest/to"
azurestackhci "github.com/microsoft/cluster-api-provider-azurestackhci/cloud"
"github.com/microsoft/cluster-api-provider-azurestackhci/cloud/telemetry"
"github.com/microsoft/moc-sdk-for-go/services/network"
mocerrors "github.com/microsoft/moc/pkg/errors"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
)

// Spec specification for ip configuration
Expand All @@ -44,6 +47,7 @@ type Spec struct {
MacAddress string
BackendPoolNames []string
IPConfigurations IPConfigurations
IPAMService *IPAMService
}

// Get provides information about a network interface.
Expand All @@ -67,12 +71,19 @@ func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {
return errors.New("invalid network interface specification")
}

if _, err := s.Get(ctx, nicSpec); err == nil {
if nic, err := s.Get(ctx, nicSpec); err == nil {
// Nic already exists, no update supported for now
// Sync back to IPAM to ensure claim exists
s.Scope.GetLogger().Info("Nic exists, attempting to sync IPClaim", "name", nicSpec.Name)
mocNic := nic.(network.Interface)
if err := s.IPAMService.SyncNicIPClaim(ctx, mocNic); err != nil {
s.Scope.GetLogger().Info("Failed to sync IPClaim during reconcile", "error", err)
// Non-blocking - don't fail NIC reconcile
}
return nil
}
logger := s.Scope.GetLogger()

logger := s.Scope.GetLogger()
nicConfig := &network.InterfaceIPConfigurationPropertiesFormat{}
nicConfig.Subnet = &network.APIEntityReference{
ID: to.StringPtr(nicSpec.VnetName),
Expand Down Expand Up @@ -101,7 +112,6 @@ func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {
if len(nicSpec.IPConfigurations) > 0 {
logger.Info("Adding ipconfigurations to nic ", "len", len(nicSpec.IPConfigurations), "name", nicSpec.Name)
for _, ipconfig := range nicSpec.IPConfigurations {

networkIPConfig := network.InterfaceIPConfiguration{
Name: &ipconfig.Name,
InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{
Expand All @@ -127,20 +137,75 @@ func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {
*networkInterface.IPConfigurations = append(*networkInterface.IPConfigurations, networkIPConfig)
}

_, err := s.Client.CreateOrUpdate(ctx,
// assign ipam IP to the moc nic object.
if err := s.IPAMService.AllocateNicIPClaim(ctx, networkInterface, nicSpec.StaticIPAddress); err != nil {
logger.Error(err, "Failed to allocate IPClaim for network interface", "name", nicSpec.Name)
}

logger.Info("creating network interface ", "name", nicSpec.Name)

createdNic, err := s.Client.CreateOrUpdate(ctx,
s.Scope.GetResourceGroup(),
nicSpec.Name,
&networkInterface)
telemetry.WriteMocOperationLog(s.Scope.GetLogger(), telemetry.CreateOrUpdate, s.Scope.GetCustomResourceTypeWithName(), telemetry.NetworkInterface,
telemetry.GenerateMocResourceName(s.Scope.GetResourceGroup(), nicSpec.Name), &networkInterface, err)
if err != nil {
return errors.Wrapf(err, "failed to create network interface %s in resource group %s", nicSpec.Name, s.Scope.GetResourceGroup())
if s.shouldRetryIfIPConflict(err, nicSpec) {
if createdNic, err = s.handleIPAddressConflictRetry(ctx, nicSpec, &networkInterface); err != nil {
return errors.Wrapf(err, "failed to retry create with network interface %s in resource group %s", nicSpec.Name, s.Scope.GetResourceGroup())
}
} else {
return errors.Wrapf(err, "failed to create network interface %s in resource group %s", nicSpec.Name, s.Scope.GetResourceGroup())
}
}

if err := s.IPAMService.SyncNicIPClaim(ctx, *createdNic); err != nil {
logger.Info("Failed to sync IPClaim after NIC creation", "error", err)
// Non-blocking - don't fail NIC reconcile
}

logger.Info("successfully created network interface ", "name", nicSpec.Name)
return err
}

// isIPConflictError checks if the error indicates an IP address conflict that should trigger a retry
func (s *Service) shouldRetryIfIPConflict(err error, nicSpec *Spec) bool {
// user specified static IP, no need to retry
if err == nil || nicSpec.StaticIPAddress != "" {
return false
}

// Check for the specific error pattern indicating IP address conflict
return mocerrors.IsAlreadySet(err)
}

func (s *Service) handleIPAddressConflictRetry(ctx context.Context, vnicSpec *Spec, networkInterface *network.Interface) (*network.Interface, error) {
logger := s.Scope.GetLogger()
logger.Info("IP allocated by IPAM is already taken in Moc, retrying", "Conflicted IP", (*networkInterface.IPConfigurations)[0].InterfaceIPConfigurationPropertiesFormat.PrivateIPAddress)

// Remove the failed mocnetworkinterface
if err := s.Delete(ctx, vnicSpec); err != nil {
return nil, err
}

for _, ipconfig := range *networkInterface.IPConfigurations {
ipconfig.InterfaceIPConfigurationPropertiesFormat.PrivateIPAddress = nil
}

logger.Info("Creating network interface with empty PrivateIPAddress")
// Recreate the mocnetworkinterface without the IPAM allocated IP
createdNic, err := s.Client.CreateOrUpdate(ctx,
s.Scope.GetResourceGroup(),
vnicSpec.Name,
networkInterface)

telemetry.WriteMocOperationLog(s.Scope.GetLogger(), telemetry.CreateOrUpdate, s.Scope.GetCustomResourceTypeWithName(), telemetry.NetworkInterface,
telemetry.GenerateMocResourceName(s.Scope.GetResourceGroup(), vnicSpec.Name), &networkInterface, err)

return createdNic, err
}

// Delete deletes the network interface with the provided name.
func (s *Service) Delete(ctx context.Context, spec interface{}) error {
telemetry.WriteMocInfoLog(ctx, s.Scope)
Expand All @@ -150,17 +215,52 @@ func (s *Service) Delete(ctx context.Context, spec interface{}) error {
}
logger := s.Scope.GetLogger()
logger.Info("deleting nic", "name", nicSpec.Name)
defer func() {
if err := s.IPAMService.DeleteNicIPClaim(ctx, nicSpec); err != nil {
logger.Error(err, "failed to delete IPAM claim for nic", "name", nicSpec.Name)
}
}()

err := s.Client.Delete(ctx, s.Scope.GetResourceGroup(), nicSpec.Name)
telemetry.WriteMocOperationLog(logger, telemetry.Delete, s.Scope.GetCustomResourceTypeWithName(), telemetry.NetworkInterface,
telemetry.GenerateMocResourceName(s.Scope.GetResourceGroup(), nicSpec.Name), nil, err)
if err != nil && azurestackhci.ResourceNotFound(err) {
// already deleted
return nil
}
if err != nil {
return errors.Wrapf(err, "failed to delete network interface %s in resource group %s", nicSpec.Name, s.Scope.GetResourceGroup())
}

err = s.ensureNicDeleted(ctx, nicSpec)
if err != nil {
return errors.Wrapf(err, "timed out waiting for deletion of network interface %s in resource group %s", nicSpec.Name, s.Scope.GetResourceGroup())
}

logger.Info("successfully deleted nic", "name", nicSpec.Name)
return err
}

// ensureNicDeleted ensures the network interface is deleted by polling Get with a 5 second timeout.
func (s *Service) ensureNicDeleted(ctx context.Context, nicSpec *Spec) error {
logger := s.Scope.GetLogger()

pollErr := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
_, err := s.Get(ctx, nicSpec)
if err != nil {
if azurestackhci.ResourceNotFound(err) {
logger.Info("nic is deleted", "name", nicSpec.Name)
return true, nil // Deletion complete
}
logger.Error(err, "failed to get nic", "name", nicSpec.Name)
return false, err
}
logger.Info("nic still exists, waiting for deletion", "name", nicSpec.Name)
return false, nil // Continue polling
})

if pollErr != nil {
return errors.Wrapf(pollErr, "failed waiting for nic %s to be deleted", nicSpec.Name)
}

return nil
}
12 changes: 7 additions & 5 deletions cloud/services/networkinterfaces/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ var _ azurestackhci.Service = (*Service)(nil)

// Service provides operations on network interfaces
type Service struct {
Client networkinterface.InterfaceClient
Scope scope.ScopeInterface
Client networkinterface.InterfaceClient
Scope scope.ScopeInterface
IPAMService *IPAMService
}

// getNetworkInterfacesClient creates a new network interfaces client.
Expand All @@ -40,9 +41,10 @@ func getNetworkInterfacesClient(cloudAgentFqdn string, authorizer auth.Authorize
}

// NewService creates a new network interfaces service.
func NewService(scope scope.ScopeInterface) *Service {
func NewService(scope scope.ScopeInterface, ipamSvc *IPAMService) *Service {
return &Service{
Client: getNetworkInterfacesClient(scope.GetCloudAgentFqdn(), scope.GetAuthorizer()),
Scope: scope,
Client: getNetworkInterfacesClient(scope.GetCloudAgentFqdn(), scope.GetAuthorizer()),
Scope: scope,
IPAMService: ipamSvc,
}
}
2 changes: 1 addition & 1 deletion cloud/services/virtualmachines/virtualmachines.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {

logger := s.Scope.GetLogger()
logger.Info("getting nic", "nic", vmSpec.NICName)
nicInterface, err := networkinterfaces.NewService(s.Scope).Get(ctx, &networkinterfaces.Spec{Name: vmSpec.NICName})
nicInterface, err := networkinterfaces.NewService(s.Scope, nil).Get(ctx, &networkinterfaces.Spec{Name: vmSpec.NICName})
if err != nil {
return err
}
Expand Down
1 change: 1 addition & 0 deletions cloud/telemetry/logutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const (
VipPool MocResourceType = "VipPool"
VirtualNetwork MocResourceType = "VirtualNetwork"
NetworkInterface MocResourceType = "NetworkInterface"
IPAddressClaim MocResourceType = "IPAddressClaim"
Disk MocResourceType = "Disk"
VirtualMachine MocResourceType = "VirtualMachine"
KeyVault MocResourceType = "KeyVault"
Expand Down
2 changes: 2 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"k8s.io/klog/v2"
"k8s.io/klog/v2/klogr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
ipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1beta1"
"sigs.k8s.io/cluster-api/feature"
"sigs.k8s.io/cluster-api/util/record"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -61,6 +62,7 @@ func init() {
_ = clientgoscheme.AddToScheme(scheme)
_ = clusterv1.AddToScheme(scheme)
_ = infrav1beta1.AddToScheme(scheme)
_ = ipamv1.AddToScheme(scheme)
// +kubebuilder:scaffold:scheme
}

Expand Down
Loading