diff --git a/pkg/cloud/azure/actuators/machine/actuator_test.go b/pkg/cloud/azure/actuators/machine/actuator_test.go index 7a0427ddb..287cc1ccc 100644 --- a/pkg/cloud/azure/actuators/machine/actuator_test.go +++ b/pkg/cloud/azure/actuators/machine/actuator_test.go @@ -89,13 +89,13 @@ func newMachine(t *testing.T, machineConfig machinev1.AzureMachineProviderSpec, } } -func newFakeScope(t *testing.T, label string) *actuators.MachineScope { +func newFakeMachineScope(t *testing.T, label string, cloudEnv string) *actuators.MachineScope { labels := make(map[string]string) labels[actuators.MachineRoleLabel] = label labels[machinev1.MachineClusterIDLabel] = "clusterID" machineConfig := machinev1.AzureMachineProviderSpec{} m := newMachine(t, machineConfig, labels) - return &actuators.MachineScope{ + return actuators.NewFakeMachineScope(actuators.FakeMachineScopeParams{ Context: context.Background(), Machine: m, MachineConfig: &machinev1.AzureMachineProviderSpec{ @@ -106,7 +106,8 @@ func newFakeScope(t *testing.T, label string) *actuators.MachineScope { ManagedIdentity: "dummyIdentity", }, MachineStatus: &machinev1.AzureMachineProviderStatus{}, - } + CloudEnv: cloudEnv, + }) } func newFakeReconciler(t *testing.T) *Reconciler { @@ -137,7 +138,7 @@ func newFakeReconciler(t *testing.T) *Reconciler { networkInterfacesSvc.EXPECT().Delete(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() return &Reconciler{ - scope: newFakeScope(t, actuators.ControlPlane), + scope: newFakeMachineScope(t, actuators.ControlPlane, string(configv1.AzurePublicCloud)), networkInterfacesSvc: networkInterfacesSvc, virtualMachinesSvc: fakeVMSuccessSvc, virtualMachinesExtSvc: fakeSuccessSvc, @@ -447,7 +448,7 @@ func (s *FakeAvailabilityZonesService) Delete(ctx context.Context, spec azure.Sp } func TestAvailabilityZones(t *testing.T) { - fakeScope := newFakeScope(t, actuators.ControlPlane) + fakeScope := newFakeMachineScope(t, actuators.ControlPlane, string(configv1.AzurePublicCloud)) fakeReconciler := newFakeReconcilerWithScope(t, fakeScope) fakeReconciler.scope.MachineConfig.Zone = "2" @@ -491,7 +492,7 @@ func TestGetZone(t *testing.T) { } for _, tc := range testCases { - fakeScope := newFakeScope(t, actuators.ControlPlane) + fakeScope := newFakeMachineScope(t, actuators.ControlPlane, string(configv1.AzurePublicCloud)) fakeReconciler := newFakeReconcilerWithScope(t, fakeScope) fakeReconciler.scope.MachineConfig.Zone = tc.inputZone @@ -512,7 +513,7 @@ func TestGetZone(t *testing.T) { } func TestCustomUserData(t *testing.T) { - fakeScope := newFakeScope(t, actuators.Node) + fakeScope := newFakeMachineScope(t, actuators.Node, string(configv1.AzurePublicCloud)) userDataSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "testCustomUserData", @@ -541,7 +542,7 @@ func TestCustomUserData(t *testing.T) { } func TestCustomDataFailures(t *testing.T) { - fakeScope := newFakeScope(t, actuators.Node) + fakeScope := newFakeMachineScope(t, actuators.Node, string(configv1.AzurePublicCloud)) userDataSecret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "testCustomUserData", diff --git a/pkg/cloud/azure/actuators/machine/reconciler.go b/pkg/cloud/azure/actuators/machine/reconciler.go index a64703842..05115c5c3 100644 --- a/pkg/cloud/azure/actuators/machine/reconciler.go +++ b/pkg/cloud/azure/actuators/machine/reconciler.go @@ -529,6 +529,29 @@ func (s *Reconciler) Delete(ctx context.Context) error { return fmt.Errorf("failed to delete OS disk: %w", err) } + // On Azure Stack Hub, DeleteOption is not supported on the VM API. + // We need to manually delete data disks that have deletionPolicy set to Delete. + if s.scope.IsStackHub() && len(s.scope.MachineConfig.DataDisks) > 0 { + for _, disk := range s.scope.MachineConfig.DataDisks { + if disk.DeletionPolicy == machinev1.DiskDeletionPolicyTypeDelete { + dataDiskName := azure.GenerateDataDiskName(s.scope.Machine.Name, disk.NameSuffix) + + dataDiskSpec := &disks.Spec{ + Name: dataDiskName, + } + if err := s.disksSvc.Delete(ctx, dataDiskSpec); err != nil { + metrics.RegisterFailedInstanceDelete(&metrics.MachineLabels{ + Name: s.scope.Machine.Name, + Namespace: s.scope.Machine.Namespace, + Reason: "failed to delete data disk", + }) + + return fmt.Errorf("failed to delete data disk %s: %w", dataDiskName, err) + } + } + } + } + if s.scope.MachineConfig.Vnet == "" { return fmt.Errorf("MachineConfig vnet is missing on machine %s", s.scope.Machine.Name) } diff --git a/pkg/cloud/azure/actuators/machine/reconciler_test.go b/pkg/cloud/azure/actuators/machine/reconciler_test.go index aae049b2b..82ac49b59 100644 --- a/pkg/cloud/azure/actuators/machine/reconciler_test.go +++ b/pkg/cloud/azure/actuators/machine/reconciler_test.go @@ -11,11 +11,13 @@ import ( "github.com/Azure/go-autorest/autorest" "github.com/golang/mock/gomock" . "github.com/onsi/gomega" + configv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/api/machine/v1beta1" machinecontroller "github.com/openshift/machine-api-operator/pkg/controller/machine" "github.com/openshift/machine-api-provider-azure/pkg/cloud/azure" "github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/actuators" mock_azure "github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/mock" + "github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/services/disks" "github.com/openshift/machine-api-provider-azure/pkg/cloud/azure/services/networkinterfaces" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -205,9 +207,11 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels map[string]string }{ { - name: "with a blank vm", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "worker") }, - vm: armcompute.VirtualMachine{}, + name: "with a blank vm", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "worker", string(configv1.AzurePublicCloud)) + }, + vm: armcompute.VirtualMachine{}, expectedLabels: map[string]string{ actuators.MachineRoleLabel: "worker", machinev1.MachineClusterIDLabel: "clusterID", @@ -218,8 +222,10 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels: nil, }, { - name: "with a running vm", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "good-worker") }, + name: "with a running vm", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "good-worker", string(configv1.AzurePublicCloud)) + }, vm: armcompute.VirtualMachine{ Properties: &armcompute.VirtualMachineProperties{ ProvisioningState: ptr.To[string]("Running"), @@ -235,8 +241,10 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels: nil, }, { - name: "with a VMSize set vm", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "sized-worker") }, + name: "with a VMSize set vm", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "sized-worker", string(configv1.AzurePublicCloud)) + }, vm: armcompute.VirtualMachine{ Properties: &armcompute.VirtualMachineProperties{ HardwareProfile: &armcompute.HardwareProfile{ @@ -255,8 +263,10 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels: nil, }, { - name: "with a vm location", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "located-worker") }, + name: "with a vm location", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "located-worker", string(configv1.AzurePublicCloud)) + }, vm: armcompute.VirtualMachine{ Location: ptr.To[string]("nowhere"), }, @@ -271,8 +281,10 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels: nil, }, { - name: "with a vm with zones", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "zoned-worker") }, + name: "with a vm with zones", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "zoned-worker", string(configv1.AzurePublicCloud)) + }, vm: armcompute.VirtualMachine{ Zones: abcZones, }, @@ -287,8 +299,10 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { expectedSpecLabels: nil, }, { - name: "with a vm with a faultdomain set", - scope: func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "availabilityset-worker") }, + name: "with a vm with a faultdomain set", + scope: func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "availabilityset-worker", string(configv1.AzurePublicCloud)) + }, vm: armcompute.VirtualMachine{ Properties: &armcompute.VirtualMachineProperties{ InstanceView: &armcompute.VirtualMachineInstanceView{ @@ -309,7 +323,7 @@ func TestSetMachineCloudProviderSpecificsTable(t *testing.T) { { name: "with a vm on spot", scope: func(t *testing.T) *actuators.MachineScope { - scope := newFakeScope(t, "spot-worker") + scope := newFakeMachineScope(t, "spot-worker", string(configv1.AzurePublicCloud)) scope.MachineConfig.SpotVMOptions = &machinev1.SpotVMOptions{} return scope }, @@ -708,7 +722,9 @@ func TestMachineUpdateWithProvisionsngFailedNic(t *testing.T) { testCtx := context.TODO() - scope := func(t *testing.T) *actuators.MachineScope { return newFakeScope(t, "worker") } + scope := func(t *testing.T) *actuators.MachineScope { + return newFakeMachineScope(t, "worker", string(configv1.AzurePublicCloud)) + } r := newFakeReconcilerWithScope(t, scope(t)) r.networkInterfacesSvc = networkSvc @@ -744,3 +760,161 @@ func TestMachineUpdateWithProvisionsngFailedNic(t *testing.T) { }) } } + +// TestStackHubDataDiskDeletion tests that data disks with DeletionPolicy=Delete are manually deleted on Azure Stack Hub +func TestStackHubDataDiskDeletion(t *testing.T) { + testCases := []struct { + name string + isStackHub bool + dataDisks []machinev1.DataDisk + }{ + { + name: "Stack Hub with single data disk with Delete policy", + isStackHub: true, + dataDisks: []machinev1.DataDisk{ + { + NameSuffix: "disk1", + DiskSizeGB: 128, + Lun: 0, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDelete, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + }, + }, + { + name: "Stack Hub with multiple data disks with Delete policy", + isStackHub: true, + dataDisks: []machinev1.DataDisk{ + { + NameSuffix: "disk1", + DiskSizeGB: 128, + Lun: 0, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDelete, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + { + NameSuffix: "disk2", + DiskSizeGB: 256, + Lun: 1, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDelete, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + }, + }, + { + name: "Stack Hub with data disk with Detach policy", + isStackHub: true, + dataDisks: []machinev1.DataDisk{ + { + NameSuffix: "disk1", + DiskSizeGB: 128, + Lun: 0, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDetach, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + }, + }, + { + name: "Stack Hub with mixed deletion policies", + isStackHub: true, + dataDisks: []machinev1.DataDisk{ + { + NameSuffix: "disk1", + DiskSizeGB: 128, + Lun: 0, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDelete, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + { + NameSuffix: "disk2", + DiskSizeGB: 256, + Lun: 1, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDetach, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + }, + }, + { + name: "Non-Stack Hub with data disks", + isStackHub: false, + dataDisks: []machinev1.DataDisk{ + { + NameSuffix: "disk1", + DiskSizeGB: 128, + Lun: 0, + DeletionPolicy: machinev1.DiskDeletionPolicyTypeDelete, + ManagedDisk: machinev1.DataDiskManagedDiskParameters{StorageAccountType: machinev1.StorageAccountPremiumLRS}, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + mockCtrl := gomock.NewController(t) + + vmSvc := mock_azure.NewMockService(mockCtrl) + disksSvc := mock_azure.NewMockService(mockCtrl) + networkSvc := mock_azure.NewMockService(mockCtrl) + availabilitySetsSvc := mock_azure.NewMockService(mockCtrl) + + testCtx := context.TODO() + + platformType := string(configv1.AzureStackCloud) + if !tc.isStackHub { + platformType = string(configv1.AzurePublicCloud) + } + scope := newFakeMachineScope(t, "worker", platformType) + scope.MachineConfig.DataDisks = tc.dataDisks + + r := &Reconciler{ + scope: scope, + virtualMachinesSvc: vmSvc, + disksSvc: disksSvc, + networkInterfacesSvc: networkSvc, + availabilitySetsSvc: availabilitySetsSvc, + } + + // Expect VM deletion + vmSvc.EXPECT().Delete(testCtx, gomock.Any()).Return(nil).Times(1) + + // Expect OS disk deletion - always happens + expectedOSDiskName := azure.GenerateOSDiskName(scope.Machine.Name) + disksSvc.EXPECT().Delete(testCtx, gomock.Any()).DoAndReturn( + func(ctx context.Context, spec azure.Spec) error { + diskSpec, ok := spec.(*disks.Spec) + g.Expect(ok).To(BeTrue(), "spec should be a disk spec") + g.Expect(diskSpec.Name).To(Equal(expectedOSDiskName), "OS disk name should match") + return nil + }, + ).Times(1) + + // Expect data disk deletions only for disks with Delete policy on Stack Hub + if tc.isStackHub { + for _, disk := range tc.dataDisks { + if disk.DeletionPolicy == machinev1.DiskDeletionPolicyTypeDelete { + expectedDataDiskName := azure.GenerateDataDiskName(scope.Machine.Name, disk.NameSuffix) + disksSvc.EXPECT().Delete(testCtx, gomock.Any()).DoAndReturn( + func(ctx context.Context, spec azure.Spec) error { + diskSpec, ok := spec.(*disks.Spec) + g.Expect(ok).To(BeTrue(), "spec should be a disk spec") + g.Expect(diskSpec.Name).To(Equal(expectedDataDiskName), "data disk name should match") + return nil + }, + ).Times(1) + } + } + } + + // Expect network interface deletion + networkSvc.EXPECT().Delete(testCtx, gomock.Any()).Return(nil).Times(1) + + // Expect availability set deletion + availabilitySetsSvc.EXPECT().Delete(testCtx, gomock.Any()).Return(nil).Times(1) + + err := r.Delete(testCtx) + g.Expect(err).To(BeNil()) + }) + } +} diff --git a/pkg/cloud/azure/actuators/machine_scope_test_helper.go b/pkg/cloud/azure/actuators/machine_scope_test_helper.go new file mode 100644 index 000000000..6591c3c46 --- /dev/null +++ b/pkg/cloud/azure/actuators/machine_scope_test_helper.go @@ -0,0 +1,57 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package actuators + +import ( + "context" + + machinev1 "github.com/openshift/api/machine/v1beta1" + controllerclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +// NewFakeMachineScope creates a MachineScope for testing purposes. +// It can set private fields that are not accessible through NewMachineScope. +// This is only intended for use in tests. +func NewFakeMachineScope(params FakeMachineScopeParams) *MachineScope { + scope := &MachineScope{ + Context: params.Context, + AzureClients: params.AzureClients, + Machine: params.Machine, + CoreClient: params.CoreClient, + MachineConfig: params.MachineConfig, + MachineStatus: params.MachineStatus, + ClusterName: params.ClusterName, + cloudEnv: params.CloudEnv, + armEndpoint: params.ARMEndpoint, + Tags: params.Tags, + } + return scope +} + +// FakeMachineScopeParams defines parameters for creating a test MachineScope. +type FakeMachineScopeParams struct { + AzureClients + Context context.Context + Machine *machinev1.Machine + CoreClient controllerclient.Client + MachineConfig *machinev1.AzureMachineProviderSpec + MachineStatus *machinev1.AzureMachineProviderStatus + ClusterName string + CloudEnv string + ARMEndpoint string + Tags map[string]*string +} diff --git a/pkg/cloud/azure/services/virtualmachines/virtualmachines.go b/pkg/cloud/azure/services/virtualmachines/virtualmachines.go index 49d0d6327..e578bae30 100644 --- a/pkg/cloud/azure/services/virtualmachines/virtualmachines.go +++ b/pkg/cloud/azure/services/virtualmachines/virtualmachines.go @@ -244,7 +244,7 @@ func (s *Service) deriveVirtualMachineParameters(vmSpec *Spec, nicID string) (*a return nil, fmt.Errorf("failed to get Spot VM options %w", err) } - dataDisks, err := generateDataDisks(vmSpec) + dataDisks, err := generateDataDisks(vmSpec, s.Scope.IsStackHub()) if err != nil { return nil, fmt.Errorf("failed to generate data disk spec: %w", err) } @@ -540,7 +540,7 @@ func generateSecurityProfile(vmSpec *Spec, osDisk *armcompute.OSDisk) (*armcompu return securityProfile, nil } -func generateDataDisks(vmSpec *Spec) ([]*armcompute.DataDisk, error) { +func generateDataDisks(vmSpec *Spec, isStackHub bool) ([]*armcompute.DataDisk, error) { seenDataDiskLuns := make(map[int32]struct{}) seenDataDiskNames := make(map[string]struct{}) // defines rules for matching. strings must start and finish with an alphanumeric character @@ -605,6 +605,13 @@ func generateDataDisks(vmSpec *Spec) ([]*armcompute.DataDisk, error) { dataDiskName, vmSpec.Name, disk.DeletionPolicy, machinev1.DiskDeletionPolicyTypeDelete, machinev1.DiskDeletionPolicyTypeDetach) } + // DiskEncryptionSet is not supported on Azure Stack Hub + if disk.ManagedDisk.DiskEncryptionSet != nil && isStackHub { + return nil, apierrors.InvalidMachineConfiguration("failed to create Data Disk: %s for vm %s. "+ + "`diskEncryptionSet` is not supported on Azure Stack Hub.", + dataDiskName, vmSpec.Name) + } + seenDataDiskNames[disk.NameSuffix] = struct{}{} seenDataDiskLuns[disk.Lun] = struct{}{} @@ -614,7 +621,11 @@ func generateDataDisks(vmSpec *Spec) ([]*armcompute.DataDisk, error) { Lun: to.Int32Ptr(disk.Lun), Name: to.StringPtr(dataDiskName), Caching: ptr.To(armcompute.CachingTypes(disk.CachingType)), - DeleteOption: ptr.To(armcompute.DiskDeleteOptionTypes(disk.DeletionPolicy)), + } + + // DeleteOption not supported on Azure Stack Hub API, disks are manually deleted in reconciler based on deletionPolicy + if !isStackHub { + dataDisks[i].DeleteOption = ptr.To(armcompute.DiskDeleteOptionTypes(disk.DeletionPolicy)) } dataDisks[i].ManagedDisk = &armcompute.ManagedDiskParameters{ diff --git a/pkg/cloud/azure/services/virtualmachines/virtualmachines_stack.go b/pkg/cloud/azure/services/virtualmachines/virtualmachines_stack.go index 681d7d74f..72ad86f36 100644 --- a/pkg/cloud/azure/services/virtualmachines/virtualmachines_stack.go +++ b/pkg/cloud/azure/services/virtualmachines/virtualmachines_stack.go @@ -44,6 +44,11 @@ func (s *Service) deriveVirtualMachineParametersStackHub(vmSpec *Spec, nicID str } } + dataDisks, err := generateDataDisks(vmSpec, s.Scope.IsStackHub()) + if err != nil { + return nil, fmt.Errorf("failed to generate data disk spec: %w", err) + } + virtualMachine := &compute.VirtualMachine{ Location: ptr.To(s.Scope.MachineConfig.Location), Tags: vmSpec.Tags, @@ -62,6 +67,7 @@ func (s *Service) deriveVirtualMachineParametersStackHub(vmSpec *Spec, nicID str StorageAccountType: ptr.To(compute.StorageAccountTypes(vmSpec.OSDisk.ManagedDisk.StorageAccountType)), }, }, + DataDisks: dataDisks, }, OSProfile: osProfile, NetworkProfile: &compute.NetworkProfile{