Skip to content

Commit ba946f7

Browse files
authored
Pause snapshot creation if disk is pending promotion or registration (vmware-tanzu#1397)
This PR updates VM Snapshot reconciliation to skip snapshot creation if the VM is pending disk promotion (a prerequisite for disk registration) or pending unmanaged disk backfill/registration. This is required because a VM disk cannot be registered as a PVC on vSphere if a disk delta has been created by a snapshot. While here, this change also moves the existing snapshot tests from `vmprovider_vm_test.go` to their own `vmprovider_vm_snapshot_test.go` file, and adds new tests to verify the updated snapshot reconciliation.
1 parent b278bb4 commit ba946f7

File tree

4 files changed

+425
-403
lines changed

4 files changed

+425
-403
lines changed

pkg/providers/vsphere/vmprovider_vm.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,6 +1461,15 @@ func (vs *vSphereVMProvider) reconcilePowerState(
14611461
return nil
14621462
}
14631463

1464+
func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
1465+
vmCtx pkgctx.VirtualMachineContext,
1466+
vcVM *object.VirtualMachine) error {
1467+
1468+
vmCtx.Logger.V(4).Info("Reconciling current snapshot")
1469+
1470+
return ReconcileCurrentSnapshot(vmCtx, vs.k8sClient, vcVM)
1471+
}
1472+
14641473
func verifyConfigInfo(vmCtx pkgctx.VirtualMachineContext) error {
14651474
if vmCtx.MoVM.Config == nil {
14661475
return pkgerr.NoRequeueError{

pkg/providers/vsphere/vmprovider_vm_snapshot.go

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
vmopv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha5"
2727
backupapi "github.com/vmware-tanzu/vm-operator/pkg/backup/api"
2828
pkgcnd "github.com/vmware-tanzu/vm-operator/pkg/conditions"
29+
pkgcfg "github.com/vmware-tanzu/vm-operator/pkg/config"
2930
pkgconst "github.com/vmware-tanzu/vm-operator/pkg/constants"
3031
pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context"
3132
pkgerr "github.com/vmware-tanzu/vm-operator/pkg/errors"
@@ -35,6 +36,8 @@ import (
3536
pkgutil "github.com/vmware-tanzu/vm-operator/pkg/util"
3637
kubeutil "github.com/vmware-tanzu/vm-operator/pkg/util/kube"
3738
vmopv1util "github.com/vmware-tanzu/vm-operator/pkg/util/vmopv1"
39+
vmconfunmanagedvolsfil "github.com/vmware-tanzu/vm-operator/pkg/vmconfig/volumes/unmanaged/backfill"
40+
vmconfunmanagedvolsreg "github.com/vmware-tanzu/vm-operator/pkg/vmconfig/volumes/unmanaged/register"
3841
)
3942

4043
// DeleteSnapshot deletes the snapshot from the VM.
@@ -157,8 +160,9 @@ func (vs *vSphereVMProvider) SyncVMSnapshotTreeStatus(ctx context.Context, vm *v
157160
}
158161

159162
// markSnapshotInProgress marks a snapshot as currently being processed.
160-
func (vs *vSphereVMProvider) markSnapshotInProgress(
163+
func markSnapshotInProgress(
161164
vmCtx pkgctx.VirtualMachineContext,
165+
k8sClient ctrlclient.Client,
162166
vmSnapshot *vmopv1.VirtualMachineSnapshot) error {
163167
// Create a patch to set the InProgress condition
164168
patch := ctrlclient.MergeFrom(vmSnapshot.DeepCopy())
@@ -171,7 +175,7 @@ func (vs *vSphereVMProvider) markSnapshotInProgress(
171175
"snapshot in progress")
172176

173177
// Use Patch instead of Update to avoid conflicts with snapshot controller
174-
if err := vs.k8sClient.Status().Patch(vmCtx, vmSnapshot, patch); err != nil {
178+
if err := k8sClient.Status().Patch(vmCtx, vmSnapshot, patch); err != nil {
175179
return fmt.Errorf("failed to mark snapshot as in progress: %w", err)
176180
}
177181

@@ -180,7 +184,8 @@ func (vs *vSphereVMProvider) markSnapshotInProgress(
180184
}
181185

182186
// markSnapshotFailed marks a snapshot as failed and clears the in-progress status.
183-
func (vs *vSphereVMProvider) markSnapshotFailed(vmCtx pkgctx.VirtualMachineContext,
187+
func markSnapshotFailed(vmCtx pkgctx.VirtualMachineContext,
188+
k8sClient ctrlclient.Client,
184189
vmSnapshot *vmopv1.VirtualMachineSnapshot, err error) error {
185190
// Create a patch to update the snapshot status.
186191
patch := ctrlclient.MergeFrom(vmSnapshot.DeepCopy())
@@ -193,7 +198,7 @@ func (vs *vSphereVMProvider) markSnapshotFailed(vmCtx pkgctx.VirtualMachineConte
193198
err)
194199

195200
// Use Patch instead of Update to avoid conflicts with snapshot controller (best effort)
196-
if updateErr := vs.k8sClient.Status().Patch(vmCtx, vmSnapshot, patch); updateErr != nil {
201+
if updateErr := k8sClient.Status().Patch(vmCtx, vmSnapshot, patch); updateErr != nil {
197202
return fmt.Errorf("failed to update snapshot status after failure: %w", updateErr)
198203
}
199204

@@ -202,12 +207,13 @@ func (vs *vSphereVMProvider) markSnapshotFailed(vmCtx pkgctx.VirtualMachineConte
202207
}
203208

204209
// getVirtualMachineSnapshotsForVM finds all VirtualMachineSnapshot objects that reference this VM.
205-
func (vs *vSphereVMProvider) getVirtualMachineSnapshotsForVM(
206-
vmCtx pkgctx.VirtualMachineContext) ([]vmopv1.VirtualMachineSnapshot, error) {
210+
func getVirtualMachineSnapshotsForVM(
211+
vmCtx pkgctx.VirtualMachineContext,
212+
k8sClient ctrlclient.Client) ([]vmopv1.VirtualMachineSnapshot, error) {
207213

208214
// List all VirtualMachineSnapshot objects in the VM's namespace
209215
var snapshotList vmopv1.VirtualMachineSnapshotList
210-
if err := vs.k8sClient.List(vmCtx, &snapshotList, ctrlclient.InNamespace(vmCtx.VM.Namespace)); err != nil {
216+
if err := k8sClient.List(vmCtx, &snapshotList, ctrlclient.InNamespace(vmCtx.VM.Namespace)); err != nil {
211217
return nil, fmt.Errorf("failed to list VirtualMachineSnapshot objects: %w", err)
212218
}
213219

@@ -799,14 +805,16 @@ func synthesizeVMSpecForSnapshot(
799805
}
800806
}
801807

802-
func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
808+
// ReconcileCurrentSnapshot reconciles the current snapshot owned by the VM.
809+
func ReconcileCurrentSnapshot(
803810
vmCtx pkgctx.VirtualMachineContext,
811+
k8sClient ctrlclient.Client,
804812
vcVM *object.VirtualMachine) error {
805813

806814
logger := pkglog.FromContextOrDefault(vmCtx)
807815

808816
// Find all VirtualMachineSnapshot objects that reference this VM
809-
vmSnapshots, err := vs.getVirtualMachineSnapshotsForVM(vmCtx)
817+
vmSnapshots, err := getVirtualMachineSnapshotsForVM(vmCtx, k8sClient)
810818
if err != nil {
811819
return err
812820
}
@@ -822,6 +830,36 @@ func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
822830
return nil
823831
}
824832

833+
// When FastDeploy is enabled, wait for disk promotion sync to be ready.
834+
// This is a prerequisite for the VM disk registration checked in the next step.
835+
if pkgcfg.FromContext(vmCtx).Features.FastDeploy {
836+
837+
if vmCtx.VM.Spec.PromoteDisksMode != vmopv1.VirtualMachinePromoteDisksModeDisabled &&
838+
!pkgcnd.IsTrue(vmCtx.VM, vmopv1.VirtualMachineDiskPromotionSynced) {
839+
logger.Info("Skipping snapshot as required condition is not ready",
840+
"condition", vmopv1.VirtualMachineDiskPromotionSynced)
841+
return nil
842+
}
843+
}
844+
845+
// When AllDisksArePVCs is enabled, wait for VM's disks to be registered.
846+
// This is because vSphere cannot register disks as PVCs if there's a disk
847+
// delta caused by snapshot creation.
848+
if pkgcfg.FromContext(vmCtx).Features.AllDisksArePVCs {
849+
850+
if !pkgcnd.IsTrue(vmCtx.VM, vmconfunmanagedvolsfil.Condition) {
851+
logger.Info("Skipping snapshot as required condition is not ready",
852+
"condition", vmconfunmanagedvolsfil.Condition)
853+
return nil
854+
}
855+
856+
if !pkgcnd.IsTrue(vmCtx.VM, vmconfunmanagedvolsreg.Condition) {
857+
logger.Info("Skipping snapshot as required condition is not ready",
858+
"condition", vmconfunmanagedvolsreg.Condition)
859+
return nil
860+
}
861+
}
862+
825863
// Sort snapshots by creation timestamp to ensure consistent ordering
826864
sort.Slice(vmSnapshots, func(i, j int) bool {
827865
return vmSnapshots[i].CreationTimestamp.Before(
@@ -875,7 +913,7 @@ func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
875913
owner, err := controllerutil.HasOwnerReference(
876914
snapshotToProcess.OwnerReferences,
877915
vmCtx.VM,
878-
vs.k8sClient.Scheme())
916+
k8sClient.Scheme())
879917
if err != nil {
880918
return fmt.Errorf("failed to check if snapshot %q is owned by vm: %w",
881919
snapshotToProcess.Name, err)
@@ -899,7 +937,7 @@ func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
899937

900938
// Mark the snapshot as in progress before starting the operation to
901939
// prevent concurrent snapshots.
902-
if err := vs.markSnapshotInProgress(vmCtx, snapshotToProcess); err != nil {
940+
if err := markSnapshotInProgress(vmCtx, k8sClient, snapshotToProcess); err != nil {
903941
return fmt.Errorf("failed to mark snapshot %q in progress: %w",
904942
snapshotToProcess.Name, err)
905943
}
@@ -918,7 +956,7 @@ func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
918956
// taking a snapshot. So, if we are unable to clear the
919957
// in-progress condition because status patching fails, we
920958
// will forever be waiting.
921-
if err := vs.markSnapshotFailed(vmCtx, snapshotToProcess, err); err != nil {
959+
if err := markSnapshotFailed(vmCtx, k8sClient, snapshotToProcess, err); err != nil {
922960
return fmt.Errorf("failed to mark snapshot as failed: %w", err)
923961
}
924962
return fmt.Errorf("failed to create snapshot %q: %w",
@@ -929,7 +967,7 @@ func (vs *vSphereVMProvider) reconcileCurrentSnapshot(
929967
if err = kubeutil.PatchSnapshotSuccessStatus(
930968
vmCtx,
931969
logger,
932-
vs.k8sClient,
970+
k8sClient,
933971
snapshotToProcess,
934972
snapNode,
935973
vmCtx.VM.Spec.PowerState); err != nil {

0 commit comments

Comments
 (0)