Skip to content

Commit 9eefea3

Browse files
authored
Fix incorrect volume attach error handling in CNSNodeVMAttachment reconciler (#3590)
1 parent 470977f commit 9eefea3

File tree

1 file changed

+30
-35
lines changed

1 file changed

+30
-35
lines changed

pkg/syncer/cnsoperator/controller/cnsnodevmattachment/cnsnodevmattachment_controller.go

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,15 @@ func (r *ReconcileCnsNodeVMAttachment) Reconcile(ctx context.Context,
357357
log.Infof("vSphere CSI driver is attaching volume: %q to nodevm: %+v for "+
358358
"CnsNodeVmAttachment request with name: %q on namespace: %q",
359359
volumeID, nodeVM, request.Name, request.Namespace)
360-
diskUUID, faulttype, attachErr := r.volumeManager.AttachVolume(internalCtx, nodeVM, volumeID, false)
361-
362-
if attachErr != nil {
360+
diskUUID, faulttype, err := r.volumeManager.AttachVolume(internalCtx, nodeVM, volumeID, false)
361+
if err != nil {
363362
log.Errorf("failed to attach disk: %q to nodevm: %+v for CnsNodeVmAttachment "+
364363
"request with name: %q on namespace: %q. Err: %+v",
365-
volumeID, nodeVM, request.Name, request.Namespace, attachErr)
364+
volumeID, nodeVM, request.Name, request.Namespace, err)
365+
instance.Status.Error = err.Error()
366+
_ = k8s.UpdateStatus(internalCtx, r.client, instance)
367+
recordEvent(internalCtx, r, instance, v1.EventTypeWarning, "")
368+
return reconcile.Result{RequeueAfter: timeout}, faulttype, nil
366369
}
367370

368371
pvc := &v1.PersistentVolumeClaim{}
@@ -383,7 +386,7 @@ func (r *ReconcileCnsNodeVMAttachment) Reconcile(ctx context.Context,
383386
}
384387
}
385388
if !cnsPvcFinalizerExists {
386-
faulttype, err = addFinalizerToPVC(internalCtx, r.client, pvc)
389+
_, err = addFinalizerToPVC(internalCtx, r.client, pvc)
387390
if err != nil {
388391
msg := fmt.Sprintf("failed to add %q finalizer on the PVC with volumename: %q on namespace: %q. Err: %+v",
389392
cnsoptypes.CNSPvcFinalizer, instance.Spec.VolumeName, instance.Namespace, err)
@@ -396,31 +399,27 @@ func (r *ReconcileCnsNodeVMAttachment) Reconcile(ctx context.Context,
396399
return reconcile.Result{RequeueAfter: timeout}, csifault.CSIInternalFault, nil
397400
}
398401
}
399-
if attachErr != nil {
400-
// Update CnsNodeVMAttachment instance with attach error message.
401-
instance.Status.Error = attachErr.Error()
402-
} else {
403-
// Add the CNS volume ID in the attachment metadata. This is used later
404-
// to detach the CNS volume on deletion of CnsNodeVMAttachment instance.
405-
// Note that the supervisor PVC can be deleted due to following:
406-
// 1. Bug in external provisioner(https://github.com/kubernetes/kubernetes/issues/84226)
407-
// where DeleteVolume could be invoked in pvcsi before ControllerUnpublishVolume.
408-
// This causes supervisor PVC to be deleted.
409-
// 2. Supervisor namespace user deletes PVC used by a guest cluster.
410-
// 3. Supervisor namespace is deleted
411-
// Basically, we cannot rely on the existence of PVC in supervisor
412-
// cluster for detaching the volume from guest cluster VM. So, the
413-
// logic stores the CNS volume ID in attachmentMetadata itself which
414-
// is used during detach.
415-
// Update CnsNodeVMAttachment instance with attached status set to true
416-
// and attachment metadata.
417-
instance.Status.AttachmentMetadata = make(map[string]string)
418-
instance.Status.AttachmentMetadata[v1a1.AttributeCnsVolumeID] = volumeID
419-
instance.Status.AttachmentMetadata[v1a1.AttributeFirstClassDiskUUID] = diskUUID
420-
instance.Status.Attached = true
421-
// Clear the error message.
422-
instance.Status.Error = ""
423-
}
402+
403+
// Add the CNS volume ID in the attachment metadata. This is used later
404+
// to detach the CNS volume on deletion of CnsNodeVMAttachment instance.
405+
// Note that the supervisor PVC can be deleted due to following:
406+
// 1. Bug in external provisioner(https://github.com/kubernetes/kubernetes/issues/84226)
407+
// where DeleteVolume could be invoked in pvcsi before ControllerUnpublishVolume.
408+
// This causes supervisor PVC to be deleted.
409+
// 2. Supervisor namespace user deletes PVC used by a guest cluster.
410+
// 3. Supervisor namespace is deleted
411+
// Basically, we cannot rely on the existence of PVC in supervisor
412+
// cluster for detaching the volume from guest cluster VM. So, the
413+
// logic stores the CNS volume ID in attachmentMetadata itself which
414+
// is used during detach.
415+
// Update CnsNodeVMAttachment instance with attached status set to true
416+
// and attachment metadata.
417+
instance.Status.AttachmentMetadata = make(map[string]string)
418+
instance.Status.AttachmentMetadata[v1a1.AttributeCnsVolumeID] = volumeID
419+
instance.Status.AttachmentMetadata[v1a1.AttributeFirstClassDiskUUID] = diskUUID
420+
instance.Status.Attached = true
421+
// Clear the error message.
422+
instance.Status.Error = ""
424423

425424
err = k8s.UpdateStatus(internalCtx, r.client, instance)
426425
if err != nil {
@@ -431,11 +430,6 @@ func (r *ReconcileCnsNodeVMAttachment) Reconcile(ctx context.Context,
431430
return reconcile.Result{RequeueAfter: timeout}, csifault.CSIApiServerOperationFault, nil
432431
}
433432

434-
if attachErr != nil {
435-
recordEvent(internalCtx, r, instance, v1.EventTypeWarning, "")
436-
return reconcile.Result{RequeueAfter: timeout}, faulttype, nil
437-
}
438-
439433
msg := fmt.Sprintf("ReconcileCnsNodeVMAttachment: Successfully updated entry in CNS for instance "+
440434
"with name %q and namespace %q.", request.Name, request.Namespace)
441435
recordEvent(internalCtx, r, instance, v1.EventTypeNormal, msg)
@@ -607,6 +601,7 @@ func (r *ReconcileCnsNodeVMAttachment) Reconcile(ctx context.Context,
607601
log.Infof("Finished Reconcile for CnsNodeVMAttachment request: %q", request.NamespacedName)
608602
return reconcile.Result{}, "", nil
609603
}
604+
610605
// creating new context for reconcileCnsNodeVMAttachmentInternal, as kubernetes supplied context can get canceled
611606
// This is required to ensure CNS operations won't get prematurely canceled by the controller runtime’s
612607
// internal reconcile logic.

0 commit comments

Comments
 (0)