Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions e2e/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,14 @@ func validateOmapCount(f *framework.Framework, count int, driver, pool, mode str
radosLsKeysCmd: "rados listomapkeys csi.groups.default " + cephfsOptions(pool),
radosLsKeysCmdFilter: fmt.Sprintf("rados listomapkeys csi.groups.default %s | wc -l", cephfsOptions(pool)),
},
{
volumeMode: groupSnapsType,
driverType: rbdType,
radosLsCmd: "rados ls" + rbdOptions(pool),
radosLsCmdFilter: fmt.Sprintf("rados ls %s | grep -v default | grep -c ^csi.volume.group.", rbdOptions(pool)),
radosLsKeysCmd: "rados listomapkeys csi.groups.default " + rbdOptions(pool),
radosLsKeysCmdFilter: fmt.Sprintf("rados listomapkeys csi.groups.default %s | wc -l", rbdOptions(pool)),
},
}

for _, cmds := range radosListCommands {
Expand Down
8 changes: 8 additions & 0 deletions e2e/volumegroupsnapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ func (rvgs *rbdVolumeGroupSnapshot) ValidateResourcesForCreate(vgs *groupsnapapi

func (rvgs *rbdVolumeGroupSnapshot) ValidateResourcesForDelete() error {
validateOmapCount(rvgs.framework, 0, rbdType, defaultRBDPool, volumesType)
validateOmapCount(rvgs.framework, 0, rbdType, defaultRBDPool, snapsType)
validateOmapCount(rvgs.framework, 0, rbdType, defaultRBDPool, groupSnapsType)
validateRBDImageCount(rvgs.framework, 0, defaultRBDPool)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a validation of no images in the pool itself. validateRBDImageCount

err := waitToRemoveImagesFromTrash(rvgs.framework, defaultRBDPool, deployTimeout)
if err != nil {
return err
}

return nil
}
22 changes: 2 additions & 20 deletions internal/rbd/controllerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1471,27 +1471,9 @@ func (cs *ControllerServer) DeleteSnapshot(
// Deleting snapshot and cloned volume
log.DebugLog(ctx, "deleting cloned rbd volume %s", rbdSnap.RbdSnapName)

rbdVol := rbdSnap.toVolume()

err = rbdVol.Connect(cr)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}
defer rbdVol.Destroy(ctx)

rbdVol.ImageID = rbdSnap.ImageID
// update parent name to delete the snapshot
rbdSnap.RbdImageName = rbdVol.RbdImageName
err = cleanUpSnapshot(ctx, rbdVol, rbdSnap, rbdVol)
if err != nil {
log.ErrorLog(ctx, "failed to delete image: %v", err)

return nil, status.Error(codes.Internal, err.Error())
}
err = undoSnapReservation(ctx, rbdSnap, cr)
err = rbdSnap.Delete(ctx)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the commit message refers to a cherry-picked commit. Please remove that line, or make sure this PR depends on the PR that adds that commit.

if err != nil {
log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) on image (%s) (%s)",
rbdSnap.RequestName, rbdSnap.RbdSnapName, rbdSnap.RbdImageName, err)
log.ErrorLog(ctx, "failed to delete rbd snapshot: %s with error: %v", rbdSnap, err)

return nil, status.Error(codes.Internal, err.Error())
}
Expand Down
15 changes: 12 additions & 3 deletions internal/rbd/group/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ func (cvg *commonVolumeGroup) GetName(ctx context.Context) (string, error) {
return cvg.name, nil
}

// GetRequestName returns the requestName of the VolumeGroup.
func (cvg *commonVolumeGroup) GetRequestName(ctx context.Context) (string, error) {
if cvg.requestName == "" {
return "", errors.New("BUG: requestName is not set")
}

return cvg.requestName, nil
}

// GetPool returns the name of the pool that holds the VolumeGroup.
func (cvg *commonVolumeGroup) GetPool(ctx context.Context) (string, error) {
if cvg.pool == "" {
Expand Down Expand Up @@ -291,9 +300,9 @@ func (cvg *commonVolumeGroup) Delete(ctx context.Context) error {
return fmt.Errorf("failed to get name for volume group %q: %w", cvg, err)
}

csiID, err := cvg.GetID(ctx)
reqName, err := cvg.GetRequestName(ctx)
if err != nil {
return fmt.Errorf("failed to get id for volume group %q: %w", cvg, err)
return fmt.Errorf("failed to get request name for volume group %q: %w", cvg, err)
}

pool, err := cvg.GetPool(ctx)
Expand All @@ -306,7 +315,7 @@ func (cvg *commonVolumeGroup) Delete(ctx context.Context) error {
return err
}

err = j.UndoReservation(ctx, pool, name, csiID)
err = j.UndoReservation(ctx, pool, name, reqName)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, looks like UndoReservation() might be improved to return ErrNotFound so that this bug could have been caught earlier.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are ignoring the ErrObjectNotFound error, which seems to have been present since the beginning in the volume journal implementation as well.

if err != nil {
if !errors.Is(err, util.ErrObjectNotFound) {
log.ErrorLog(ctx, "failed removing oMap %s (%s)", cj.cephUUIDDirectoryPrefix+imageUUID, err)
return err
}
}
}

Let's address this in different PR? @Madhu-1

if err != nil /* TODO? !errors.Is(..., err) */ {
return fmt.Errorf("failed to undo the reservation for volume group %q: %w", cvg, err)
}
Expand Down
4 changes: 4 additions & 0 deletions internal/rbd/rbd_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,10 @@ func (ri *rbdImage) Delete(ctx context.Context) error {
rbdImage := librbd.GetImage(ri.ioctx, image)
err = rbdImage.Trash(0)
if err != nil {
if errors.Is(err, librbd.ErrNotFound) {
return fmt.Errorf("Failed as %w (internal %w)", ErrImageNotFound, err)
}

log.ErrorLog(ctx, "failed to delete rbd image: %s, error: %v", ri, err)

return err
Expand Down
38 changes: 37 additions & 1 deletion internal/rbd/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func cleanUpSnapshot(
) error {
err := parentVol.deleteSnapshot(ctx, rbdSnap)
if err != nil {
if !errors.Is(err, ErrSnapNotFound) {
if !errors.Is(err, ErrImageNotFound) && !errors.Is(err, ErrSnapNotFound) {
log.ErrorLog(ctx, "failed to delete snapshot %q: %v", rbdSnap, err)

return err
Expand Down Expand Up @@ -162,6 +162,42 @@ func (rbdSnap *rbdSnapshot) ToCSI(ctx context.Context) (*csi.Snapshot, error) {
}, nil
}

// Delete removes the snapshot from the RBD image and then
// the RBD image itself. If the backing RBD snapshot and image is removed
// successfully, the reservation for the snapshot is removed from the journal.
//
// NOTE: As the function manipulates omaps, it should be called with a lock against the request name
// held, to prevent parallel operations from modifying the state of the omaps for this request name.
func (rbdSnap *rbdSnapshot) Delete(ctx context.Context) error {
rbdVol := rbdSnap.toVolume()

err := rbdVol.Connect(rbdSnap.conn.Creds)
if err != nil {
return err
}
defer rbdVol.Destroy(ctx)

rbdVol.ImageID = rbdSnap.ImageID
// update parent name to delete the snapshot
rbdSnap.RbdImageName = rbdVol.RbdImageName
err = cleanUpSnapshot(ctx, rbdVol, rbdSnap, rbdVol)
if err != nil {
log.ErrorLog(ctx, "failed to cleanup image %s and snapshot %s: %v", rbdVol, rbdSnap, err)

return err
}

err = undoSnapReservation(ctx, rbdSnap, rbdSnap.conn.Creds)
if err != nil {
log.ErrorLog(ctx, "failed to remove reservation for snapname (%s) with backing snap (%s) on image (%s) (%s)",
rbdSnap.RequestName, rbdSnap.RbdSnapName, rbdSnap.RbdImageName, err)

return err
}

return nil
}

func undoSnapshotCloning(
ctx context.Context,
parentVol *rbdVolume,
Expand Down
Loading