Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 35 additions & 9 deletions internal/controllers/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package controllers
import (
"errors"
"fmt"
"slices"

"github.com/ceph/go-ceph/rados"
librbd "github.com/ceph/go-ceph/rbd"
Expand Down Expand Up @@ -75,7 +76,7 @@ func openImage(ioCtx *rados.IOContext, imageName string) (*librbd.Image, error)
img, err := librbd.OpenImage(ioCtx, imageName, librbd.NoSnapshot)
if err != nil {
if !errors.Is(err, librbd.ErrNotFound) {
return nil, fmt.Errorf("failed to open image: %w", err)
return nil, fmt.Errorf("failed to open image %s: %w", imageName, err)
}
return nil, err
}
Expand All @@ -87,7 +88,7 @@ func flattenImage(log logr.Logger, conn *rados.Conn, pool string, imageName stri

ioCtx, err := conn.OpenIOContext(pool)
if err != nil {
return fmt.Errorf("unable to open io context: %w", err)
return fmt.Errorf("unable to open io context for pool %s: %w", pool, err)
}
defer ioCtx.Destroy()

Expand All @@ -98,7 +99,7 @@ func flattenImage(log logr.Logger, conn *rados.Conn, pool string, imageName stri
defer closeImage(log, img)

if err := img.Flatten(); err != nil {
return fmt.Errorf("failed to flatten cloned image: %w", err)
return fmt.Errorf("failed to flatten cloned image %s: %w", imageName, err)
}
log.V(2).Info("Flattened cloned image", "clonedImageId", imageName)
return nil
Expand All @@ -113,12 +114,12 @@ func createSnapshot(log logr.Logger, ioCtx *rados.IOContext, snapshotName string

imgSnap, err := img.CreateSnapshot(snapshotName)
if err != nil {
return fmt.Errorf("unable to create snapshot: %w", err)
return fmt.Errorf("unable to create snapshot %s: %w", snapshotName, err)
}
log.Info("Snapshot created")

if err := imgSnap.Protect(); err != nil {
return fmt.Errorf("unable to protect snapshot: %w", err)
return fmt.Errorf("unable to protect snapshot %s: %w", snapshotName, err)
}

if err := img.SetSnapshot(snapshotName); err != nil {
Expand Down Expand Up @@ -167,11 +168,36 @@ func snapshotExists(log logr.Logger, ioCtx *rados.IOContext, imageName string, s
}
defer closeImage(log, img)

if _, err = img.GetSnapID(snapshotName); err != nil {
if errors.Is(err, librbd.ErrNotFound) {
return false, nil
snapshot := img.GetSnapshot(snapshotName)
if isProtected, err := snapshot.IsProtected(); err != nil {
if !errors.Is(err, librbd.ErrNotFound) {
return false, fmt.Errorf("failed to check if snapshot %s is protected: %w", snapshotName, err)
}
return false, nil // Snapshot doesn't exist
} else if isProtected {
log.V(2).Info("Snapshot already exists and is protected, skipping protection.", "snapshotName", snapshotName)
} else {
// Snapshot exists but not protected - just protect it
log.V(2).Info("Snapshot exists but is not protected, protecting it", "snapshotName", snapshotName)
if err := snapshot.Protect(); err != nil {
return false, fmt.Errorf("unable to protect existing snapshot %s: %w", snapshotName, err)
}
if err := img.SetSnapshot(snapshotName); err != nil {
return false, fmt.Errorf("failed to set snapshot %s for image %s: %w", snapshotName, imageName, err)
}
return false, fmt.Errorf("failed to get snapshot ID: %w", err)
}
return true, nil
}

func isRbdImageExisting(ioCtx *rados.IOContext, imageID string) (bool, error) {
images, err := librbd.GetImageNames(ioCtx)
if err != nil {
return false, fmt.Errorf("failed to list images: %w", err)
}

if slices.Contains(images, imageID) {
return true, nil
}

return false, nil
}
27 changes: 20 additions & 7 deletions internal/controllers/image_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,14 +269,10 @@ func (r *ImageReconciler) deleteImageSnapshots(ctx context.Context, log logr.Log
return fmt.Errorf("failed to create snapshot clone: %w", err)
}

isSnapshotExist, err := snapshotExists(log, ioCtx, ImageIDToRBDID(snapName), snapName)
if err != nil {
return fmt.Errorf("failed to check if snapshot exists: %w", err)
}
if isSnapshotExist {
log.V(2).Info("Snapshot of cloned image is already created")
if isSnapshotExist, err := snapshotExists(log, ioCtx, ImageIDToRBDID(snapName), snapName); err == nil && isSnapshotExist {
continue
Comment on lines +272 to 273
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Propagate snapshotExists errors in cleanup flow.

At Line 272, errors are ignored and execution continues. If Ceph lookup fails, the code may attempt snapshot creation/removal decisions on incomplete information.

💡 Proposed fix
-		if isSnapshotExist, err := snapshotExists(log, ioCtx, ImageIDToRBDID(snapName), snapName); err == nil && isSnapshotExist {
-			continue
-		}
+		if isSnapshotExist, err := snapshotExists(log, ioCtx, ImageIDToRBDID(snapName), snapName); err != nil {
+			return fmt.Errorf("failed to check snapshot %s existence on cloned image: %w", snapName, err)
+		} else if isSnapshotExist {
+			continue
+		}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controllers/image_controller.go` around lines 272 - 273, The cleanup
flow currently ignores errors from snapshotExists (called with log, ioCtx,
ImageIDToRBDID(snapName), snapName) and proceeds as if the snapshot check
succeeded; change the logic to propagate the error instead of skipping it —
i.e., if snapshotExists returns a non-nil err, return or surface that error from
the surrounding function (or handle it consistently with existing error handling
in this function) so snapshot creation/removal is not attempted on incomplete
information; ensure the call site uses the err value from snapshotExists and
does not treat only the boolean result as authoritative.

}

log.V(2).Info("Create snapshot of cloned image", "clonedImageId", snapName)
if err := createSnapshot(log, ioCtx, snapName, ImageIDToRBDID(snapName)); err != nil {
return fmt.Errorf("failed to create snapshot of cloned image: %w", err)
Expand Down Expand Up @@ -742,7 +738,6 @@ func (r *ImageReconciler) createImageFromSnapshot(ctx context.Context, log logr.
}

log.V(1).Info("snapshot not found", "snapshotID", snapshotRef)

return false, nil
}

Expand All @@ -751,6 +746,24 @@ func (r *ImageReconciler) createImageFromSnapshot(ctx context.Context, log logr.
return false, nil
}

if image.Spec.Image != "" {
log.V(2).Info("Check if snapshot rbd image already exist")
imageExists, err := isRbdImageExisting(ioCtx, SnapshotIDToRBDID(snapshotRef))
if err != nil {
return false, fmt.Errorf("failed to check snapshot rbd image existence: %w", err)
}
log.V(1).Info("Checked snapshot rbd image existence", "imageExists", imageExists)

if !imageExists && snapshot.Status.State == providerapi.SnapshotStateReady {
log.V(1).Info("Snapshot rbd image does not exist. Mark snapshot as failed to trigger rbd image creation in next reconciliation loop")
snapshot.Status.State = providerapi.SnapshotStateFailed
if _, err := r.snapshots.Update(ctx, snapshot); store.IgnoreErrNotFound(err) != nil {
return false, fmt.Errorf("failed to update snapshot to trigger rbd image creation: %w", err)
}
return false, nil
}
}
Comment on lines +749 to +765
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Drive parent-image existence checks from snapshot.Source, not image.Spec.Image.

At Line 749, the check only runs when image.Spec.Image != "". That skips validation for SnapshotRef-driven reconciliations where the snapshot source still requires a parent RBD image. Use getSnapshotSourceDetails(snapshot) and check existence of parentName directly.

💡 Proposed fix
-	if image.Spec.Image != "" {
+	parentName, snapName, err := getSnapshotSourceDetails(snapshot)
+	if err != nil {
+		return false, fmt.Errorf("failed to get snapshot source details: %w", err)
+	}
+
+	if snapshot.Source.IronCoreImage != "" || snapshot.Source.VolumeImageID != "" {
 		log.V(2).Info("Check if snapshot rbd image already exist")
-		imageExists, err := isRbdImageExisting(ioCtx, SnapshotIDToRBDID(snapshotRef))
+		imageExists, err := isRbdImageExisting(ioCtx, parentName)
 		if err != nil {
 			return false, fmt.Errorf("failed to check snapshot rbd image existence: %w", err)
 		}
 		log.V(1).Info("Checked snapshot rbd image existence", "imageExists", imageExists)
 
 		if !imageExists && snapshot.Status.State == providerapi.SnapshotStateReady {
 			log.V(1).Info("Snapshot rbd image does not exist. Mark snapshot as failed to trigger rbd image creation in next reconciliation loop")
 			snapshot.Status.State = providerapi.SnapshotStateFailed
 			if _, err := r.snapshots.Update(ctx, snapshot); store.IgnoreErrNotFound(err) != nil {
 				return false, fmt.Errorf("failed to update snapshot to trigger rbd image creation: %w", err)
 			}
 			return false, nil
 		}
 	}
-
-	parentName, snapName, err := getSnapshotSourceDetails(snapshot)
-	if err != nil {
-		return false, fmt.Errorf("failed to get snapshot source details: %w", err)
-	}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controllers/image_controller.go` around lines 749 - 765, The check
that gates parent RBD image existence must be driven by the snapshot source, not
by image.Spec.Image; replace the conditional that uses image.Spec.Image with
logic that calls getSnapshotSourceDetails(snapshot) to obtain parentName and
then call isRbdImageExisting(ioCtx, SnapshotIDToRBDID(parentName)) (or
equivalent RBD id conversion) to determine existence; keep the same error
handling and the snapshot.Status.State update and r.snapshots.Update(...) flow
(including store.IgnoreErrNotFound(err)) when the parent is missing and
snapshot.Status.State == providerapi.SnapshotStateReady so the reconcile will
mark it failed and trigger creation.


parentName, snapName, err := getSnapshotSourceDetails(snapshot)
if err != nil {
return false, fmt.Errorf("failed to get snapshot source details: %w", err)
Expand Down
80 changes: 53 additions & 27 deletions internal/controllers/snapshot_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,21 +249,35 @@ func (r *SnapshotReconciler) reconcileSnapshot(ctx context.Context, id string) e
return nil
}

// SnapshotStatePopulated is no longer actively used. It has been replaced by SnapshotStateReady.
// This block will transition any snapshots that are in SnapshotStatePopulated to SnapshotStateReady.
if snapshot.Status.State == providerapi.SnapshotStatePopulated {
log.V(1).Info("Snapshot already populated")
snapshot.Status.State = providerapi.SnapshotStateReady
if _, err = r.store.Update(ctx, snapshot); err != nil {
return fmt.Errorf("failed to update snapshot: %w", err)
imageExists := false
if snapshot.Source.IronCoreImage != "" {
log.V(2).Info("Check if snapshot image already exist")
imageExists, err = isRbdImageExisting(ioCtx, SnapshotIDToRBDID(snapshot.ID))
if err != nil {
return fmt.Errorf("failed to check if snapshot exists: %w", err)
}
log.V(1).Info("Checked snapshot rbd image existence", "imageExists", imageExists)
} else if snapshot.Source.VolumeImageID != "" {
imageExists = true
}

if imageExists {
// SnapshotStatePopulated is no longer actively used. It has been replaced by SnapshotStateReady.
// This block will transition any snapshots that are in SnapshotStatePopulated to SnapshotStateReady.
if snapshot.Status.State == providerapi.SnapshotStatePopulated {
log.V(1).Info("Snapshot already populated")
snapshot.Status.State = providerapi.SnapshotStateReady
if _, err = r.store.Update(ctx, snapshot); err != nil {
return fmt.Errorf("failed to update snapshot: %w", err)
}

return nil
}
return nil
}

if snapshot.Status.State == providerapi.SnapshotStateReady {
log.V(1).Info("Snapshot is ready")
return nil
if snapshot.Status.State == providerapi.SnapshotStateReady {
log.V(1).Info("Snapshot is ready")
return nil
}
}

if !slices.Contains(snapshot.Finalizers, SnapshotFinalizer) {
Expand Down Expand Up @@ -325,20 +339,24 @@ func (r *SnapshotReconciler) reconcileIroncoreImageSnapshot(ctx context.Context,
}
log.V(2).Info("Configured pool", "pool", r.pool)

imageName := SnapshotIDToRBDID(snapshot.ID)
rbdImageID := SnapshotIDToRBDID(snapshot.ID)
roundedSize := round.OffBytes(snapshotSize)
if err = librbd.CreateImage(ioCtx, imageName, roundedSize, options); err != nil {
return fmt.Errorf("failed to create os rbd image: %w", err)
}
log.V(2).Info("Created rbd image", "bytes", roundedSize)

if err := r.prepareSnapshotContent(log, ioCtx, imageName, rc); err != nil {
return fmt.Errorf("failed to prepare snapshot content: %w", err)
rbdImg, err := openImage(ioCtx, rbdImageID)
if errors.Is(err, librbd.ErrNotFound) {
if err = librbd.CreateImage(ioCtx, rbdImageID, roundedSize, options); err != nil {
return fmt.Errorf("failed to create os rbd image: %w", err)
}
log.V(2).Info("Created rbd image", "bytes", roundedSize)
rbdImg, err = openImage(ioCtx, rbdImageID)
}
if err != nil {
return err
}
defer closeImage(log, rbdImg)

log.V(2).Info("Create ironcore image snapshot", "ImageID", imageName)
if err := createSnapshot(log, ioCtx, ImageSnapshotVersion, imageName); err != nil {
return fmt.Errorf("failed to create ironcore image snapshot: %w", err)
if err := r.prepareSnapshotContent(log, ioCtx, rbdImg, rc); err != nil {
return fmt.Errorf("failed to prepare snapshot content: %w", err)
}

snapshot.Status.Digest = digest
Expand All @@ -355,6 +373,10 @@ func (r *SnapshotReconciler) reconcileVolumeImageSnapshot(ctx context.Context, l
return nil
}

if isSnapshotExist, err := snapshotExists(log, ioCtx, img.GetID(), snapshot.ID); err == nil && isSnapshotExist {
return nil
}
Comment on lines +376 to +378
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Use the RBD image name (img_<id>) and fail fast on existence-check errors.

At Line 376, snapshotExists gets img.GetID() (store ID), but this helper expects the real RBD image name. Combined with ignored errors, this can miss existing snapshots and fall through to createSnapshot, which then fails on already-existing snapshots.

💡 Proposed fix
-	if isSnapshotExist, err := snapshotExists(log, ioCtx, img.GetID(), snapshot.ID); err == nil && isSnapshotExist {
-		return nil
-	}
+	if isSnapshotExist, err := snapshotExists(log, ioCtx, ImageIDToRBDID(img.ID), snapshot.ID); err != nil {
+		return fmt.Errorf("failed to check volume snapshot existence: %w", err)
+	} else if isSnapshotExist {
+		return nil
+	}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controllers/snapshot_controller.go` around lines 376 - 378, The
existence check is using img.GetID() (store ID) instead of the real RBD image
name and silently ignores errors; change the call to snapshotExists to pass the
actual RBD image name (the "img_<id>" name used for RBD operations) instead of
img.GetID(), and if snapshotExists returns an error, return that error
immediately (do not treat it as non-existence) so createSnapshot is only
attempted when existence check truly succeeds; update the call site where
snapshotExists(log, ioCtx, img.GetID(), snapshot.ID) is invoked to use the RBD
image name and to propagate any error from snapshotExists up instead of ignoring
it (refer to snapshotExists, img.GetID()/RBD image name, snapshot.ID, and
createSnapshot).


log.V(2).Info("Create volume image snapshot", "ImageID", img.ID)
if err := createSnapshot(log, ioCtx, snapshot.ID, ImageIDToRBDID(img.ID)); err != nil {
return fmt.Errorf("failed to create volume image snapshot: %w", err)
Expand Down Expand Up @@ -393,18 +415,22 @@ func (r *SnapshotReconciler) openIroncoreImageSource(ctx context.Context, imageR
return content, uint64(rootFS.Descriptor().Size), img.Descriptor().Digest.String(), nil
}

func (r *SnapshotReconciler) prepareSnapshotContent(log logr.Logger, ioCtx *rados.IOContext, imageName string, rc io.ReadCloser) error {
rbdImg, err := openImage(ioCtx, imageName)
if err != nil {
return err
func (r *SnapshotReconciler) prepareSnapshotContent(log logr.Logger, ioCtx *rados.IOContext, rbdImg *librbd.Image, rc io.ReadCloser) error {
rbdImageID := rbdImg.GetName()
if isSnapshotExist, err := snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion); err == nil && isSnapshotExist {
return nil
}
Comment on lines +420 to 422
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not continue when snapshotExists fails.

At Line 420, a non-nil error is ignored, and reconciliation proceeds to populate/create snapshot. If this check fails transiently, you can perform unnecessary writes and hit avoidable failures.

💡 Proposed fix
-	if isSnapshotExist, err := snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion); err == nil && isSnapshotExist {
-		return nil
-	}
+	if isSnapshotExist, err := snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion); err != nil {
+		return fmt.Errorf("failed to check ironcore snapshot existence: %w", err)
+	} else if isSnapshotExist {
+		return nil
+	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if isSnapshotExist, err := snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion); err == nil && isSnapshotExist {
return nil
}
if isSnapshotExist, err := snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion); err != nil {
return fmt.Errorf("failed to check ironcore snapshot existence: %w", err)
} else if isSnapshotExist {
return nil
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controllers/snapshot_controller.go` around lines 420 - 422, The
current code ignores errors from snapshotExists, which can lead to dangerous
reconciliation when the check fails; update the conditional around
snapshotExists(log, ioCtx, rbdImageID, ImageSnapshotVersion) to explicitly
handle a non-nil err (e.g., log the error and return it or wrap and return)
instead of falling through — keep the early-return when isSnapshotExist is true,
but if err != nil return that error (or a wrapped error) so reconciliation stops
on transient failures.

defer closeImage(log, rbdImg)

if err := r.populateImage(log, rbdImg, rc); err != nil {
return fmt.Errorf("failed to populate os image: %w", err)
}
log.V(2).Info("Populated os image on rbd image")

log.V(2).Info("Create ironcore image snapshot", "ImageID", rbdImageID)
if err := createSnapshot(log, ioCtx, ImageSnapshotVersion, rbdImageID); err != nil {
return fmt.Errorf("failed to create ironcore image snapshot: %w", err)
}

return nil
}

Expand Down
Loading