Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 228 additions & 0 deletions controllers/imagecollector/first_reconcile_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
package imagecollector

import (
"context"
"sync"
"testing"

eraserv1 "github.com/eraser-dev/eraser/api/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestFirstReconcileMutexBasic(t *testing.T) {
	// Verify the mutual-exclusion contract of sync.Mutex.TryLock that the
	// first-reconcile guard relies on: a held lock rejects a second
	// TryLock, and releasing it allows re-acquisition.
	var (
		mu   sync.Mutex
		runs int
	)

	if ok := mu.TryLock(); !ok {
		t.Error("First TryLock should succeed")
	}

	// Simulated critical-section work performed under the lock.
	runs++

	if ok := mu.TryLock(); ok {
		t.Error("Second TryLock should fail while first is held")
	}

	mu.Unlock()

	// After release the lock must be free again.
	if ok := mu.TryLock(); !ok {
		t.Error("TryLock should succeed after release")
	}
	mu.Unlock()

	if runs != 1 {
		t.Errorf("Expected executionCount 1, got %d", runs)
	}
}

func TestFirstReconcileDoneFlag(t *testing.T) {
	// Verify the once-only semantics of the done flag: it starts false,
	// is flipped to true by the first reconcile, and stays true so later
	// triggers can skip the work.
	done := false

	if done {
		t.Error("firstReconcileDone should be false initially")
	}

	// The first reconcile marks completion.
	done = true

	if !done {
		t.Error("firstReconcileDone should be true after first reconcile")
	}
}

func TestReconcileWithRunningJob(t *testing.T) {
	// Build a scheme that knows the eraser API types.
	sch := runtime.NewScheme()
	if err := eraserv1.AddToScheme(sch); err != nil {
		t.Fatalf("Failed to add scheme: %v", err)
	}

	// Seed a fake client with one job already in the Running phase.
	c := fake.NewClientBuilder().
		WithScheme(sch).
		WithObjects(&eraserv1.ImageJob{
			ObjectMeta: metav1.ObjectMeta{
				Name:      "test-job-1",
				Namespace: "eraser-system",
			},
			Status: eraserv1.ImageJobStatus{
				Phase: eraserv1.PhaseRunning,
			},
		}).
		Build()

	// Listing must surface exactly that running job, mirroring what the
	// reconcile loop would observe.
	var jobs eraserv1.ImageJobList
	if err := c.List(context.Background(), &jobs); err != nil {
		t.Fatalf("Failed to list jobs: %v", err)
	}

	if got := len(jobs.Items); got != 1 {
		t.Fatalf("Expected 1 job, got %d", got)
	}

	if phase := jobs.Items[0].Status.Phase; phase != eraserv1.PhaseRunning {
		t.Errorf("Expected running job, got %s", phase)
	}
}

func TestReconcileWithNoJobs(t *testing.T) {
	// An empty cluster: the fake client is built with no objects.
	sch := runtime.NewScheme()
	if err := eraserv1.AddToScheme(sch); err != nil {
		t.Fatalf("Failed to add scheme: %v", err)
	}

	c := fake.NewClientBuilder().WithScheme(sch).Build()

	// The reconcile path should see zero jobs.
	var jobs eraserv1.ImageJobList
	if err := c.List(context.Background(), &jobs); err != nil {
		t.Fatalf("Failed to list jobs: %v", err)
	}

	if got := len(jobs.Items); got != 0 {
		t.Errorf("Expected 0 jobs, got %d", got)
	}
}

func TestReconcileWithMultipleNonRunningJobs(t *testing.T) {
	// Register the eraser API types on a fresh scheme.
	sch := runtime.NewScheme()
	if err := eraserv1.AddToScheme(sch); err != nil {
		t.Fatalf("Failed to add scheme: %v", err)
	}

	// Populate the fake client with two terminal-phase jobs: one
	// completed, one failed — neither should count as "running".
	c := fake.NewClientBuilder().
		WithScheme(sch).
		WithObjects(
			&eraserv1.ImageJob{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "test-job-completed",
					Namespace: "eraser-system",
				},
				Status: eraserv1.ImageJobStatus{
					Phase: eraserv1.PhaseCompleted,
				},
			},
			&eraserv1.ImageJob{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "test-job-failed",
					Namespace: "eraser-system",
				},
				Status: eraserv1.ImageJobStatus{
					Phase: eraserv1.PhaseFailed,
				},
			},
		).
		Build()

	var jobs eraserv1.ImageJobList
	if err := c.List(context.Background(), &jobs); err != nil {
		t.Fatalf("Failed to list jobs: %v", err)
	}

	if got := len(jobs.Items); got != 2 {
		t.Fatalf("Expected 2 jobs, got %d", got)
	}

	// Neither job may report the Running phase.
	for i := range jobs.Items {
		if jobs.Items[i].Status.Phase == eraserv1.PhaseRunning {
			t.Errorf("Expected non-running job, got running: %s", jobs.Items[i].Name)
		}
	}
}

func TestNamespacedNameForFirstReconcile(t *testing.T) {
	// The controller recognizes the synthetic startup request by the
	// well-known name "first-reconcile" with an empty namespace. Pin both
	// fields so a regression in either one is caught.
	req := types.NamespacedName{
		Name:      "first-reconcile",
		Namespace: "",
	}

	if req.Name != "first-reconcile" {
		t.Errorf("Expected name 'first-reconcile', got '%s'", req.Name)
	}
	// Fix: the original test populated Namespace but never asserted it.
	if req.Namespace != "" {
		t.Errorf("Expected empty namespace, got '%s'", req.Namespace)
	}
}

func TestMutexPreventsConcurrentFirstReconcile(t *testing.T) {
	// Verify that the TryLock + done-flag pattern executes the
	// first-reconcile work exactly once even under concurrency.
	//
	// Fix: the original version read executionCount without waiting for
	// the goroutines to finish — a data race that could observe 0 and
	// made the final check meaningless. A WaitGroup now establishes the
	// required happens-before edge, and the assertion is tightened to
	// exactly one execution: the first goroutine to win TryLock sets the
	// done flag before unlocking, so every later lock holder skips.
	var (
		firstReconcileMutex sync.Mutex
		firstReconcileDone  bool
		executionCount      int
		wg                  sync.WaitGroup
	)

	runFirstReconcile := func() {
		defer wg.Done()

		// Another goroutine is already inside first-reconcile: bail out.
		if !firstReconcileMutex.TryLock() {
			return
		}
		defer firstReconcileMutex.Unlock()

		// A previous holder already completed the work: nothing to do.
		if firstReconcileDone {
			return
		}

		executionCount++
		firstReconcileDone = true
	}

	const workers = 10
	wg.Add(workers)
	for i := 0; i < workers; i++ {
		go runFirstReconcile()
	}

	// Wait for every goroutine before inspecting the counter.
	wg.Wait()

	if executionCount != 1 {
		t.Errorf("Expected exactly 1 execution, got %d", executionCount)
	}
}
92 changes: 78 additions & 14 deletions controllers/imagecollector/imagecollector_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,18 @@ package imagecollector

import (
"context"
"errors"
stdErrors "errors"
"fmt"
"os"
"os/signal"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"

"go.opentelemetry.io/otel"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -38,7 +40,6 @@ import (

"github.com/eraser-dev/eraser/api/unversioned/config"
eraserv1 "github.com/eraser-dev/eraser/api/v1"
eraserv1alpha1 "github.com/eraser-dev/eraser/api/v1alpha1"
"github.com/eraser-dev/eraser/controllers/util"

"sigs.k8s.io/controller-runtime/pkg/controller"
Expand All @@ -62,12 +63,14 @@ const (
)

var (
log = logf.Log.WithName("controller").WithValues("process", "imagecollector-controller")
startTime time.Time
ownerLabel labels.Selector
exporter sdkmetric.Exporter
reader sdkmetric.Reader
provider *sdkmetric.MeterProvider
log = logf.Log.WithName("controller").WithValues("process", "imagecollector-controller")
startTime time.Time
ownerLabel labels.Selector
exporter sdkmetric.Exporter
reader sdkmetric.Reader
provider *sdkmetric.MeterProvider
firstReconcileMutex sync.Mutex
firstReconcileDone bool
)

func init() {
Expand Down Expand Up @@ -218,18 +221,77 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
defer log.Info("done reconcile")

imageJobList := &eraserv1.ImageJobList{}
if err := r.List(ctx, imageJobList); err != nil {
listOpts := []client.ListOption{
client.MatchingLabelsSelector{Selector: ownerLabel},
}
if err := r.List(ctx, imageJobList, listOpts...); err != nil {
log.Info("could not list imagejobs")
return ctrl.Result{}, err
}

if req.Name == "first-reconcile" {
// Prevent concurrent first-reconcile executions
if !firstReconcileMutex.TryLock() {
log.Info("first-reconcile already in progress, skipping")
return ctrl.Result{}, nil
}
defer firstReconcileMutex.Unlock()

// Mark as done to prevent subsequent triggers
if firstReconcileDone {
log.Info("first-reconcile already completed, skipping")
return ctrl.Result{}, nil
}

// Check for existing running jobs before cleanup
runningJobFound := false
for idx := range imageJobList.Items {
if err := r.Delete(ctx, &imageJobList.Items[idx]); err != nil {
log.Info("error cleaning up previous imagejobs")
return ctrl.Result{}, err
job := &imageJobList.Items[idx]
if job.Status.Phase == eraserv1.PhaseRunning {
log.Info("found already running collector job, adopting", "job", job.Name)
runningJobFound = true
break
}
}

if runningJobFound {
firstReconcileDone = true
return ctrl.Result{}, nil
}

// Clean up any existing non-running jobs
for idx := range imageJobList.Items {
job := &imageJobList.Items[idx]
if job.Status.Phase != eraserv1.PhaseRunning {
log.Info("cleaning up existing imagejob during startup", "job", job.Name, "phase", job.Status.Phase)
if err := r.Delete(ctx, job); err != nil {
if !k8sErrors.IsNotFound(err) {
log.Error(err, "error cleaning up previous imagejob", "job", job.Name)
return ctrl.Result{}, err
}
}
}
}

// Only create a new job if we don't have any running jobs
// Re-list to ensure we have the latest state after cleanup
updatedJobList := &eraserv1.ImageJobList{}
if err := r.List(ctx, updatedJobList, listOpts...); err != nil {
log.Error(err, "could not list imagejobs after cleanup")
return ctrl.Result{}, err
}

for _, job := range updatedJobList.Items {
if job.Status.Phase == eraserv1.PhaseRunning {
log.Info("found running job after cleanup, adopting", "job", job.Name)
firstReconcileDone = true
return ctrl.Result{}, nil
}
}

// No running jobs found, create a new one
log.Info("no running collector jobs found, creating new imagejob")
firstReconcileDone = true
return r.createImageJob(ctx)
}

Expand Down Expand Up @@ -397,7 +459,7 @@ func (r *Reconciler) createImageJob(ctx context.Context) (ctrl.Result, error) {
},
}

job := &eraserv1alpha1.ImageJob{
job := &eraserv1.ImageJob{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "imagejob-",
Labels: map[string]string{
Expand Down Expand Up @@ -544,6 +606,8 @@ func (r *Reconciler) handleCompletedImageJob(ctx context.Context, childJob *eras
errDelay := time.Duration(cleanupCfg.DelayOnFailure)

switch phase := childJob.Status.Phase; phase {
case eraserv1.PhaseRunning:
return ctrl.Result{}, nil
case eraserv1.PhaseCompleted:
log.Info("completed phase")
if childJob.Status.DeleteAfter == nil {
Expand Down Expand Up @@ -589,7 +653,7 @@ func (r *Reconciler) handleCompletedImageJob(ctx context.Context, childJob *eras
return res, err
}
default:
err = errors.New("should not reach this point for imagejob")
err = stdErrors.New("should not reach this point for imagejob")
log.Error(err, "imagejob not in completed or failed phase", "imagejob", childJob)
}

Expand Down
Loading
Loading