From 39945ca07dbb2954b8011d66dfa91f24a177bf9c Mon Sep 17 00:00:00 2001 From: AbdelrhmanHamouda Date: Fri, 13 Feb 2026 23:17:38 +0100 Subject: [PATCH 1/4] feat(helm): add Artifact Hub metadata and comprehensive chart documentation Add Artifact Hub repository metadata file with verified publisher configuration, enhance Chart.yaml with SEO-optimized description, keywords, annotations, and documentation links, create comprehensive chart README with installation examples and migration guidance, and update release workflow to copy artifacthub-repo.yml to published site. --- .github/workflows/release.yaml | 4 ++ artifacthub-repo.yml | 7 ++ charts/locust-k8s-operator/Chart.yaml | 32 ++++++++- charts/locust-k8s-operator/README.md | 98 +++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 artifacthub-repo.yml create mode 100644 charts/locust-k8s-operator/README.md diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 00a625a..94984a6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -115,6 +115,10 @@ jobs: run: | python -m mkdocs build + - name: 📝 Copy Artifact Hub metadata + run: | + cp artifacthub-repo.yml ./site/ + - name: 🚀 Deploy documentation uses: peaceiris/actions-gh-pages@v4 with: diff --git a/artifacthub-repo.yml b/artifacthub-repo.yml new file mode 100644 index 0000000..cb15f05 --- /dev/null +++ b/artifacthub-repo.yml @@ -0,0 +1,7 @@ +# Artifact Hub repository metadata file +# This file enables Verified Publisher status and ownership claim + +repositoryID: 8e1a59f6-36d5-4e2a-805e-aeb1c38784c0 +owners: + - name: Abdelrhman Hamouda + email: abdelrhman.ahmed@outlook.com diff --git a/charts/locust-k8s-operator/Chart.yaml b/charts/locust-k8s-operator/Chart.yaml index e3a3ac2..f98f292 100644 --- a/charts/locust-k8s-operator/Chart.yaml +++ b/charts/locust-k8s-operator/Chart.yaml @@ -1,8 +1,19 @@ apiVersion: v2 name: locust-k8s-operator -description: Locust Kubernetes Operator - Go v2 +description: Production-ready Kubernetes operator for distributed Locust load testing with native OpenTelemetry, HA support, and automatic v1→v2 migration icon: https://raw.githubusercontent.com/AbdelrhmanHamouda/locust-k8s-operator/master/docs/assets/images/favicon.png +# Keywords for Artifact Hub search discoverability +keywords: + - locust + - load-testing + - performance-testing + - kubernetes-operator + - distributed-testing + - opentelemetry + - observability + - stress-testing + # A chart can be either an 'application' or a 'library' chart. # # Application charts are a collection of templates that can be packaged into versioned archives @@ -29,10 +40,27 @@ home: https://github.com/AbdelrhmanHamouda/locust-k8s-operator sources: - https://github.com/AbdelrhmanHamouda/locust-k8s-operator maintainers: - - name: AbdelrhmanHamouda + - name: Abdelrhman Hamouda url: https://github.com/AbdelrhmanHamouda annotations: + # Artifact Hub specific metadata + artifacthub.io/license: Apache-2.0 + artifacthub.io/operator: "true" + artifacthub.io/operatorCapabilities: Basic Install + artifacthub.io/prerelease: "false" + artifacthub.io/containsSecurityUpdates: "false" + artifacthub.io/links: | + - name: Documentation + url: https://abdelrhmanhamouda.github.io/locust-k8s-operator/ + - name: Getting Started + url: https://abdelrhmanhamouda.github.io/locust-k8s-operator/getting_started/ + - name: Migration Guide (v1→v2) + url: https://abdelrhmanhamouda.github.io/locust-k8s-operator/migration/ + - name: GitHub Discussions + url: https://github.com/AbdelrhmanHamouda/locust-k8s-operator/discussions + - name: GitHub Issues + url: https://github.com/AbdelrhmanHamouda/locust-k8s-operator/issues artifacthub.io/changes: | - kind: changed description: Rewritten for Go operator with clean-slate design diff --git a/charts/locust-k8s-operator/README.md b/charts/locust-k8s-operator/README.md new file mode 100644 index 0000000..bf5495f --- /dev/null +++ b/charts/locust-k8s-operator/README.md @@ -0,0 +1,98 @@ +# Locust Kubernetes Operator + +[![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/locust-k8s-operator)](https://artifacthub.io/packages/helm/locust-k8s-operator/locust-k8s-operator) + +Production-ready Kubernetes operator for distributed Locust load testing with native OpenTelemetry support, high availability, and automatic v1→v2 migration. + +## ✨ Features + +- **🚀 Complete Go Rewrite**: 4x memory reduction, 60x faster startup +- **📊 Native OpenTelemetry**: Built-in metrics, traces, and optional collector deployment +- **🔒 High Availability**: Leader election support for multi-replica deployments +- **🔄 Zero-Downtime Migration**: Automatic conversion webhooks for v1→v2 upgrades +- **💚 Pod Health Monitoring**: Automatic pod recovery and health checks +- **🛡️ Production-Ready**: Comprehensive RBAC, security contexts, and resource management + +## 📦 Installation + +### Prerequisites + +- Kubernetes 1.25+ +- Helm 3.x + +### Quick Start + +```bash +# Add the Helm repository +helm repo add locust-k8s-operator https://abdelrhmanhamouda.github.io/locust-k8s-operator + +# Update repository +helm repo update + +# Install the operator +helm install my-locust-operator locust-k8s-operator/locust-k8s-operator + +# Install with OpenTelemetry collector +helm install my-locust-operator locust-k8s-operator/locust-k8s-operator \ + --set otelCollector.enabled=true +``` + +## ⚙️ Configuration + +### Key Values + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `replicas` | Number of operator replicas | `1` | +| `resources.limits.memory` | Memory limit | `128Mi` | +| `resources.limits.cpu` | CPU limit | `500m` | +| `webhook.enabled` | Enable conversion webhooks | `false` | +| `webhook.certManager.enabled` | Use cert-manager for webhook certs | `false` | +| `otelCollector.enabled` | Deploy OTel collector sidecar | `false` | +| `leaderElection.enabled` | Enable leader election for HA | `true` | + +### Example: Production Deployment with HA + +```bash +helm install locust-operator locust-k8s-operator/locust-k8s-operator \ + --set replicas=3 \ + --set leaderElection.enabled=true \ + --set webhook.enabled=true \ + --set webhook.certManager.enabled=true \ + --set otelCollector.enabled=true \ + --set resources.limits.memory=256Mi +``` + +## 🔄 Migrating from v1.x + +The operator includes automatic conversion webhooks for zero-downtime migration: + +```bash +# Install v2 with webhooks enabled +helm upgrade locust-operator locust-k8s-operator/locust-k8s-operator \ + --set webhook.enabled=true \ + --set webhook.certManager.enabled=true + +# Your existing v1 LocustTest resources will be automatically converted +``` + +For detailed migration guide, see: https://abdelrhmanhamouda.github.io/locust-k8s-operator/migration/ + +## 📚 Documentation + +- **Full Documentation**: https://abdelrhmanhamouda.github.io/locust-k8s-operator/ +- **Getting Started**: https://abdelrhmanhamouda.github.io/locust-k8s-operator/getting_started/ +- **Migration Guide**: https://abdelrhmanhamouda.github.io/locust-k8s-operator/migration/ +- **API Reference**: https://abdelrhmanhamouda.github.io/locust-k8s-operator/api_reference/ + +## 🐛 Support + +- **GitHub Issues**: https://github.com/AbdelrhmanHamouda/locust-k8s-operator/issues + +## 📝 License + +Apache-2.0 + +## 🙏 Contributing + +Contributions are welcome! Please see our [Contributing Guide](https://github.com/AbdelrhmanHamouda/locust-k8s-operator/blob/master/CONTRIBUTING.md). From e13081cfab6dc95a2f37db44df216e7bf3fcb25b Mon Sep 17 00:00:00 2001 From: AbdelrhmanHamouda Date: Fri, 13 Feb 2026 23:18:32 +0100 Subject: [PATCH 2/4] refactor(controller): extract external deletion handling and resource validation into helper functions Extract repeated external resource deletion logic into handleExternalResourceDeletion helper, consolidate Service and Job existence checks into checkResourcesExist function, add shouldSkipStatusUpdate helper for terminal state detection, and enhance buildResourceList with defensive logging for invalid quantities that bypass startup validation. --- internal/controller/locusttest_controller.go | 178 +++++++++---------- internal/resources/job.go | 19 +- 2 files changed, 98 insertions(+), 99 deletions(-) diff --git a/internal/controller/locusttest_controller.go b/internal/controller/locusttest_controller.go index 6751e85..040b03c 100644 --- a/internal/controller/locusttest_controller.go +++ b/internal/controller/locusttest_controller.go @@ -258,21 +258,25 @@ func (r *LocustTestReconciler) createResource(ctx context.Context, lt *locustv2. return nil } -// reconcileStatus updates the LocustTest status based on owned Job states. -// Called when resources already exist and we need to track Job completion. -func (r *LocustTestReconciler) reconcileStatus(ctx context.Context, lt *locustv2.LocustTest) (ctrl.Result, error) { +// handleExternalResourceDeletion handles the case where a resource was externally deleted. +// It transitions the LocustTest to Pending phase to trigger recreation on the next reconcile. +// Returns (shouldRequeue, requeueAfter, error). +func (r *LocustTestReconciler) handleExternalResourceDeletion( + ctx context.Context, + lt *locustv2.LocustTest, + resourceName, resourceKind string, + obj client.Object, +) (bool, time.Duration, error) { log := logf.FromContext(ctx) - // Check for externally deleted Service - masterServiceName := lt.Name + "-master" - masterService := &corev1.Service{} - if err := r.Get(ctx, client.ObjectKey{Name: masterServiceName, Namespace: lt.Namespace}, masterService); err != nil { + // Try to fetch the resource + if err := r.Get(ctx, client.ObjectKey{Name: resourceName, Namespace: lt.Namespace}, obj); err != nil { if apierrors.IsNotFound(err) { - // Service was externally deleted — transition to Pending for recovery - log.Info("Master Service externally deleted, transitioning to Pending for recovery", - "service", masterServiceName) + // Resource was externally deleted — transition to Pending for recovery + log.Info(fmt.Sprintf("%s externally deleted, transitioning to Pending for recovery", resourceKind), + resourceKind, resourceName) r.Recorder.Event(lt, corev1.EventTypeWarning, "ResourceDeleted", - fmt.Sprintf("Master Service %s was deleted externally, will attempt recreation", masterServiceName)) + fmt.Sprintf("%s %s was deleted externally, will attempt recreation", resourceKind, resourceName)) // Reset to Pending to trigger resource recreation on next reconcile log.Info("Attempting to update status to Pending after external deletion", @@ -292,104 +296,90 @@ func (r *LocustTestReconciler) reconcileStatus(ctx context.Context, lt *locustv2 r.setReady(lt, false, locustv2.ReasonResourcesCreating, "Recreating externally deleted resources") return r.Status().Update(ctx, lt) }); err != nil { - log.Error(err, "Failed to update status after detecting Service deletion", - "service", masterServiceName, + log.Error(err, fmt.Sprintf("Failed to update status after detecting %s deletion", resourceKind), + resourceKind, resourceName, "retryAttempts", "exhausted") // Still requeue to retry the entire reconciliation - return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + return true, 5 * time.Second, nil } - log.Info("Successfully updated status to Pending, will recreate Service", - "service", masterServiceName) - return ctrl.Result{RequeueAfter: time.Second}, nil + log.Info(fmt.Sprintf("Successfully updated status to Pending, will recreate %s", resourceKind), + resourceKind, resourceName) + return true, time.Second, nil } - return ctrl.Result{}, fmt.Errorf("failed to get master Service: %w", err) + return false, 0, fmt.Errorf("failed to get %s: %w", resourceKind, err) } - // Don't update if already in terminal state (unless resources are missing — handled above) - if lt.Status.Phase == locustv2.PhaseSucceeded || lt.Status.Phase == locustv2.PhaseFailed { - return ctrl.Result{}, nil + // Resource exists, no action needed + return false, 0, nil +} + +// checkResourcesExist verifies that all required resources (Service, Master Job, Worker Job) exist. +// If any resource is missing, it handles external deletion recovery. +// Returns (masterJob, workerJob, shouldRequeue, requeueAfter, error). +func (r *LocustTestReconciler) checkResourcesExist( + ctx context.Context, + lt *locustv2.LocustTest, +) (*batchv1.Job, *batchv1.Job, bool, time.Duration, error) { + // Check for externally deleted Service + masterServiceName := lt.Name + "-master" + masterService := &corev1.Service{} + if shouldRequeue, requeueAfter, err := r.handleExternalResourceDeletion( + ctx, lt, masterServiceName, "Master Service", masterService, + ); err != nil { + return nil, nil, false, 0, err + } else if shouldRequeue { + return nil, nil, true, requeueAfter, nil } - // Fetch master Job to determine status + // Check for externally deleted master Job masterJob := &batchv1.Job{} masterJobName := lt.Name + "-master" - if err := r.Get(ctx, client.ObjectKey{Name: masterJobName, Namespace: lt.Namespace}, masterJob); err != nil { - if apierrors.IsNotFound(err) { - // Job was externally deleted — same recovery pattern - log.Info("Master Job externally deleted, transitioning to Pending for recovery", - "job", masterJobName) - r.Recorder.Event(lt, corev1.EventTypeWarning, "ResourceDeleted", - fmt.Sprintf("Master Job %s was deleted externally, will attempt recreation", masterJobName)) - - log.Info("Attempting to update status to Pending after external deletion", - "currentPhase", lt.Status.Phase, - "generation", lt.Generation, - "observedGeneration", lt.Status.ObservedGeneration) - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err := r.Get(ctx, client.ObjectKeyFromObject(lt), lt); err != nil { - log.Error(err, "Failed to re-fetch LocustTest during status update retry") - return err - } - log.V(1).Info("Re-fetched LocustTest for status update", - "resourceVersion", lt.ResourceVersion, - "phase", lt.Status.Phase) - lt.Status.Phase = locustv2.PhasePending - lt.Status.ObservedGeneration = lt.Generation - r.setReady(lt, false, locustv2.ReasonResourcesCreating, "Recreating externally deleted resources") - return r.Status().Update(ctx, lt) - }); err != nil { - log.Error(err, "Failed to update status after detecting master Job deletion", - "job", masterJobName, - "retryAttempts", "exhausted") - // Still requeue to retry the entire reconciliation - return ctrl.Result{RequeueAfter: 5 * time.Second}, nil - } - log.Info("Successfully updated status to Pending, will recreate master Job", - "job", masterJobName) - return ctrl.Result{RequeueAfter: time.Second}, nil - } - return ctrl.Result{}, fmt.Errorf("failed to get master Job: %w", err) + if shouldRequeue, requeueAfter, err := r.handleExternalResourceDeletion( + ctx, lt, masterJobName, "Master Job", masterJob, + ); err != nil { + return nil, nil, false, 0, err + } else if shouldRequeue { + return nil, nil, true, requeueAfter, nil } - // Fetch worker Job for worker count + // Check for externally deleted worker Job workerJob := &batchv1.Job{} workerJobName := lt.Name + "-worker" - if err := r.Get(ctx, client.ObjectKey{Name: workerJobName, Namespace: lt.Namespace}, workerJob); err != nil { - if apierrors.IsNotFound(err) { - // Worker Job externally deleted — recovery - log.Info("Worker Job externally deleted, transitioning to Pending for recovery", - "job", workerJobName) - r.Recorder.Event(lt, corev1.EventTypeWarning, "ResourceDeleted", - fmt.Sprintf("Worker Job %s was deleted externally, will attempt recreation", workerJobName)) + if shouldRequeue, requeueAfter, err := r.handleExternalResourceDeletion( + ctx, lt, workerJobName, "Worker Job", workerJob, + ); err != nil { + return nil, nil, false, 0, err + } else if shouldRequeue { + return nil, nil, true, requeueAfter, nil + } - log.Info("Attempting to update status to Pending after external deletion", - "currentPhase", lt.Status.Phase, - "generation", lt.Generation, - "observedGeneration", lt.Status.ObservedGeneration) - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if err := r.Get(ctx, client.ObjectKeyFromObject(lt), lt); err != nil { - log.Error(err, "Failed to re-fetch LocustTest during status update retry") - return err - } - log.V(1).Info("Re-fetched LocustTest for status update", - "resourceVersion", lt.ResourceVersion, - "phase", lt.Status.Phase) - lt.Status.Phase = locustv2.PhasePending - lt.Status.ObservedGeneration = lt.Generation - r.setReady(lt, false, locustv2.ReasonResourcesCreating, "Recreating externally deleted resources") - return r.Status().Update(ctx, lt) - }); err != nil { - log.Error(err, "Failed to update status after detecting worker Job deletion", - "job", workerJobName, - "retryAttempts", "exhausted") - // Still requeue to retry the entire reconciliation - return ctrl.Result{RequeueAfter: 5 * time.Second}, nil - } - log.Info("Successfully updated status to Pending, will recreate worker Job", - "job", workerJobName) - return ctrl.Result{RequeueAfter: time.Second}, nil - } - return ctrl.Result{}, fmt.Errorf("failed to get worker Job: %w", err) + // All resources exist + return masterJob, workerJob, false, 0, nil +} + +// shouldSkipStatusUpdate checks if the LocustTest is in a terminal state where status updates should be skipped. +// Returns true if Phase is Succeeded or Failed (terminal states). +func shouldSkipStatusUpdate(lt *locustv2.LocustTest) bool { + return lt.Status.Phase == locustv2.PhaseSucceeded || lt.Status.Phase == locustv2.PhaseFailed +} + +// reconcileStatus updates the LocustTest status based on owned Job states. +// Called when resources already exist and we need to track Job completion. +func (r *LocustTestReconciler) reconcileStatus(ctx context.Context, lt *locustv2.LocustTest) (ctrl.Result, error) { + log := logf.FromContext(ctx) + + // Check that all resources exist, handle external deletion if needed + masterJob, workerJob, shouldRequeue, requeueAfter, err := r.checkResourcesExist(ctx, lt) + if err != nil { + return ctrl.Result{}, err + } + if shouldRequeue { + return ctrl.Result{RequeueAfter: requeueAfter}, nil + } + + // Don't update if already in terminal state (unless resources are missing — handled above) + if shouldSkipStatusUpdate(lt) { + return ctrl.Result{}, nil } // Check pod health before updating status from Jobs diff --git a/internal/resources/job.go b/internal/resources/job.go index 1925d00..dc24320 100644 --- a/internal/resources/job.go +++ b/internal/resources/job.go @@ -18,6 +18,7 @@ package resources import ( "fmt" + "log" locustv2 "github.com/AbdelrhmanHamouda/locust-k8s-operator/api/v2" "github.com/AbdelrhmanHamouda/locust-k8s-operator/internal/config" @@ -385,21 +386,29 @@ func hasResourcesSpecified(r *corev1.ResourceRequirements) bool { // buildResourceList creates a ResourceList from CPU, memory, and ephemeral storage strings. // Empty strings are skipped (not added to the resource list). -// Safe parsing is used (errors ignored) because values are pre-validated at operator startup. +// Values are pre-validated at operator startup, but defensive logging is included to detect validation bypasses. func buildResourceList(cpu, memory, ephemeral string) corev1.ResourceList { resources := corev1.ResourceList{} if cpu != "" { - // Safe: Already validated at startup in LoadConfig - q, _ := resource.ParseQuantity(cpu) + q, err := resource.ParseQuantity(cpu) + if err != nil { + log.Printf("CRITICAL: Invalid CPU quantity %q passed startup validation: %v", cpu, err) + } resources[corev1.ResourceCPU] = q } if memory != "" { - q, _ := resource.ParseQuantity(memory) + q, err := resource.ParseQuantity(memory) + if err != nil { + log.Printf("CRITICAL: Invalid memory quantity %q passed startup validation: %v", memory, err) + } resources[corev1.ResourceMemory] = q } if ephemeral != "" { - q, _ := resource.ParseQuantity(ephemeral) + q, err := resource.ParseQuantity(ephemeral) + if err != nil { + log.Printf("CRITICAL: Invalid ephemeral storage quantity %q passed startup validation: %v", ephemeral, err) + } resources[corev1.ResourceEphemeralStorage] = q } From 108b9b6d6046779dda0a96ef8ecb99829825e961 Mon Sep 17 00:00:00 2001 From: AbdelrhmanHamouda Date: Fri, 13 Feb 2026 23:23:34 +0100 Subject: [PATCH 3/4] chore(codacy): exclude markdown files from code quality analysis --- .codacy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.codacy.yml b/.codacy.yml index cec35e4..d06261e 100644 --- a/.codacy.yml +++ b/.codacy.yml @@ -1,3 +1,4 @@ exclude_paths: - "**/zz_generated.deepcopy.go" - "test/utils/**" + - "**/*.md" From 099be12dc45a25c5330b2de883dc1be5b7b45faa Mon Sep 17 00:00:00 2001 From: AbdelrhmanHamouda Date: Fri, 13 Feb 2026 23:25:25 +0100 Subject: [PATCH 4/4] chore(helm): bump chart version to 2.0.1 --- charts/locust-k8s-operator/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/locust-k8s-operator/Chart.yaml b/charts/locust-k8s-operator/Chart.yaml index f98f292..b1c9a5e 100644 --- a/charts/locust-k8s-operator/Chart.yaml +++ b/charts/locust-k8s-operator/Chart.yaml @@ -27,7 +27,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 2.0.0 +version: 2.0.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to