diff --git a/ray-operator/controllers/ray/metrics/ray_cluster_metrics.go b/ray-operator/controllers/ray/metrics/ray_cluster_metrics.go
index 4c9de270eac..78b837f6f44 100644
--- a/ray-operator/controllers/ray/metrics/ray_cluster_metrics.go
+++ b/ray-operator/controllers/ray/metrics/ray_cluster_metrics.go
@@ -89,6 +89,14 @@ func (r *RayClusterMetricsManager) ObserveRayClusterProvisionedDuration(name, na
 	r.rayClusterProvisionedDurationSeconds.WithLabelValues(name, namespace).Set(duration)
 }
 
+// DeleteRayClusterMetrics removes the metrics that belong to the specified RayCluster.
+// NOTE: Delete() is used because the metric currently has only the "name" and "namespace" labels.
+// If more labels are added, switch to DeletePartialMatch(); otherwise not all series will be cleaned up.
+func (r *RayClusterMetricsManager) DeleteRayClusterMetrics(name, namespace string) {
+	deleted := r.rayClusterProvisionedDurationSeconds.Delete(prometheus.Labels{"name": name, "namespace": namespace})
+	r.log.Info("Cleaned up RayCluster metrics", "name", name, "namespace", namespace, "deleted", deleted)
+}
+
 func (r *RayClusterMetricsManager) collectRayClusterInfo(cluster *rayv1.RayCluster, ch chan<- prometheus.Metric) {
 	ownerKind := "None"
 	if v, ok := cluster.Labels[utils.RayOriginatedFromCRDLabelKey]; ok {
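For reviewers unfamiliar with the two prometheus APIs mentioned in the NOTE above: GaugeVec.Delete removes only the series whose label set matches exactly and reports a bool, while GaugeVec.DeletePartialMatch removes every series containing the given label subset and reports how many were removed. A minimal sketch of the difference, using an illustrative metric and labels that are not taken from this PR:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Hypothetical metric with an extra "status" label, used only to illustrate the API.
	gauge := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "example_duration_seconds", Help: "sketch only"},
		[]string{"name", "namespace", "status"},
	)
	gauge.WithLabelValues("cluster1", "ns1", "ready").Set(1)
	gauge.WithLabelValues("cluster1", "ns1", "failed").Set(2)

	// Delete requires the complete label set; a partial set matches nothing.
	fmt.Println(gauge.Delete(prometheus.Labels{"name": "cluster1", "namespace": "ns1"})) // false

	// DeletePartialMatch removes every series whose labels contain the given subset.
	fmt.Println(gauge.DeletePartialMatch(prometheus.Labels{"name": "cluster1", "namespace": "ns1"})) // 2
}

Because kuberay_cluster_provisioned_duration_seconds carries exactly the name and namespace labels today, the exact-match Delete above is sufficient.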
diff --git a/ray-operator/controllers/ray/metrics/ray_cluster_metrics_test.go b/ray-operator/controllers/ray/metrics/ray_cluster_metrics_test.go
index fa4eb2421fa..67b620728b8 100644
--- a/ray-operator/controllers/ray/metrics/ray_cluster_metrics_test.go
+++ b/ray-operator/controllers/ray/metrics/ray_cluster_metrics_test.go
@@ -3,11 +3,9 @@ package metrics
 import (
 	"context"
 	"net/http"
-	"net/http/httptest"
 	"testing"
 
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -16,6 +14,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
 	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+	"github.com/ray-project/kuberay/ray-operator/test/support"
 )
 
 func TestRayClusterInfo(t *testing.T) {
@@ -65,28 +64,21 @@ func TestRayClusterInfo(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, label := range tc.expectedMetrics {
 				assert.Contains(t, body, label)
 			}
 
 			if len(tc.clusters) > 0 {
-				err = client.Delete(t.Context(), &tc.clusters[0])
+				err := client.Delete(t.Context(), &tc.clusters[0])
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 
 			assert.NotContains(t, body2, tc.expectedMetrics[0])
 			for _, label := range tc.expectedMetrics[1:] {
@@ -96,6 +88,65 @@ func TestRayClusterInfo(t *testing.T) {
 	}
 }
 
+func TestDeleteRayClusterMetrics(t *testing.T) {
+	k8sScheme := runtime.NewScheme()
+	require.NoError(t, rayv1.AddToScheme(k8sScheme))
+	client := fake.NewClientBuilder().WithScheme(k8sScheme).Build()
+	manager := NewRayClusterMetricsManager(context.Background(), client)
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(manager)
+
+	// Test case 1: Delete specific cluster metrics
+	// Manually add some metrics
+	manager.rayClusterProvisionedDurationSeconds.With(prometheus.Labels{"name": "cluster1", "namespace": "ns1"}).Set(10.5)
+	manager.rayClusterProvisionedDurationSeconds.With(prometheus.Labels{"name": "cluster2", "namespace": "ns2"}).Set(20.3)
+	manager.rayClusterProvisionedDurationSeconds.With(prometheus.Labels{"name": "cluster3", "namespace": "ns1"}).Set(5.7)
+
+	// Test deleting metrics for cluster1 in ns1
+	manager.DeleteRayClusterMetrics("cluster1", "ns1")
+
+	// Verify metrics
+	body, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body, `kuberay_cluster_provisioned_duration_seconds{name="cluster1",namespace="ns1"}`)
+	assert.Contains(t, body, `kuberay_cluster_provisioned_duration_seconds{name="cluster2",namespace="ns2"}`)
+	assert.Contains(t, body, `kuberay_cluster_provisioned_duration_seconds{name="cluster3",namespace="ns1"}`)
+
+	// Test case 2: Delete with empty name
+	manager.DeleteRayClusterMetrics("", "ns1")
+
+	// Verify metrics again
+	body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body2, `kuberay_cluster_provisioned_duration_seconds{name="cluster1",namespace="ns1"}`)
+	assert.Contains(t, body2, `kuberay_cluster_provisioned_duration_seconds{name="cluster3",namespace="ns1"}`)
+	assert.Contains(t, body2, `kuberay_cluster_provisioned_duration_seconds{name="cluster2",namespace="ns2"}`)
+
+	// Test case 3: Delete with empty name and namespace
+	manager.DeleteRayClusterMetrics("", "")
+
+	// Verify no additional metrics were deleted
+	body3, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body3, `kuberay_cluster_provisioned_duration_seconds{name="cluster1",namespace="ns1"}`)
+	assert.Contains(t, body3, `kuberay_cluster_provisioned_duration_seconds{name="cluster3",namespace="ns1"}`)
+	assert.Contains(t, body3, `kuberay_cluster_provisioned_duration_seconds{name="cluster2",namespace="ns2"}`)
+
+	// Test case 4: Delete with name and namespace swapped (should match nothing)
+	manager.DeleteRayClusterMetrics("ns2", "cluster2")
+
+	// Verify no additional metrics were deleted
+	body4, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body4, `kuberay_cluster_provisioned_duration_seconds{name="cluster1",namespace="ns1"}`)
+	assert.Contains(t, body4, `kuberay_cluster_provisioned_duration_seconds{name="cluster3",namespace="ns1"}`)
+	assert.Contains(t, body4, `kuberay_cluster_provisioned_duration_seconds{name="cluster2",namespace="ns2"}`)
+}
+
 func TestRayClusterConditionProvisioned(t *testing.T) {
 	tests := []struct {
 		name string
@@ -155,28 +206,21 @@ func TestRayClusterConditionProvisioned(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, metric := range tc.expectedMetrics {
 				assert.Contains(t, body, metric)
 			}
 
 			if len(tc.clusters) > 0 {
-				err = client.Delete(t.Context(), &tc.clusters[0])
+				err := client.Delete(context.Background(), &tc.clusters[0])
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 
 			assert.NotContains(t, body2, tc.expectedMetrics[0])
 			for _, metric := range tc.expectedMetrics[1:] {
diff --git a/ray-operator/controllers/ray/metrics/ray_job_metrics.go b/ray-operator/controllers/ray/metrics/ray_job_metrics.go
index e4e415ac681..984ec78d7e8 100644
--- a/ray-operator/controllers/ray/metrics/ray_job_metrics.go
+++ b/ray-operator/controllers/ray/metrics/ray_job_metrics.go
@@ -84,6 +84,12 @@ func (r *RayJobMetricsManager) ObserveRayJobExecutionDuration(name, namespace st
 	r.rayJobExecutionDurationSeconds.WithLabelValues(name, namespace, string(jobDeploymentStatus), strconv.Itoa(retryCount)).Set(duration)
 }
 
+// DeleteRayJobMetrics removes the metrics that belong to the specified RayJob.
+func (r *RayJobMetricsManager) DeleteRayJobMetrics(name, namespace string) {
+	numCleanedUpMetrics := r.rayJobExecutionDurationSeconds.DeletePartialMatch(prometheus.Labels{"name": name, "namespace": namespace})
+	r.log.Info("Cleaned up RayJob metrics", "name", name, "namespace", namespace, "numCleanedUpMetrics", numCleanedUpMetrics)
+}
+
 func (r *RayJobMetricsManager) collectRayJobInfo(rayJob *rayv1.RayJob, ch chan<- prometheus.Metric) {
 	ch <- prometheus.MustNewConstMetric(
 		r.rayJobInfo,
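The RayJob metric is the opposite case: kuberay_job_execution_duration_seconds also carries job_deployment_status and retry_count labels, and the cleanup path only knows the job's name and namespace, so DeletePartialMatch is what removes every series the job left behind, including series from earlier retries. A rough sketch of the behaviour relied on here, with illustrative values only:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Stand-in for rayJobExecutionDurationSeconds; label names match the PR, values are made up.
	gauge := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "kuberay_job_execution_duration_seconds", Help: "sketch only"},
		[]string{"name", "namespace", "job_deployment_status", "retry_count"},
	)
	// The same RayJob can leave several series behind across retries and status changes.
	gauge.WithLabelValues("job1", "ns1", "Failed", "0").Set(12.0)
	gauge.WithLabelValues("job1", "ns1", "Complete", "1").Set(34.0)

	// Matching on name and namespace alone removes both series; an exact-match
	// Delete would need all four label values, which the controller no longer has.
	fmt.Println(gauge.DeletePartialMatch(prometheus.Labels{"name": "job1", "namespace": "ns1"})) // 2
}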
diff --git a/ray-operator/controllers/ray/metrics/ray_job_metrics_test.go b/ray-operator/controllers/ray/metrics/ray_job_metrics_test.go
index 46879fcdc77..3b9d3a3bd34 100644
--- a/ray-operator/controllers/ray/metrics/ray_job_metrics_test.go
+++ b/ray-operator/controllers/ray/metrics/ray_job_metrics_test.go
@@ -3,11 +3,9 @@ package metrics
 import (
 	"context"
 	"net/http"
-	"net/http/httptest"
 	"testing"
 
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -16,6 +14,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
 	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+	"github.com/ray-project/kuberay/ray-operator/test/support"
 )
 
 func TestMetricRayJobInfo(t *testing.T) {
@@ -61,29 +60,21 @@ func TestMetricRayJobInfo(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, label := range tc.expectedMetrics {
 				assert.Contains(t, body, label)
 			}
 
 			if len(tc.rayJobs) > 0 {
-				err = client.Delete(t.Context(), &tc.rayJobs[0])
+				err := client.Delete(t.Context(), &tc.rayJobs[0])
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
-
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
+			assert.Equal(t, http.StatusOK, statusCode)
 			assert.NotContains(t, body2, tc.expectedMetrics[0])
 			for _, label := range tc.expectedMetrics[1:] {
 				assert.Contains(t, body2, label)
@@ -92,6 +83,66 @@ func TestMetricRayJobInfo(t *testing.T) {
 	}
 }
 
+func TestDeleteRayJobMetrics(t *testing.T) {
+	k8sScheme := runtime.NewScheme()
+	require.NoError(t, rayv1.AddToScheme(k8sScheme))
+	client := fake.NewClientBuilder().WithScheme(k8sScheme).Build()
+	manager := NewRayJobMetricsManager(context.Background(), client)
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(manager)
+
+	// Test case 1: Delete specific job metrics
+	// Manually add some metrics
+	manager.ObserveRayJobExecutionDuration("job1", "ns1", rayv1.JobDeploymentStatusComplete, 0, 10.5)
+	manager.ObserveRayJobExecutionDuration("job2", "ns2", rayv1.JobDeploymentStatusFailed, 1, 20.3)
+	manager.ObserveRayJobExecutionDuration("job3", "ns1", rayv1.JobDeploymentStatusRunning, 0, 5.7)
+
+	// Test deleting metrics for job1 in ns1
+	manager.DeleteRayJobMetrics("job1", "ns1")
+
+	// Verify metrics
+	body, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body, `kuberay_job_execution_duration_seconds{job_deployment_status="Complete",name="job1",namespace="ns1",retry_count="0"}`)
+	assert.Contains(t, body, `kuberay_job_execution_duration_seconds{job_deployment_status="Failed",name="job2",namespace="ns2",retry_count="1"}`)
+	assert.Contains(t, body, `kuberay_job_execution_duration_seconds{job_deployment_status="Running",name="job3",namespace="ns1",retry_count="0"}`)
+
+	// Test case 2: Delete with empty name
+	manager.DeleteRayJobMetrics("", "ns1")
+
+	// Verify metrics again
+	body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body2, `kuberay_job_execution_duration_seconds{job_deployment_status="Complete",name="job1",namespace="ns1",retry_count="0"}`)
+	assert.Contains(t, body2, `kuberay_job_execution_duration_seconds{job_deployment_status="Failed",name="job2",namespace="ns2",retry_count="1"}`)
+	assert.Contains(t, body2, `kuberay_job_execution_duration_seconds{job_deployment_status="Running",name="job3",namespace="ns1",retry_count="0"}`)
+
+	// Test case 3: Delete with empty name and namespace
+	manager.DeleteRayJobMetrics("", "")
+
+	// Verify no additional metrics were deleted
+	body3, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body3, `kuberay_job_execution_duration_seconds{job_deployment_status="Complete",name="job1",namespace="ns1",retry_count="0"}`)
+	assert.Contains(t, body3, `kuberay_job_execution_duration_seconds{job_deployment_status="Failed",name="job2",namespace="ns2",retry_count="1"}`)
+	assert.Contains(t, body3, `kuberay_job_execution_duration_seconds{job_deployment_status="Running",name="job3",namespace="ns1",retry_count="0"}`)
+
+	// Test case 4: Delete with name and namespace swapped (should match nothing)
+	manager.DeleteRayJobMetrics("ns2", "job2")
+
+	// Verify no additional metrics were deleted
+	body4, statusCode := support.GetMetricsResponseAndCode(t, reg)
+
+	assert.Equal(t, http.StatusOK, statusCode)
+	assert.NotContains(t, body4, `kuberay_job_execution_duration_seconds{job_deployment_status="Complete",name="job1",namespace="ns1",retry_count="0"}`)
+	assert.Contains(t, body4, `kuberay_job_execution_duration_seconds{job_deployment_status="Failed",name="job2",namespace="ns2",retry_count="1"}`)
+	assert.Contains(t, body4, `kuberay_job_execution_duration_seconds{job_deployment_status="Running",name="job3",namespace="ns1",retry_count="0"}`)
+
+}
+
 func TestMetricRayJobDeploymentStatus(t *testing.T) {
 	tests := []struct {
 		name string
@@ -141,28 +192,21 @@ func TestMetricRayJobDeploymentStatus(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, label := range tc.expectedMetrics {
 				assert.Contains(t, body, label)
 			}
 
 			if len(tc.rayJobs) > 0 {
-				err = client.Delete(t.Context(), &tc.rayJobs[0])
+				err := client.Delete(context.Background(), &tc.rayJobs[0])
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 
 			assert.NotContains(t, body2, tc.expectedMetrics[0])
 			for _, label := range tc.expectedMetrics[1:] {
diff --git a/ray-operator/controllers/ray/metrics/ray_service_metrics_test.go b/ray-operator/controllers/ray/metrics/ray_service_metrics_test.go
index 16f8046311f..a502d1f9d56 100644
--- a/ray-operator/controllers/ray/metrics/ray_service_metrics_test.go
+++ b/ray-operator/controllers/ray/metrics/ray_service_metrics_test.go
@@ -3,11 +3,9 @@ package metrics
 import (
 	"context"
 	"net/http"
-	"net/http/httptest"
 	"testing"
 
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -16,6 +14,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
 	rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
+	"github.com/ray-project/kuberay/ray-operator/test/support"
 )
 
 func TestRayServiceInfo(t *testing.T) {
@@ -60,14 +59,9 @@ func TestRayServiceInfo(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, info := range tc.expectedInfo {
 				assert.Contains(t, body, info)
 			}
@@ -77,11 +71,9 @@ func TestRayServiceInfo(t *testing.T) {
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 
 			assert.NotContains(t, body2, tc.expectedInfo[0])
 			for _, info := range tc.expectedInfo[1:] {
@@ -186,14 +178,9 @@ func TestRayServiceCondition(t *testing.T) {
 			reg := prometheus.NewRegistry()
 			reg.MustRegister(manager)
 
-			req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
-			require.NoError(t, err)
-			rr := httptest.NewRecorder()
-			handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
-			handler.ServeHTTP(rr, req)
+			body, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr.Code)
-			body := rr.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 			for _, info := range tc.expectedInfo {
 				assert.Contains(t, body, info)
 			}
@@ -203,11 +190,9 @@ func TestRayServiceCondition(t *testing.T) {
 				require.NoError(t, err)
 			}
 
-			rr2 := httptest.NewRecorder()
-			handler.ServeHTTP(rr2, req)
+			body2, statusCode := support.GetMetricsResponseAndCode(t, reg)
 
-			assert.Equal(t, http.StatusOK, rr2.Code)
-			body2 := rr2.Body.String()
+			assert.Equal(t, http.StatusOK, statusCode)
 
 			assert.NotContains(t, body2, tc.expectedInfo[0])
 			for _, info := range tc.expectedInfo[1:] {
diff --git a/ray-operator/controllers/ray/raycluster_controller.go b/ray-operator/controllers/ray/raycluster_controller.go
index 9d1602fe0d8..b9b1ada2d6a 100644
--- a/ray-operator/controllers/ray/raycluster_controller.go
+++ b/ray-operator/controllers/ray/raycluster_controller.go
@@ -123,6 +123,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, request ctrl.Reque
 		if errors.IsNotFound(err) {
 			// Clear all related expectations
 			r.rayClusterScaleExpectation.Delete(instance.Name, instance.Namespace)
+			cleanUpRayClusterMetrics(r.options.RayClusterMetricsManager, request.Name, request.Namespace)
 		} else {
 			logger.Error(err, "Read request instance error!")
 		}
@@ -1559,6 +1560,13 @@ func emitRayClusterProvisionedDuration(RayClusterMetricsObserver metrics.RayClus
 	}
 }
 
+func cleanUpRayClusterMetrics(rayClusterMetricsManager *metrics.RayClusterMetricsManager, clusterName, namespace string) {
+	if rayClusterMetricsManager == nil {
+		return
+	}
+	rayClusterMetricsManager.DeleteRayClusterMetrics(clusterName, namespace)
+}
+
 // sumGPUs sums the GPUs in the given resource list.
 func sumGPUs(resources map[corev1.ResourceName]resource.Quantity) resource.Quantity {
 	totalGPUs := resource.Quantity{}
diff --git a/ray-operator/controllers/ray/rayjob_controller.go b/ray-operator/controllers/ray/rayjob_controller.go
index 3fdeb2de76d..5655930eb1a 100644
--- a/ray-operator/controllers/ray/rayjob_controller.go
+++ b/ray-operator/controllers/ray/rayjob_controller.go
@@ -91,7 +91,8 @@ func (r *RayJobReconciler) Reconcile(ctx context.Context, request ctrl.Request)
 	if err := r.Get(ctx, request.NamespacedName, rayJobInstance); err != nil {
 		if errors.IsNotFound(err) {
 			// Request object not found, could have been deleted after reconcile request. Stop reconciliation.
-			logger.Info("RayJob resource not found. Ignoring since object must be deleted")
+			logger.Info("RayJob resource not found. Cleaning up its metrics since the object has been deleted")
+			cleanUpRayJobMetrics(r.options.RayJobMetricsManager, request.Name, request.Namespace)
 			return ctrl.Result{}, nil
 		}
 		// Error reading the object - requeue the request.
@@ -495,6 +496,13 @@ func emitRayJobExecutionDuration(rayJobMetricsObserver metrics.RayJobMetricsObse
 	}
 }
 
+func cleanUpRayJobMetrics(rayJobMetricsManager *metrics.RayJobMetricsManager, rayJobName, rayJobNamespace string) {
+	if rayJobMetricsManager == nil {
+		return
+	}
+	rayJobMetricsManager.DeleteRayJobMetrics(rayJobName, rayJobNamespace)
+}
+
 // checkBackoffLimitAndUpdateStatusIfNeeded determines if a RayJob is eligible for retry based on the configured backoff limit,
 // the job's success status, and its failure status. If eligible, sets the JobDeploymentStatus to Retrying.
 func checkBackoffLimitAndUpdateStatusIfNeeded(ctx context.Context, rayJob *rayv1.RayJob) {
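The cleanUp helpers above are deliberately nil-safe, since the metrics managers on the reconciler options may be unset (for example, when metrics collection is not enabled). A hypothetical unit test, not part of this PR, that pins down the no-op behaviour could look like this (the test name and placement are assumptions):

package ray

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestCleanUpMetricsWithNilManager(t *testing.T) {
	// Cleanup must be a no-op, and must not panic, when no metrics manager is configured.
	assert.NotPanics(t, func() {
		cleanUpRayClusterMetrics(nil, "cluster1", "ns1")
		cleanUpRayJobMetrics(nil, "job1", "ns1")
	})
}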
diff --git a/ray-operator/test/support/metrics.go b/ray-operator/test/support/metrics.go
new file mode 100644
index 00000000000..44074f0e573
--- /dev/null
+++ b/ray-operator/test/support/metrics.go
@@ -0,0 +1,26 @@
+package support
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+	"github.com/stretchr/testify/require"
+)
+
+// GetMetricsResponseAndCode simulates an HTTP GET request to the /metrics endpoint,
+// processes it using a Prometheus handler built from the provided registry,
+// and returns the resulting response body as a string along with the HTTP status code.
+func GetMetricsResponseAndCode(t *testing.T, reg *prometheus.Registry) (string, int) {
+	t.Helper()
+	req, err := http.NewRequestWithContext(t.Context(), http.MethodGet, "/metrics", nil)
+	require.NoError(t, err)
+
+	rr := httptest.NewRecorder()
+	handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
+	handler.ServeHTTP(rr, req)
+
+	return rr.Body.String(), rr.Code
+}
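For reference, a minimal sketch of how a test elsewhere in the repository might use the new helper; the registered gauge below is a stand-in rather than one of the KubeRay metric managers, and the test name is made up:

package support_test

import (
	"net/http"
	"testing"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/assert"

	"github.com/ray-project/kuberay/ray-operator/test/support"
)

func TestGetMetricsResponseAndCode(t *testing.T) {
	reg := prometheus.NewRegistry()
	gauge := prometheus.NewGauge(prometheus.GaugeOpts{Name: "example_metric", Help: "sketch only"})
	reg.MustRegister(gauge)
	gauge.Set(1)

	body, statusCode := support.GetMetricsResponseAndCode(t, reg)

	assert.Equal(t, http.StatusOK, statusCode)
	assert.Contains(t, body, "example_metric 1")
}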