Support scraping metrics from target running with TLS (#1190)

pierDipi · kfswain · ahg-g · web-flow · commit 7f5ccbf5edac · 2025-07-22T11:10:27.000-07:00
* Support scraping metrics from target running with TLS

A vLLM server can run with TLS enabled, and metrics scraping
currently does not work in that case.

Signed-off-by: Pierangelo Di Pilato <pierdipi@redhat.com>

* Add '--model-server-metrics-*' flags to Helm values and EPP template

Signed-off-by: Pierangelo Di Pilato <pierdipi@redhat.com>

* Update config/charts/inferencepool/values.yaml

Co-authored-by: Abdullah Gharaibeh <40361897+ahg-g@users.noreply.github.com>

* Update cmd/epp/runner/runner.go

Co-authored-by: Abdullah Gharaibeh <40361897+ahg-g@users.noreply.github.com>

---------

Signed-off-by: Pierangelo Di Pilato <pierdipi@redhat.com>
Co-authored-by: Kellen Swain <kfswain@google.com>
Co-authored-by: Abdullah Gharaibeh <40361897+ahg-g@users.noreply.github.com>
diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
@@ -18,9 +18,11 @@ package runner
 
 import (
 	"context"
+	"crypto/tls"
 	"errors"
 	"flag"
 	"fmt"
+	"net/http"
 	"net/http/pprof"
 	"os"
 
@@ -145,7 +147,9 @@ var (
 
 	modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
 		"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
-	modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
+	modelServerMetricsPath                    = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
+	modelServerMetricsScheme                  = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
+	modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
 
 	setupLog = ctrl.Log.WithName("setup")
 )
@@ -176,13 +180,15 @@ func (r *Runner) WithSchedulerConfig(schedulerConfig *scheduling.SchedulerConfig
 func bindEnvToFlags() {
 	// map[ENV_VAR]flagName   – add more as needed
 	for env, flg := range map[string]string{
-		"GRPC_PORT":                     "grpc-port",
-		"GRPC_HEALTH_PORT":              "grpc-health-port",
-		"MODEL_SERVER_METRICS_PORT":     "model-server-metrics-port",
-		"MODEL_SERVER_METRICS_PATH":     "model-server-metrics-path",
-		"DESTINATION_ENDPOINT_HINT_KEY": "destination-endpoint-hint-key",
-		"POOL_NAME":                     "pool-name",
-		"POOL_NAMESPACE":                "pool-namespace",
+		"GRPC_PORT":                                       "grpc-port",
+		"GRPC_HEALTH_PORT":                                "grpc-health-port",
+		"MODEL_SERVER_METRICS_PORT":                       "model-server-metrics-port",
+		"MODEL_SERVER_METRICS_PATH":                       "model-server-metrics-path",
+		"MODEL_SERVER_METRICS_SCHEME":                     "model-server-metrics-scheme",
+		"MODEL_SERVER_METRICS_HTTPS_INSECURE_SKIP_VERIFY": "model-server-metrics-https-insecure-skip-verify",
+		"DESTINATION_ENDPOINT_HINT_KEY":                   "destination-endpoint-hint-key",
+		"POOL_NAME":                                       "pool-name",
+		"POOL_NAMESPACE":                                  "pool-namespace",
 		// durations & bools work too; flag.Set expects the *string* form
 		"REFRESH_METRICS_INTERVAL": "refresh-metrics-interval",
 		"SECURE_SERVING":           "secure-serving",
@@ -242,10 +248,26 @@ func (r *Runner) Run(ctx context.Context) error {
 		return err
 	}
 	verifyMetricMapping(*mapping, setupLog)
+
+	var metricsHttpClient *http.Client
+	if *modelServerMetricsScheme == "https" {
+		metricsHttpClient = &http.Client{
+			Transport: &http.Transport{
+				TLSClientConfig: &tls.Config{
+					InsecureSkipVerify: *modelServerMetricsHttpsInsecureSkipVerify,
+				},
+			},
+		}
+	} else {
+		metricsHttpClient = http.DefaultClient
+	}
+
 	pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
-		MetricMapping:          mapping,
-		ModelServerMetricsPort: int32(*modelServerMetricsPort),
-		ModelServerMetricsPath: *modelServerMetricsPath,
+		MetricMapping:            mapping,
+		ModelServerMetricsPort:   int32(*modelServerMetricsPort),
+		ModelServerMetricsPath:   *modelServerMetricsPath,
+		ModelServerMetricsScheme: *modelServerMetricsScheme,
+		Client:                   metricsHttpClient,
 	}, *refreshMetricsInterval)
 
 	datastore := datastore.NewDatastore(ctx, pmf)
@@ -446,6 +468,9 @@ func validateFlags() error {
 	if *configText != "" && *configFile != "" {
 		return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile")
 	}
+	if *modelServerMetricsScheme != "http" && *modelServerMetricsScheme != "https" {
+		return fmt.Errorf("unexpected %q value for %q flag, it can only be set to 'http' or 'https'", *modelServerMetricsScheme, "model-server-metrics-scheme")
+	}
 
 	return nil
 }
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -39,6 +39,9 @@ spec:
         - "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
         # https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
         - "--enable-pprof={{ .Values.inferenceExtension.enablePprof }}"
+        - "--model-server-metrics-path={{ .Values.inferenceExtension.modelServerMetricsPath }}"
+        - "--model-server-metrics-scheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
+        - "--model-server-metrics-https-insecure-skip-verify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
         {{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
         - --total-queued-requests-metric
         - "nv_trt_llm_request_metrics{request_type=waiting}"
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -8,6 +8,9 @@ inferenceExtension:
   extProcPort: 9002
   env: []
   enablePprof: true # Enable pprof handlers for profiling and debugging
+  modelServerMetricsPath: "/metrics"
+  modelServerMetricsScheme: "http"
+  modelServerMetricsHttpsInsecureSkipVerify: true
   # This is the plugins configuration file. 
   pluginsConfigFile: "default-plugins.yaml"
   # pluginsCustomConfig:
diff --git a/pkg/epp/backend/metrics/metrics.go b/pkg/epp/backend/metrics/metrics.go
@@ -37,9 +37,12 @@ const (
 )
 
 type PodMetricsClientImpl struct {
-	MetricMapping          *MetricMapping
-	ModelServerMetricsPort int32
-	ModelServerMetricsPath string
+	MetricMapping            *MetricMapping
+	ModelServerMetricsPort   int32
+	ModelServerMetricsPath   string
+	ModelServerMetricsScheme string
+
+	Client *http.Client
 }
 
 // FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
@@ -49,7 +52,7 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
 	if err != nil {
 		return nil, fmt.Errorf("failed to create request: %v", err)
 	}
-	resp, err := http.DefaultClient.Do(req)
+	resp, err := p.Client.Do(req)
 	if err != nil {
 		return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)
 	}
@@ -73,7 +76,7 @@ func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNum
 	if p.ModelServerMetricsPort == 0 {
 		p.ModelServerMetricsPort = targetPortNumber
 	}
-	return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
+	return fmt.Sprintf("%s://%s:%d%s", p.ModelServerMetricsScheme, pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
 }
 
 // promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
diff --git a/pkg/epp/backend/metrics/metrics_test.go b/pkg/epp/backend/metrics/metrics_test.go
@@ -19,6 +19,7 @@ package metrics
 import (
 	"context"
 	"errors"
+	"net/http"
 	"reflect"
 	"strconv"
 	"strings"
@@ -495,7 +496,13 @@ func TestFetchMetrics(t *testing.T) {
 		},
 	}
 	existing := &MetricsState{}
-	p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
+	// No MetricMapping needed for this basic test
+	p := &PodMetricsClientImpl{
+		ModelServerMetricsScheme: "http",
+		ModelServerMetricsPort:   9999,
+		ModelServerMetricsPath:   "/metrics",
+		Client:                   http.DefaultClient,
+	}
 
 	_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use
 	if err == nil {

Original file line number	Diff line number	Diff line change
`@@ -37,9 +37,12 @@ const (`
`37`	`37`	`)`
`38`	`38`
`39`	`39`	`type PodMetricsClientImpl struct {`
`40`		`- MetricMapping *MetricMapping`
`41`		`- ModelServerMetricsPort int32`
`42`		`- ModelServerMetricsPath string`
	`40`	`+ MetricMapping *MetricMapping`
	`41`	`+ ModelServerMetricsPort int32`
	`42`	`+ ModelServerMetricsPath string`
	`43`	`+ ModelServerMetricsScheme string`
	`44`	`+`
	`45`	`+ Client *http.Client`
`43`	`46`	`}`
`44`	`47`
`45`	`48`	`// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.`
`@@ -49,7 +52,7 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po`
`49`	`52`	`if err != nil {`
`50`	`53`	`return nil, fmt.Errorf("failed to create request: %v", err)`
`51`	`54`	`}`
`52`		`- resp, err := http.DefaultClient.Do(req)`
	`55`	`+ resp, err := p.Client.Do(req)`
`53`	`56`	`if err != nil {`
`54`	`57`	`return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)`
`55`	`58`	`}`
`@@ -73,7 +76,7 @@ func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNum`
`73`	`76`	`if p.ModelServerMetricsPort == 0 {`
`74`	`77`	`p.ModelServerMetricsPort = targetPortNumber`
`75`	`78`	`}`
`76`		`- return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)`
	`79`	`+ return fmt.Sprintf("%s://%s:%d%s", p.ModelServerMetricsScheme, pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)`
`77`	`80`	`}`
`78`	`81`
`79`	`82`	`// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.`