Skip to content

Commit 7f5ccbf

Browse files
pierDipi, kfswain, ahg-g
authored
Support scraping metrics from target running with TLS (#1190)
* Support scraping metrics from target running with TLS

  vLLM server can run with TLS and metrics scraping doesn't work currently in that case.

  Signed-off-by: Pierangelo Di Pilato <[email protected]>

* Add '--model-server-metrics-*' flags to Helm values and EPP template

  Signed-off-by: Pierangelo Di Pilato <[email protected]>

* Update config/charts/inferencepool/values.yaml

  Co-authored-by: Abdullah Gharaibeh <[email protected]>

* Update cmd/epp/runner/runner.go

  Co-authored-by: Abdullah Gharaibeh <[email protected]>

---------

Signed-off-by: Pierangelo Di Pilato <[email protected]>
Co-authored-by: Kellen Swain <[email protected]>
Co-authored-by: Abdullah Gharaibeh <[email protected]>
1 parent b50ee68 commit 7f5ccbf

File tree

5 files changed

+58
-17
lines changed

5 files changed

+58
-17
lines changed

cmd/epp/runner/runner.go

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ package runner
1818

1919
import (
2020
"context"
21+
"crypto/tls"
2122
"errors"
2223
"flag"
2324
"fmt"
25+
"net/http"
2426
"net/http/pprof"
2527
"os"
2628

@@ -145,7 +147,9 @@ var (
145147

146148
modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
147149
"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
148-
modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
150+
modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
151+
modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
152+
modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
149153

150154
setupLog = ctrl.Log.WithName("setup")
151155
)
@@ -176,13 +180,15 @@ func (r *Runner) WithSchedulerConfig(schedulerConfig *scheduling.SchedulerConfig
176180
func bindEnvToFlags() {
177181
// map[ENV_VAR]flagName – add more as needed
178182
for env, flg := range map[string]string{
179-
"GRPC_PORT": "grpc-port",
180-
"GRPC_HEALTH_PORT": "grpc-health-port",
181-
"MODEL_SERVER_METRICS_PORT": "model-server-metrics-port",
182-
"MODEL_SERVER_METRICS_PATH": "model-server-metrics-path",
183-
"DESTINATION_ENDPOINT_HINT_KEY": "destination-endpoint-hint-key",
184-
"POOL_NAME": "pool-name",
185-
"POOL_NAMESPACE": "pool-namespace",
183+
"GRPC_PORT": "grpc-port",
184+
"GRPC_HEALTH_PORT": "grpc-health-port",
185+
"MODEL_SERVER_METRICS_PORT": "model-server-metrics-port",
186+
"MODEL_SERVER_METRICS_PATH": "model-server-metrics-path",
187+
"MODEL_SERVER_METRICS_SCHEME": "model-server-metrics-scheme",
188+
"MODEL_SERVER_METRICS_HTTPS_INSECURE_SKIP_VERIFY": "model-server-metrics-https-insecure-skip-verify",
189+
"DESTINATION_ENDPOINT_HINT_KEY": "destination-endpoint-hint-key",
190+
"POOL_NAME": "pool-name",
191+
"POOL_NAMESPACE": "pool-namespace",
186192
// durations & bools work too; flag.Set expects the *string* form
187193
"REFRESH_METRICS_INTERVAL": "refresh-metrics-interval",
188194
"SECURE_SERVING": "secure-serving",
@@ -242,10 +248,26 @@ func (r *Runner) Run(ctx context.Context) error {
242248
return err
243249
}
244250
verifyMetricMapping(*mapping, setupLog)
251+
252+
var metricsHttpClient *http.Client
253+
if *modelServerMetricsScheme == "https" {
254+
metricsHttpClient = &http.Client{
255+
Transport: &http.Transport{
256+
TLSClientConfig: &tls.Config{
257+
InsecureSkipVerify: *modelServerMetricsHttpsInsecureSkipVerify,
258+
},
259+
},
260+
}
261+
} else {
262+
metricsHttpClient = http.DefaultClient
263+
}
264+
245265
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
246-
MetricMapping: mapping,
247-
ModelServerMetricsPort: int32(*modelServerMetricsPort),
248-
ModelServerMetricsPath: *modelServerMetricsPath,
266+
MetricMapping: mapping,
267+
ModelServerMetricsPort: int32(*modelServerMetricsPort),
268+
ModelServerMetricsPath: *modelServerMetricsPath,
269+
ModelServerMetricsScheme: *modelServerMetricsScheme,
270+
Client: metricsHttpClient,
249271
}, *refreshMetricsInterval)
250272

251273
datastore := datastore.NewDatastore(ctx, pmf)
@@ -446,6 +468,9 @@ func validateFlags() error {
446468
if *configText != "" && *configFile != "" {
447469
return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile")
448470
}
471+
if *modelServerMetricsScheme != "http" && *modelServerMetricsScheme != "https" {
472+
return fmt.Errorf("unexpected %q value for %q flag, it can only be set to 'http' or 'https'", *modelServerMetricsScheme, "model-server-metrics-scheme")
473+
}
449474

450475
return nil
451476
}

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ spec:
3939
- "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
4040
# https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
4141
- "--enable-pprof={{ .Values.inferenceExtension.enablePprof }}"
42+
- "--model-server-metrics-path={{ .Values.inferenceExtension.modelServerMetricsPath }}"
43+
- "--model-server-metrics-scheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
44+
- "--model-server-metrics-https-insecure-skip-verify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
4245
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
4346
- --total-queued-requests-metric
4447
- "nv_trt_llm_request_metrics{request_type=waiting}"

config/charts/inferencepool/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ inferenceExtension:
88
extProcPort: 9002
99
env: []
1010
enablePprof: true # Enable pprof handlers for profiling and debugging
11+
modelServerMetricsPath: "/metrics"
12+
modelServerMetricsScheme: "http"
13+
modelServerMetricsHttpsInsecureSkipVerify: true
1114
# This is the plugins configuration file.
1215
pluginsConfigFile: "default-plugins.yaml"
1316
# pluginsCustomConfig:

pkg/epp/backend/metrics/metrics.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,12 @@ const (
3737
)
3838

3939
type PodMetricsClientImpl struct {
40-
MetricMapping *MetricMapping
41-
ModelServerMetricsPort int32
42-
ModelServerMetricsPath string
40+
MetricMapping *MetricMapping
41+
ModelServerMetricsPort int32
42+
ModelServerMetricsPath string
43+
ModelServerMetricsScheme string
44+
45+
Client *http.Client
4346
}
4447

4548
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
@@ -49,7 +52,7 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
4952
if err != nil {
5053
return nil, fmt.Errorf("failed to create request: %v", err)
5154
}
52-
resp, err := http.DefaultClient.Do(req)
55+
resp, err := p.Client.Do(req)
5356
if err != nil {
5457
return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)
5558
}
@@ -73,7 +76,7 @@ func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNum
7376
if p.ModelServerMetricsPort == 0 {
7477
p.ModelServerMetricsPort = targetPortNumber
7578
}
76-
return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
79+
return fmt.Sprintf("%s://%s:%d%s", p.ModelServerMetricsScheme, pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
7780
}
7881

7982
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package metrics
1919
import (
2020
"context"
2121
"errors"
22+
"net/http"
2223
"reflect"
2324
"strconv"
2425
"strings"
@@ -495,7 +496,13 @@ func TestFetchMetrics(t *testing.T) {
495496
},
496497
}
497498
existing := &MetricsState{}
498-
p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
499+
// No MetricMapping needed for this basic test
500+
p := &PodMetricsClientImpl{
501+
ModelServerMetricsScheme: "http",
502+
ModelServerMetricsPort: 9999,
503+
ModelServerMetricsPath: "/metrics",
504+
Client: http.DefaultClient,
505+
}
499506

500507
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use
501508
if err == nil {

0 commit comments

Comments
 (0)