Skip to content

Commit b4cf5e2

Browse files
pierDipi, kfswain, and ahg-g
committed
Support scraping metrics from target running with TLS (#1190)
* Support scraping metrics from target running with TLS

  vLLM server can run with TLS and metrics scraping doesn't work currently in that case.

  Signed-off-by: Pierangelo Di Pilato <[email protected]>

* Add '--model-server-metrics-*' flags to Helm values and EPP template

  Signed-off-by: Pierangelo Di Pilato <[email protected]>

* Update config/charts/inferencepool/values.yaml

  Co-authored-by: Abdullah Gharaibeh <[email protected]>

* Update cmd/epp/runner/runner.go

  Co-authored-by: Abdullah Gharaibeh <[email protected]>

---------

Signed-off-by: Pierangelo Di Pilato <[email protected]>
Co-authored-by: Kellen Swain <[email protected]>
Co-authored-by: Abdullah Gharaibeh <[email protected]>
1 parent 38577e6 commit b4cf5e2

File tree

5 files changed

+59
-18
lines changed

5 files changed

+59
-18
lines changed

cmd/epp/runner/runner.go

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ package runner
1818

1919
import (
2020
"context"
21+
"crypto/tls"
2122
"flag"
2223
"fmt"
24+
"net/http"
2325
"net/http/pprof"
2426
"os"
2527

@@ -136,7 +138,9 @@ var (
136138

137139
modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods. "+
138140
"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
139-
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
141+
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
142+
modelServerMetricsScheme = flag.String("modelServerMetricsScheme", "http", "Scheme to scrape metrics from pods")
143+
modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("modelServerMetricsHttpsInsecureSkipVerify", true, "When using 'https' scheme for 'modelServerMetricsScheme', configure 'InsecureSkipVerify' (default to true)")
140144

141145
setupLog = ctrl.Log.WithName("setup")
142146
)
@@ -167,13 +171,15 @@ func (r *Runner) WithSchedulerConfig(schedulerConfig *scheduling.SchedulerConfig
167171
func bindEnvToFlags() {
168172
// map[ENV_VAR]flagName – add more as needed
169173
for env, flg := range map[string]string{
170-
"GRPC_PORT": "grpcPort",
171-
"GRPC_HEALTH_PORT": "grpcHealthPort",
172-
"MODEL_SERVER_METRICS_PORT": "modelServerMetricsPort",
173-
"MODEL_SERVER_METRICS_PATH": "modelServerMetricsPath",
174-
"DESTINATION_ENDPOINT_HINT_KEY": "destinationEndpointHintKey",
175-
"POOL_NAME": "poolName",
176-
"POOL_NAMESPACE": "poolNamespace",
174+
"GRPC_PORT": "grpcPort",
175+
"GRPC_HEALTH_PORT": "grpcHealthPort",
176+
"MODEL_SERVER_METRICS_PORT": "modelServerMetricsPort",
177+
"MODEL_SERVER_METRICS_PATH": "modelServerMetricsPath",
178+
"DESTINATION_ENDPOINT_HINT_KEY": "destinationEndpointHintKey",
179+
"MODEL_SERVER_METRICS_SCHEME": "modelServerMetricsScheme",
180+
"MODEL_SERVER_METRICS_HTTPS_INSECURE_SKIP_VERIFY": "modelServerMetricsHttpsInsecureSkipVerify",
181+
"POOL_NAME": "poolName",
182+
"POOL_NAMESPACE": "poolNamespace",
177183
// durations & bools work too; flag.Set expects the *string* form
178184
"REFRESH_METRICS_INTERVAL": "refreshMetricsInterval",
179185
"SECURE_SERVING": "secureServing",
@@ -231,10 +237,26 @@ func (r *Runner) Run(ctx context.Context) error {
231237
return err
232238
}
233239
verifyMetricMapping(*mapping, setupLog)
240+
241+
var metricsHttpClient *http.Client
242+
if *modelServerMetricsScheme == "https" {
243+
metricsHttpClient = &http.Client{
244+
Transport: &http.Transport{
245+
TLSClientConfig: &tls.Config{
246+
InsecureSkipVerify: *modelServerMetricsHttpsInsecureSkipVerify,
247+
},
248+
},
249+
}
250+
} else {
251+
metricsHttpClient = http.DefaultClient
252+
}
253+
234254
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
235-
MetricMapping: mapping,
236-
ModelServerMetricsPort: int32(*modelServerMetricsPort),
237-
ModelServerMetricsPath: *modelServerMetricsPath,
255+
MetricMapping: mapping,
256+
ModelServerMetricsPort: int32(*modelServerMetricsPort),
257+
ModelServerMetricsPath: *modelServerMetricsPath,
258+
ModelServerMetricsScheme: *modelServerMetricsScheme,
259+
Client: metricsHttpClient,
238260
}, *refreshMetricsInterval)
239261

240262
datastore := datastore.NewDatastore(ctx, pmf)
@@ -412,6 +434,9 @@ func validateFlags() error {
412434
if *configText != "" && *configFile != "" {
413435
return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile")
414436
}
437+
if *modelServerMetricsScheme != "http" && *modelServerMetricsScheme != "https" {
438+
return fmt.Errorf("unexpected %q value for %q flag, it can only be set to 'http' or 'https'", *modelServerMetricsScheme, "model-server-metrics-scheme")
439+
}
415440

416441
return nil
417442
}

config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ spec:
3838
- -configFile
3939
- "config/{{ .Values.inferenceExtension.pluginsConfigFile }}"
4040
# https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
41-
- "-enablePprof={{ .Values.inferenceExtension.enablePprof }}"
41+
- "--enablePprof={{ .Values.inferenceExtension.enablePprof }}"
42+
- "--modelServerMetricsPath={{ .Values.inferenceExtension.modelServerMetricsPath }}"
43+
- "--modelServerMetricsScheme={{ .Values.inferenceExtension.modelServerMetricsScheme }}"
44+
- "--modelServerMetricsHttpsInsecureSkipVerify={{ .Values.inferenceExtension.modelServerMetricsHttpsInsecureSkipVerify }}"
4245
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
4346
- -totalQueuedRequestsMetric
4447
- "nv_trt_llm_request_metrics{request_type=waiting}"

config/charts/inferencepool/values.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ inferenceExtension:
88
extProcPort: 9002
99
env: {}
1010
enablePprof: true # Enable pprof handlers for profiling and debugging
11+
modelServerMetricsPath: "/metrics"
12+
modelServerMetricsScheme: "http"
13+
modelServerMetricsHttpsInsecureSkipVerify: true
1114
# This is the plugins configuration file.
1215
pluginsConfigFile: "default-plugins.yaml"
1316
# pluginsCustomConfig:

pkg/epp/backend/metrics/metrics.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,12 @@ const (
3737
)
3838

3939
type PodMetricsClientImpl struct {
40-
MetricMapping *MetricMapping
41-
ModelServerMetricsPort int32
42-
ModelServerMetricsPath string
40+
MetricMapping *MetricMapping
41+
ModelServerMetricsPort int32
42+
ModelServerMetricsPath string
43+
ModelServerMetricsScheme string
44+
45+
Client *http.Client
4346
}
4447

4548
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
@@ -49,7 +52,7 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
4952
if err != nil {
5053
return nil, fmt.Errorf("failed to create request: %v", err)
5154
}
52-
resp, err := http.DefaultClient.Do(req)
55+
resp, err := p.Client.Do(req)
5356
if err != nil {
5457
return nil, fmt.Errorf("failed to fetch metrics from %s: %w", pod.NamespacedName, err)
5558
}
@@ -73,7 +76,7 @@ func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNum
7376
if p.ModelServerMetricsPort == 0 {
7477
p.ModelServerMetricsPort = targetPortNumber
7578
}
76-
return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
79+
return fmt.Sprintf("%s://%s:%d%s", p.ModelServerMetricsScheme, pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
7780
}
7881

7982
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package metrics
1919
import (
2020
"context"
2121
"errors"
22+
"net/http"
2223
"reflect"
2324
"strconv"
2425
"strings"
@@ -495,7 +496,13 @@ func TestFetchMetrics(t *testing.T) {
495496
},
496497
}
497498
existing := &MetricsState{}
498-
p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
499+
// No MetricMapping needed for this basic test
500+
p := &PodMetricsClientImpl{
501+
ModelServerMetricsScheme: "http",
502+
ModelServerMetricsPort: 9999,
503+
ModelServerMetricsPath: "/metrics",
504+
Client: http.DefaultClient,
505+
}
499506

500507
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use
501508
if err == nil {

0 commit comments

Comments
 (0)