Skip to content

Commit e696216

Browse files
authored
feat: make model metrics endpoints configurable (#1000)
1 parent 13d8bb0 commit e696216

File tree

4 files changed

+48
-9
lines changed

4 files changed

+48
-9
lines changed

cmd/epp/runner/runner.go

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ var (
137137
runserver.DefaultConfigText,
138138
"The configuration specified as text, in lieu of a file")
139139

140+
modelServerMetricsPort = flag.Int("modelServerMetricsPort", 0, "Port to scrape metrics from pods. "+
141+
"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
142+
modelServerMetricsPath = flag.String("modelServerMetricsPath", "/metrics", "Path to scrape metrics from pods")
143+
140144
setupLog = ctrl.Log.WithName("setup")
141145

142146
// Environment variables
@@ -168,7 +172,32 @@ func (r *Runner) WithSchedulerConfig(schedulerConfig *scheduling.SchedulerConfig
168172
return r
169173
}
170174

175+
func bindEnvToFlags() {
176+
// map[ENV_VAR]flagName – add more as needed
177+
for env, flg := range map[string]string{
178+
"GRPC_PORT": "grpcPort",
179+
"GRPC_HEALTH_PORT": "grpcHealthPort",
180+
"MODEL_SERVER_METRICS_PORT": "modelServerMetricsPort",
181+
"MODEL_SERVER_METRICS_PATH": "modelServerMetricsPath",
182+
"DESTINATION_ENDPOINT_HINT_KEY": "destinationEndpointHintKey",
183+
"POOL_NAME": "poolName",
184+
"POOL_NAMESPACE": "poolNamespace",
185+
// durations & bools work too; flag.Set expects the *string* form
186+
"REFRESH_METRICS_INTERVAL": "refreshMetricsInterval",
187+
"SECURE_SERVING": "secureServing",
188+
} {
189+
if v := os.Getenv(env); v != "" {
190+
// ignore error; Parse() will catch invalid values later
191+
_ = flag.Set(flg, v)
192+
}
193+
}
194+
}
195+
171196
func (r *Runner) Run(ctx context.Context) error {
197+
// Defaults already baked into flag declarations
198+
// Load env vars as "soft" overrides
199+
bindEnvToFlags()
200+
172201
opts := zap.Options{
173202
Development: true,
174203
}
@@ -210,7 +239,11 @@ func (r *Runner) Run(ctx context.Context) error {
210239
return err
211240
}
212241
verifyMetricMapping(*mapping, setupLog)
213-
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{MetricMapping: mapping}, *refreshMetricsInterval)
242+
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.PodMetricsClientImpl{
243+
MetricMapping: mapping,
244+
ModelServerMetricsPort: int32(*modelServerMetricsPort),
245+
ModelServerMetricsPath: *modelServerMetricsPath,
246+
}, *refreshMetricsInterval)
214247

215248
datastore := datastore.NewDatastore(ctx, pmf)
216249

pkg/epp/backend/metrics/fake.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ type FakePodMetricsClient struct {
5656
Res map[types.NamespacedName]*MetricsState
5757
}
5858

59-
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
59+
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, _ int32) (*MetricsState, error) {
6060
f.errMu.RLock()
6161
err, ok := f.Err[pod.NamespacedName]
6262
f.errMu.RUnlock()

pkg/epp/backend/metrics/metrics.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,14 @@ const (
3737
)
3838

3939
type PodMetricsClientImpl struct {
40-
MetricMapping *MetricMapping
40+
MetricMapping *MetricMapping
41+
ModelServerMetricsPort int32
42+
ModelServerMetricsPath string
4143
}
4244

4345
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
4446
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
45-
// Currently the metrics endpoint is hard-coded, which works with vLLM.
46-
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
47-
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"
48-
47+
url := p.getMetricEndpoint(pod, port)
4948
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
5049
if err != nil {
5150
return nil, fmt.Errorf("failed to create request: %v", err)
@@ -70,6 +69,13 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
7069
return p.promToPodMetrics(metricFamilies, existing)
7170
}
7271

72+
func (p *PodMetricsClientImpl) getMetricEndpoint(pod *backend.Pod, targetPortNumber int32) string {
73+
if p.ModelServerMetricsPort == 0 {
74+
p.ModelServerMetricsPort = targetPortNumber
75+
}
76+
return fmt.Sprintf("http://%s:%d%s", pod.Address, p.ModelServerMetricsPort, p.ModelServerMetricsPath)
77+
}
78+
7379
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
7480
func (p *PodMetricsClientImpl) promToPodMetrics(
7581
metricFamilies map[string]*dto.MetricFamily,

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -495,9 +495,9 @@ func TestFetchMetrics(t *testing.T) {
495495
},
496496
}
497497
existing := &MetricsState{}
498-
p := &PodMetricsClientImpl{} // No MetricMapping needed for this basic test
498+
p := &PodMetricsClientImpl{ModelServerMetricsPort: 9999, ModelServerMetricsPath: "/metrics"} // No MetricMapping needed for this basic test
499499

500-
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use.
500+
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use
501501
if err == nil {
502502
t.Errorf("FetchMetrics() expected error, got nil")
503503
}

0 commit comments

Comments
 (0)