Skip to content

Commit 28aa195

Browse files
authored
Adding pprof endpoints to metrics port (#1069)
1 parent 8dc40a5 commit 28aa195

File tree

3 files changed

+42
-4
lines changed

3 files changed

+42
-4
lines changed

cmd/epp/runner/runner.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"flag"
2222
"fmt"
23+
"net/http/pprof"
2324

2425
"github.com/go-logr/logr"
2526
"github.com/prometheus/client_golang/prometheus"
@@ -215,6 +216,11 @@ func (r *Runner) Run(ctx context.Context) error {
215216
setupLog.Error(err, "Failed to create controller manager")
216217
return err
217218
}
219+
err = setupPprofHandlers(mgr)
220+
if err != nil {
221+
setupLog.Error(err, "Failed to setup pprof handlers")
222+
return err
223+
}
218224

219225
err = r.parseConfiguration()
220226
if err != nil {
@@ -409,3 +415,24 @@ func verifyMetricMapping(mapping backendmetrics.MetricMapping, logger logr.Logge
409415
logger.Info("Not scraping metric: LoraRequestInfo")
410416
}
411417
}
418+
419+
// setupPprofHandlers only implements the pre-defined profiles:
420+
// https://cs.opensource.google/go/go/+/refs/tags/go1.24.4:src/runtime/pprof/pprof.go;l=108
421+
func setupPprofHandlers(mgr ctrl.Manager) error {
422+
var err error
423+
profiles := []string{
424+
"heap",
425+
"goroutine",
426+
"allocs",
427+
"threadcreate",
428+
"block",
429+
"mutex",
430+
}
431+
for _, p := range profiles {
432+
err = mgr.AddMetricsServerExtraHandler("/debug/pprof/"+p, pprof.Handler(p))
433+
if err != nil {
434+
return err
435+
}
436+
}
437+
return nil
438+
}

mkdocs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ nav:
6868
- Rollout:
6969
- Adapter Rollout: guides/adapter-rollout.md
7070
- InferencePool Rollout: guides/inferencepool-rollout.md
71-
- Metrics: guides/metrics.md
71+
- Metrics and Observability: guides/metrics-and-observability.md
7272
- Configuration Guide:
7373
- Prefix Cache Aware Plugin: guides/epp-configuration/prefix-aware.md
7474
- Implementer's Guide: guides/implementers.md

site-src/guides/metrics.md renamed to site-src/guides/metrics-and-observability.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Metrics
1+
# Metrics & Observability
22

3-
This guide describes the current state of exposed metrics and how to scrape them.
3+
This guide describes the current state of exposed metrics and how to scrape them, and explains how to access pprof profiles.
44

55
## Requirements
66

@@ -53,7 +53,7 @@ This guide describes the current state of exposed metrics and how to scrape them
5353
|:---------------------------|:-----------------|:-------------------------------------------------|:------------------------------------------|:------------|
5454
| lora_syncer_adapter_status | Gauge | Status of LoRA adapters (1=loaded, 0=not_loaded) | `adapter_name`=<adapter-id> | ALPHA |
5555

56-
## Scrape Metrics
56+
## Scrape Metrics & Pprof profiles
5757

5858
The metrics endpoints are exposed on different ports by default:
5959

@@ -73,6 +73,7 @@ metadata:
7373
rules:
7474
- nonResourceURLs:
7575
- /metrics
76+
- /debug/pprof/*
7677
verbs:
7778
- get
7879
---
@@ -116,6 +117,16 @@ kubectl -n default port-forward inference-gateway-ext-proc-pod-name 9090
116117
curl -H "Authorization: Bearer $TOKEN" localhost:9090/metrics
117118
```
118119

120+
### Pprof profiles
121+
122+
Currently only the [predefined profiles](https://pkg.go.dev/runtime/pprof#Profile) are supported; CPU profiling will require code changes. Assuming the EPP has been port-forwarded as in the above example, to get the PNG rendering of the `heap` profile, simply run:
123+
124+
```
125+
PROFILE_NAME=heap
126+
curl -H "Authorization: Bearer $TOKEN" localhost:9090/debug/pprof/$PROFILE_NAME -o profile.out
127+
go tool pprof -png profile.out
128+
```
129+
119130
## Prometheus Alerts
120131

121132
This section describes how to configure Prometheus alerts using the collected metrics.

0 commit comments

Comments
 (0)