Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
healthPb "google.golang.org/grpc/health/grpc_health_v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
Expand Down Expand Up @@ -74,17 +75,18 @@ const (
)

var (
grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint")
enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
"then a self-signed certificate is used.")
// metric flags
Expand Down Expand Up @@ -184,8 +186,14 @@ func (r *Runner) Run(ctx context.Context) error {
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
// - https://book.kubebuilder.io/reference/metrics.html
metricsServerOptions := metricsserver.Options{
BindAddress: fmt.Sprintf(":%d", *metricsPort),
FilterProvider: filters.WithAuthenticationAndAuthorization,
BindAddress: fmt.Sprintf(":%d", *metricsPort),
FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) {
if *metricsEndpointAuth {
return filters.WithAuthenticationAndAuthorization
}

return nil
}(),
}

// Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default
Expand Down
11 changes: 6 additions & 5 deletions config/charts/inferencepool/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,16 @@ inferenceExtension:
monitoring:
interval: "10s"
prometheus:
enabled: true
secret:
name: inference-gateway-sa-metrics-reader-secret
enabled: false
auth:
enabled: true
secretName: inference-gateway-sa-metrics-reader-secret
extraLabels: {}
```

**Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster.

For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection.
For GKE environments, first set `provider.name` to `gke`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection.

If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`.

Expand Down Expand Up @@ -186,7 +188,6 @@ The following table lists the configurable parameters of the chart.
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
| `inferenceExtension.monitoring.prometheus.auth.secretName` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. |
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. |
| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. |
Expand Down
3 changes: 3 additions & 0 deletions config/charts/inferencepool/templates/epp-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ spec:
- "--{{ .name }}"
- "{{ .value }}"
{{- end }}
{{- /* Disable authn/authz on the metrics endpoint unless a scraper with credentials is configured. The ServiceMonitor only attaches a bearer token when both prometheus.enabled and prometheus.auth.enabled are true, so requiring auth in any other case would reject every scrape. */}}
{{- if not (and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }}
- --metrics-endpoint-auth=false
{{- end }}
ports:
- name: grpc
containerPort: 9002
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }}
apiVersion: v1
kind: Secret
metadata:
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
Expand Down
9 changes: 7 additions & 2 deletions config/charts/inferencepool/templates/epp-servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "gateway-api-inference-extension.name" . }}-monitor
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
{{- with .Values.inferenceExtension.monitoring.prometheus.extraLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
endpoints:
- interval: {{ .Values.inferenceExtension.monitoring.interval }}
port: "http-metrics"
path: "/metrics"
{{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
authorization:
credentials:
key: token
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
{{- end }}
jobLabel: {{ include "gateway-api-inference-extension.name" . }}
namespaceSelector:
matchNames:
Expand Down
2 changes: 1 addition & 1 deletion config/charts/inferencepool/templates/gke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ spec:
logging:
enabled: true # log all requests by default
---
{{- if .Values.inferenceExtension.monitoring.gke.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
Expand Down
13 changes: 6 additions & 7 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,15 @@ inferenceExtension:
# Monitoring configuration for EPP
monitoring:
interval: "10s"
# Service account token secret for authentication
secret:
name: inference-gateway-sa-metrics-reader-secret

# Prometheus ServiceMonitor will be created when enabled for EPP metrics collection
prometheus:
enabled: false

gke:
enabled: false
auth:
enabled: true
# Service account token secret for authentication
secretName: inference-gateway-sa-metrics-reader-secret
# additional labels for the ServiceMonitor
extraLabels: {}

inferencePool:
targetPorts:
Expand Down