Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
healthPb "google.golang.org/grpc/health/grpc_health_v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
Expand Down Expand Up @@ -74,17 +75,18 @@ const (
)

var (
grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy")
grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes")
metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port")
metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint")
enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
"then a self-signed certificate is used.")
// metric flags
Expand Down Expand Up @@ -184,8 +186,14 @@ func (r *Runner) Run(ctx context.Context) error {
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
// - https://book.kubebuilder.io/reference/metrics.html
metricsServerOptions := metricsserver.Options{
BindAddress: fmt.Sprintf(":%d", *metricsPort),
FilterProvider: filters.WithAuthenticationAndAuthorization,
BindAddress: fmt.Sprintf(":%d", *metricsPort),
FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) {
if *metricsEndpointAuth {
return filters.WithAuthenticationAndAuthorization
}

return nil
}(),
}

// Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default
Expand Down
3 changes: 3 additions & 0 deletions config/charts/inferencepool/templates/epp-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ spec:
- "--{{ .name }}"
- "{{ .value }}"
{{- end }}
{{- /* Turn off EPP metrics endpoint auth whenever the scraper will not present credentials: either Prometheus monitoring is disabled, or it is enabled with auth disabled (the ServiceMonitor then carries no authorization stanza and no token Secret is created). */}}
{{- if or (not .Values.inferenceExtension.monitoring.prometheus.enabled) (not .Values.inferenceExtension.monitoring.prometheus.auth.enabled) }}
- --metrics-endpoint-auth=false
{{- end }}
ports:
- name: grpc
containerPort: 9002
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }}
apiVersion: v1
kind: Secret
metadata:
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
Expand Down
11 changes: 8 additions & 3 deletions config/charts/inferencepool/templates/epp-servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ include "gateway-api-inference-extension.name" . }}-monitor
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
{{- with .Values.inferenceExtension.monitoring.prometheus.extraLabels }}
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
endpoints:
- interval: {{ .Values.inferenceExtension.monitoring.interval }}
- interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }}
port: "http-metrics"
path: "/metrics"
{{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
authorization:
credentials:
key: token
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }}
{{- end }}
jobLabel: {{ include "gateway-api-inference-extension.name" . }}
namespaceSelector:
matchNames:
Expand Down
4 changes: 2 additions & 2 deletions config/charts/inferencepool/templates/gke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ spec:
logging:
enabled: true # log all requests by default
---
{{- if .Values.inferenceExtension.monitoring.gke.enabled }}
{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }}
{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}}
{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}}
{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}}
Expand Down Expand Up @@ -83,7 +83,7 @@ spec:
endpoints:
- port: metrics
scheme: http
interval: {{ .Values.inferenceExtension.monitoring.interval }}
interval: {{ .Values.inferenceExtension.monitoring.prometheus.interval }}
path: /metrics
authorization:
type: Bearer
Expand Down
15 changes: 7 additions & 8 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,16 @@ inferenceExtension:

# Monitoring configuration for EPP
monitoring:
interval: "10s"
# Service account token secret for authentication
secret:
name: inference-gateway-sa-metrics-reader-secret

# When enabled, a Prometheus ServiceMonitor is created to collect EPP metrics
prometheus:
enabled: false

gke:
enabled: false
interval: "10s"
auth:
enabled: true
# Service account token secret for authentication
secretName: inference-gateway-sa-metrics-reader-secret
# Additional labels to add to the ServiceMonitor
extraLabels: {}

inferencePool:
targetPorts:
Expand Down