diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 2842704bb..3f0c2e1e0 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -36,6 +36,7 @@ import ( healthPb "google.golang.org/grpc/health/grpc_health_v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -74,17 +75,18 @@ const ( ) var ( - grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") - grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") - metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") - enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.") - poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") - poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") - poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") - logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") - secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") - healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") - certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. 
The certificate and private key files "+ + grpcPort = flag.Int("grpc-port", runserver.DefaultGrpcPort, "The gRPC port used for communicating with Envoy proxy") + grpcHealthPort = flag.Int("grpc-health-port", runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") + metricsPort = flag.Int("metrics-port", runserver.DefaultMetricsPort, "The metrics port") + metricsEndpointAuth = flag.Bool("metrics-endpoint-auth", true, "Enables authentication and authorization of the metrics endpoint") + enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.") + poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.") + poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.") + poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.") + logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") + secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") + healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking") + certPath = flag.String("cert-path", runserver.DefaultCertPath, "The path to the certificate for secure serving. The certificate and private key files "+ "are assumed to be named tls.crt and tls.key, respectively. 
If not set, and secureServing is enabled, "+ "then a self-signed certificate is used.") // metric flags @@ -184,8 +186,14 @@ func (r *Runner) Run(ctx context.Context) error { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/metrics/server // - https://book.kubebuilder.io/reference/metrics.html metricsServerOptions := metricsserver.Options{ - BindAddress: fmt.Sprintf(":%d", *metricsPort), - FilterProvider: filters.WithAuthenticationAndAuthorization, + BindAddress: fmt.Sprintf(":%d", *metricsPort), + FilterProvider: func() func(c *rest.Config, httpClient *http.Client) (metricsserver.Filter, error) { + if *metricsEndpointAuth { + return filters.WithAuthenticationAndAuthorization + } + + return nil + }(), } // Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 41fee834d..2e2d18903 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -137,14 +137,16 @@ inferenceExtension: monitoring: interval: "10s" prometheus: - enabled: true - secret: - name: inference-gateway-sa-metrics-reader-secret + enabled: false + auth: + enabled: true + secretName: inference-gateway-sa-metrics-reader-secret + extraLabels: {} ``` **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster. -For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. +For GKE environments, you first need to set `provider.name` to `gke`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`. 
@@ -186,7 +188,6 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | -| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: [`none`, `gke`, or `istio`]. Defaults to `none`. | | `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. 
| diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index f012c2e47..c690539e3 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -62,6 +62,9 @@ spec: - "--{{ .name }}" - "{{ .value }}" {{- end }} + {{- if not .Values.inferenceExtension.monitoring.prometheus.enabled }} + - --metrics-endpoint-auth=false + {{- end }} ports: - name: grpc containerPort: 9002 diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index df54b3475..16d935f96 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,8 +1,8 @@ -{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: v1 kind: Secret metadata: - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} diff --git a/config/charts/inferencepool/templates/epp-servicemonitor.yaml b/config/charts/inferencepool/templates/epp-servicemonitor.yaml index e4788ba83..220be76dc 100644 --- a/config/charts/inferencepool/templates/epp-servicemonitor.yaml +++ b/config/charts/inferencepool/templates/epp-servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled (ne (lower .Values.provider.name) "gke") }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -6,15 +6,20 @@ metadata: namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} + {{- with .Values.inferenceExtension.monitoring.prometheus.extraLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} spec: endpoints: - interval: {{ .Values.inferenceExtension.monitoring.interval }} port: "http-metrics" path: "/metrics" + {{- if .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} authorization: credentials: key: token - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ .Values.inferenceExtension.monitoring.prometheus.auth.secretName }} + {{- end }} jobLabel: {{ include "gateway-api-inference-extension.name" . 
}} namespaceSelector: matchNames: diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 77855c35a..2ee2e13fc 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -40,7 +40,7 @@ spec: logging: enabled: true # log all requests by default --- -{{- if .Values.inferenceExtension.monitoring.gke.enabled }} +{{- if and .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.prometheus.auth.enabled }} {{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} {{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} {{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 91d6a48e6..2103a5be1 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -43,16 +43,15 @@ inferenceExtension: # Monitoring configuration for EPP monitoring: interval: "10s" - # Service account token secret for authentication - secret: - name: inference-gateway-sa-metrics-reader-secret - # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection prometheus: enabled: false - - gke: - enabled: false + auth: + enabled: true + # Service account token secret for authentication + secretName: inference-gateway-sa-metrics-reader-secret + # additional labels for the ServiceMonitor + extraLabels: {} inferencePool: targetPorts: