Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,7 @@ spec:
- prometheus
- influxdb
- datadog
- externalmetrics
- stackdriver
- cloudwatch
- newrelic
Expand Down
1 change: 1 addition & 0 deletions charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,7 @@ spec:
- prometheus
- influxdb
- datadog
- externalmetrics
- stackdriver
- cloudwatch
- newrelic
Expand Down
8 changes: 8 additions & 0 deletions charts/flagger/templates/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,14 @@ rules:
- revisions
verbs:
- get
- apiGroups:
- external.metrics.k8s.io
resources:
- '*'
verbs:
- get
- watch
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
Expand Down
50 changes: 50 additions & 0 deletions docs/gitbook/usage/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,22 @@ Reference the template in the canary analysis:
interval: 1m
```

### Datadog Rate Limits

For bigger setups, you might run into rate limits on the Datadog API. To avoid this,
you can use the Datadog Cluster Agent to retrieve metrics in batches instead. It will then
expose these metrics as an external metrics server.

See [Datadog Documentation](https://docs.datadoghq.com/containers/guide/cluster_agent_autoscaling_metrics).

Once you have enabled Datadog's external metrics endpoint and `DatadogMetric` CRD (without
necessarily using `registerAPIService`), you can use Flagger's
[External Metrics Provider](#kubernetes-external-metrics) to query the metrics from there.

The server address is usually `datadog-cluster-agent-metrics-server`, exposed on port 8443.
External metrics are named `datadogmetric@<namespace>:<metricname>`, for example
`datadogmetric@istio-system:istio-mesh-request-count`.

## Amazon CloudWatch

You can create custom metric checks using the CloudWatch metrics provider.
Expand Down Expand Up @@ -781,3 +797,37 @@ Reference the template in the canary analysis:
max: 99
interval: 1m
```

## Kubernetes External Metrics

You can query an external metrics provider that implements the
[Kubernetes External Metrics API](https://kubernetes.io/docs/reference/external-api/external-metrics.v1beta1/).

By default, Flagger authenticates with its bound Service Account. *Optionally*, you can provide a Bearer token through a Secret (which must contain a field named `token`):

```yaml
apiVersion: v1
kind: Secret
metadata:
name: external-metric-server-token
namespace: default
data:
token: your-access-token
```

External Metrics template example:

```yaml
apiVersion: flagger.app/v1beta1
kind: MetricTemplate
metadata:
name: my-external-metric
namespace: default
spec:
provider:
type: externalmetrics
address: https://external-metrics-server.default.svc.cluster.local:8443
secretRef: # Optional
name: external-metric-server-token
query: webapp-frontend/job-success-rate?labelSelector=env%3Dproduction
```
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ require (
google.golang.org/grpc v1.76.0
google.golang.org/protobuf v1.36.10
gopkg.in/h2non/gock.v1 v1.1.2
k8s.io/api v0.34.1
k8s.io/apimachinery v0.34.1
k8s.io/client-go v0.34.1
k8s.io/code-generator v0.34.1
gopkg.in/inf.v0 v0.9.1
k8s.io/api v0.34.2
k8s.io/apimachinery v0.34.2
k8s.io/client-go v0.34.2
k8s.io/code-generator v0.34.2
k8s.io/klog/v2 v2.130.1
k8s.io/metrics v0.34.2
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397
knative.dev/serving v0.46.6
)
Expand Down Expand Up @@ -96,7 +98,6 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251002232023-7c0ddcbb5797 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/gengo/v2 v2.0.0-20250604051438-85fd79dbfd9f // indirect
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect
Expand Down
18 changes: 10 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -275,20 +275,22 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
k8s.io/api v0.34.1 h1:jC+153630BMdlFukegoEL8E/yT7aLyQkIVuwhmwDgJM=
k8s.io/api v0.34.1/go.mod h1:SB80FxFtXn5/gwzCoN6QCtPD7Vbu5w2n1S0J5gFfTYk=
k8s.io/apimachinery v0.34.1 h1:dTlxFls/eikpJxmAC7MVE8oOeP1zryV7iRyIjB0gky4=
k8s.io/apimachinery v0.34.1/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
k8s.io/client-go v0.34.1 h1:ZUPJKgXsnKwVwmKKdPfw4tB58+7/Ik3CrjOEhsiZ7mY=
k8s.io/client-go v0.34.1/go.mod h1:kA8v0FP+tk6sZA0yKLRG67LWjqufAoSHA2xVGKw9Of8=
k8s.io/code-generator v0.34.1 h1:WpphT26E+j7tEgIUfFr5WfbJrktCGzB3JoJH9149xYc=
k8s.io/code-generator v0.34.1/go.mod h1:DeWjekbDnJWRwpw3s0Jat87c+e0TgkxoR4ar608yqvg=
k8s.io/api v0.34.2 h1:fsSUNZhV+bnL6Aqrp6O7lMTy6o5x2C4XLjnh//8SLYY=
k8s.io/api v0.34.2/go.mod h1:MMBPaWlED2a8w4RSeanD76f7opUoypY8TFYkSM+3XHw=
k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4=
k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw=
k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M=
k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE=
k8s.io/code-generator v0.34.2 h1:9bG6jTxmsU3HXE5BNYJTC8AZ1D6hVVfkm8yYSkdkGY0=
k8s.io/code-generator v0.34.2/go.mod h1:dnDDEd6S/z4uZ+PG1aE58ySCi/lR4+qT3a4DddE4/2I=
k8s.io/gengo/v2 v2.0.0-20250604051438-85fd79dbfd9f h1:SLb+kxmzfA87x4E4brQzB33VBbT2+x7Zq9ROIHmGn9Q=
k8s.io/gengo/v2 v2.0.0-20250604051438-85fd79dbfd9f/go.mod h1:EJykeLsmFC60UQbYJezXkEsG2FLrt0GPNkU5iK5GWxU=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b h1:MloQ9/bdJyIu9lb1PzujOPolHyvO06MXG5TUIj2mNAA=
k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
k8s.io/metrics v0.34.2 h1:zao91FNDVPRGIiHLO2vqqe21zZVPien1goyzn0hsz90=
k8s.io/metrics v0.34.2/go.mod h1:Ydulln+8uZZctUM8yrUQX4rfq/Ay6UzsuXf24QJ37Vc=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
knative.dev/networking v0.0.0-20250902160145-7dad473f6351 h1:Gv/UqbN0AK+ORoT5e2Kg+3+uMW/y9CCdhpXKxYaVV6k=
Expand Down
1 change: 1 addition & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,7 @@ spec:
- prometheus
- influxdb
- datadog
- externalmetrics
- stackdriver
- cloudwatch
- newrelic
Expand Down
162 changes: 162 additions & 0 deletions pkg/metrics/providers/externalmetrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
Copyright 2020 The Flux authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package providers

import (
"fmt"
"net/url"
"strings"
"time"

flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/rest"
externalmetrics_client "k8s.io/metrics/pkg/client/external_metrics"
)

// ExternalMetricsProvider is a metrics provider backed by a Kubernetes
// External Metrics API client.
type ExternalMetricsProvider struct {
// timeout is set to 5 seconds by the constructor.
// NOTE(review): no code visible in this file reads this field — confirm
// whether it is meant to bound requests made through client.
timeout time.Duration
// client lists external metric values per namespace; it is used by
// RunQuery and IsOnline.
client externalmetrics_client.NamespacedMetricsGetter
}

// NewExternalMetricsProvider builds an ExternalMetricsProvider from a
// MetricTemplate provider spec and the credentials map (an optional "token"
// entry is used as bearer token). The base client configuration comes from
// rest.InClusterConfig.
func NewExternalMetricsProvider(provider flaggerv1.MetricTemplateProvider, credentials map[string][]byte) (*ExternalMetricsProvider, error) {
	inCluster := rest.InClusterConfig
	return newExternalMetricsProviderWithBuilder(provider, credentials, inCluster)
}

// newExternalMetricsProviderWithBuilder is like NewExternalMetricsProvider but
// accepts the function that builds the base rest.Config. It exists for
// testing, as rest.InClusterConfig is hard to mock.
//
// The base config is adjusted from the MetricTemplateProvider as follows:
//   - provider.Address, when set, replaces the host and resets the TLS
//     settings of the base config.
//   - provider.InsecureSkipVerify disables server certificate verification.
//   - credentials["token"], when present, is used as the bearer token instead
//     of the token (file) carried by the base config.
//   - a 5 second request timeout is applied to the client.
//
// It returns an error when the base config cannot be built or when the
// external metrics client cannot be created.
func newExternalMetricsProviderWithBuilder(
	provider flaggerv1.MetricTemplateProvider,
	credentials map[string][]byte,
	configBuilder func() (*rest.Config, error),
) (*ExternalMetricsProvider, error) {
	const timeout = 5 * time.Second

	restConfig, err := configBuilder()
	if err != nil {
		return nil, fmt.Errorf("Not in a kubernetes cluster: %w", err)
	}
	if restConfig == nil {
		// There is no error to wrap here; %w with a nil error would render
		// as "%!w(<nil>)".
		return nil, fmt.Errorf("Not in a kubernetes cluster: no rest config available")
	}

	// Handling overrides from MetricTemplateProvider
	if provider.Address != "" {
		restConfig.Host = provider.Address
		// The TLS settings of the base config belong to the original host,
		// so start from an empty TLS config for the overridden address.
		restConfig.TLSClientConfig = rest.TLSClientConfig{}
	}
	if provider.InsecureSkipVerify {
		// Insecure cannot be combined with a root CA, so replace the whole
		// TLS config. When verification stays enabled and the host is not
		// overridden, the CA bundle of the base config is kept so the
		// default endpoint can still be verified.
		restConfig.TLSClientConfig = rest.TLSClientConfig{Insecure: true}
	}
	if tokenBytes, ok := credentials["token"]; ok {
		restConfig.BearerToken = string(tokenBytes)
		// A token file takes precedence over BearerToken in client-go, so
		// it must be cleared for the provided token to be used.
		restConfig.BearerTokenFile = ""
	}
	// TODO: handle user name/password auth if needed

	// Bound every request so an unresponsive metrics server cannot block
	// the caller indefinitely.
	restConfig.Timeout = timeout

	client, err := externalmetrics_client.NewForConfig(restConfig)
	if err != nil {
		return nil, fmt.Errorf("error creating external metric client: %w", err)
	}

	return &ExternalMetricsProvider{
		timeout: timeout,
		client:  client,
	}, nil
}

// RunQuery decodes the query string (the Provider interface only allows a
// plain string) into namespace, metric name and label selector, lists the
// matching values through the External Metrics API client, and returns the
// value of the *first* item as a float64.
//
// An error is returned when the query cannot be parsed, when the API call
// fails, or when no item is returned (wrapping ErrNoValuesFound).
func (p *ExternalMetricsProvider) RunQuery(query string) (float64, error) {
	ns, name, sel, err := parseExternalMetricsQuery(query)
	if err != nil {
		return 0, fmt.Errorf("error parsing metric query: %w", err)
	}

	list, err := p.client.NamespacedMetrics(ns).List(name, sel)
	if err != nil {
		return 0, fmt.Errorf("error querying external metrics API: %w", err)
	}
	if len(list.Items) == 0 {
		return 0, fmt.Errorf("no external metrics found: %w", ErrNoValuesFound)
	}

	// Only the first returned item is considered.
	return list.Items[0].Value.AsApproximateFloat64(), nil
}

// IsOnline probes the External Metrics API by listing the placeholder metric
// "dummy-metric" in the "kube-system" namespace. The provider is reported
// online when the list call succeeds; any error returned by the call is
// wrapped and reported as the service being unavailable.
func (p *ExternalMetricsProvider) IsOnline() (bool, error) {
	probe := p.client.NamespacedMetrics("kube-system")
	if _, err := probe.List("dummy-metric", labels.Everything()); err != nil {
		return false, fmt.Errorf("external metrics service unavailable: %w", err)
	}
	return true, nil
}

// parseExternalMetricsQuery parses a query string in the format:
//
//	<namespace>/<metricName>?labelSelector=<urlencoded label selectors>
//
// and returns the namespace, metricName, and labelSelector separately.
// Only the metricName is required: a missing or empty namespace yields
// "default", and a missing or empty labelSelector yields labels.Everything().
// Leading and trailing whitespace around the query is ignored.
//
// An error is returned when the query cannot be parsed as a URL path, when it
// contains more than one slash-separated namespace segment, when the metric
// name is empty, or when the label selector is invalid.
func parseExternalMetricsQuery(query string) (namespace string, metricName string, labelSelector labels.Selector, err error) {
	// Surrounding whitespace is never part of a valid query, and a trailing
	// newline (e.g. from a YAML block scalar) would make url.Parse fail on a
	// control character.
	trimmed := strings.TrimSpace(query)

	// A dummy scheme and root let net/url split the path from the query
	// parameters and decode both.
	u, err := url.Parse("dummy:///" + trimmed)
	if err != nil {
		return "", "", labels.Everything(), fmt.Errorf("malformed query string, expected <namespace>/<metricName>?labelSelector=<urlencoded label selectors>, got %s: %w", trimmed, err)
	}
	path := strings.TrimPrefix(u.Path, "/")
	parts := strings.Split(path, "/")
	if len(parts) > 2 {
		return "", "", labels.Everything(), fmt.Errorf("malformed query string, too many slashes, expected <namespace>/<metricName>?labelSelector=<urlencoded label selectors>, got %s", trimmed)
	}

	namespace = "default"
	switch len(parts) {
	case 1:
		// Format: "metric"
		metricName = parts[0]
	case 2:
		// Format: "namespace/metric" or "/metric"
		if parts[0] != "" {
			namespace = parts[0]
		}
		metricName = parts[1]
	}
	if metricName == "" {
		return "", "", labels.Everything(), fmt.Errorf("metric name cannot be empty")
	}

	rawSelector := u.Query().Get("labelSelector")
	if rawSelector == "" {
		return namespace, metricName, labels.Everything(), nil
	}

	labelSelector, err = labels.Parse(rawSelector)
	if err != nil {
		return "", "", labels.Everything(), fmt.Errorf("error parsing label selector from string %s: %w", rawSelector, err)
	}
	return namespace, metricName, labelSelector, nil
}
Loading