Skip to content

Commit 54d2534

Browse files
committed
feat: cluster metrics
Signed-off-by: Zbigniew Mandziejewicz <shaxbee@gmail.com>
1 parent 2cfb119 commit 54d2534

File tree

12 files changed

+571
-27
lines changed

12 files changed

+571
-27
lines changed

cmd/etcd-operator/command.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ import (
44
"flag"
55
"fmt"
66
"os"
7-
"time"
87

8+
"github.com/agoda-com/etcd-operator/pkg/backup"
99
"github.com/spf13/cobra"
1010

1111
corev1 "k8s.io/api/core/v1"
@@ -25,15 +25,16 @@ const (
2525
)
2626

2727
type Config struct {
28-
Pod client.ObjectKey
29-
MetricsAddr string
30-
HealthProbeAddr string
31-
LeaderElection bool
32-
WatchNamespaces []string
33-
WatchSelector LabelSelector
34-
Image string
35-
ControllerImage string
36-
BackupRetention time.Duration
28+
Pod client.ObjectKey
29+
MetricsAddr string
30+
HealthProbeAddr string
31+
LeaderElection bool
32+
WatchNamespaces []string
33+
WatchSelector LabelSelector
34+
Image string
35+
ControllerImage string
36+
PriorityClassName string
37+
BackupEnv map[string]string
3738
}
3839

3940
func Command() *cobra.Command {
@@ -50,13 +51,15 @@ func Command() *cobra.Command {
5051
Namespace: os.Getenv("POD_NAMESPACE"),
5152
Name: os.Getenv("POD_NAME"),
5253
},
54+
BackupEnv: backup.LoadEnv(),
5355
}
5456
flags.StringSliceVar(&config.WatchNamespaces, "watch-namespaces", nil, "Namespaces to watch for resources.")
5557
flags.Var(&config.WatchSelector, "watch-selector", "Selector to watch for resources.")
58+
flags.StringVar(&config.PriorityClassName, "priority-class-name", "", "ETCD cluster pods priorityClassName")
59+
5660
flags.StringVar(&config.MetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
5761
flags.StringVar(&config.HealthProbeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
5862
flags.BoolVar(&config.LeaderElection, "leader-elect", false, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
59-
flags.DurationVar(&config.BackupRetention, "backup-retention", 7*24*time.Hour, "How long to retain a backup objects.")
6063

6164
stdFlags := flag.NewFlagSet("etcd-operator", flag.ContinueOnError)
6265
zapOptions := &zap.Options{}

cmd/etcd-operator/main.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import (
3939

4040
"github.com/agoda-com/etcd-operator/pkg/cluster"
4141
"github.com/agoda-com/etcd-operator/pkg/etcd"
42+
"github.com/agoda-com/etcd-operator/pkg/metrics"
4243
)
4344

4445
func main() {
@@ -97,14 +98,25 @@ func run(ctx context.Context, logger logr.Logger, kubeconfig *rest.Config, confi
9798
return fmt.Errorf("tls cache: %w", err)
9899
}
99100

100-
err = cluster.CreateControllerWithManager(mgr, tlsCache, cluster.Config{
101-
Image: config.Image,
102-
ControllerImage: config.ControllerImage,
101+
err = cluster.SetupWithManager(mgr, tlsCache, cluster.Config{
102+
Image: config.Image,
103+
ControllerImage: config.ControllerImage,
104+
PriorityClassName: config.PriorityClassName,
105+
BackupEnv: config.BackupEnv,
103106
})
104107
if err != nil {
105108
return fmt.Errorf("cluster controller: %w", err)
106109
}
107110

111+
meterProvider, err := SetupTelemetry(ctx)
112+
if err != nil {
113+
return fmt.Errorf("metrics provider: %w", err)
114+
}
115+
err = metrics.SetupWithManager(mgr, meterProvider)
116+
if err != nil {
117+
return fmt.Errorf("metrics controller: %w", err)
118+
}
119+
108120
err = mgr.AddHealthzCheck("ping", healthz.Ping)
109121
if err != nil {
110122
return err

cmd/etcd-operator/telemetry.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package main
2+
3+
import (
	"context"
	"os"
	"strings"
	"time"

	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
	"go.opentelemetry.io/otel/metric"
	"go.opentelemetry.io/otel/metric/noop"
	metricsdk "go.opentelemetry.io/otel/sdk/metric"
	"sigs.k8s.io/controller-runtime/pkg/log"
)
14+
15+
func SetupTelemetry(ctx context.Context) (metric.MeterProvider, error) {
16+
logger := log.FromContext(ctx).WithName("metrics")
17+
18+
endpoint := os.Getenv("OTEL_EXPORTER_OLTP_ENDPOINT")
19+
if endpoint != "" {
20+
exporter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(endpoint))
21+
if err != nil {
22+
return nil, err
23+
}
24+
25+
provider := metricsdk.NewMeterProvider(
26+
metricsdk.WithReader(metricsdk.NewPeriodicReader(exporter)),
27+
)
28+
29+
logger.Info("enabled otlp grpc metrics", "endpoint", endpoint)
30+
31+
go func() {
32+
<-ctx.Done()
33+
34+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
35+
defer cancel()
36+
37+
err := provider.Shutdown(ctx)
38+
if err != nil {
39+
logger.Error(err, "shutdown metrics")
40+
}
41+
}()
42+
43+
return provider, nil
44+
}
45+
46+
// fallback on noop provider if endpoint is not configured
47+
return noop.NewMeterProvider(), nil
48+
}

cmd/go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ require (
4343
github.com/aws/smithy-go v1.22.2 // indirect
4444
github.com/beorn7/perks v1.0.1 // indirect
4545
github.com/blang/semver/v4 v4.0.0 // indirect
46+
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4647
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4748
github.com/coreos/go-semver v0.3.1 // indirect
4849
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
@@ -68,6 +69,7 @@ require (
6869
github.com/google/go-cmp v0.7.0 // indirect
6970
github.com/google/gofuzz v1.2.0 // indirect
7071
github.com/google/uuid v1.6.0 // indirect
72+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect
7173
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
7274
github.com/imdario/mergo v0.3.16 // indirect
7375
github.com/inconshreveable/mousetrap v1.1.0 // indirect
@@ -97,9 +99,12 @@ require (
9799
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
98100
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
99101
go.opentelemetry.io/otel v1.35.0 // indirect
102+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 // indirect
100103
go.opentelemetry.io/otel/metric v1.35.0 // indirect
101104
go.opentelemetry.io/otel/sdk v1.35.0 // indirect
105+
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
102106
go.opentelemetry.io/otel/trace v1.35.0 // indirect
107+
go.opentelemetry.io/proto/otlp v1.5.0 // indirect
103108
go.uber.org/multierr v1.11.0 // indirect
104109
golang.org/x/crypto v0.36.0 // indirect
105110
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect

cmd/go.sum

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
4444
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
4545
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
4646
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
47+
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
48+
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
4749
github.com/cert-manager/cert-manager v1.15.0 h1:xVL8tzdQECMypoYQa9rv4DLjkn2pJXJLTqH4JUsxfko=
4850
github.com/cert-manager/cert-manager v1.15.0/go.mod h1:Vxq6yNKAbgQeMtzu5gqU8n0vXDiZcGTa5LDyCJRbmXE=
4951
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
@@ -116,6 +118,9 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
116118
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
117119
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
118120
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
121+
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
122+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
123+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
119124
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
120125
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
121126
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
@@ -229,6 +234,8 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5
229234
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0/go.mod h1:azvtTADFQJA8mX80jIH/akaE7h+dbm/sVuaHqN13w74=
230235
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
231236
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
237+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 h1:QcFwRrZLc82r8wODjvyCbP7Ifp3UANaBSmhDSFjnqSc=
238+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0/go.mod h1:CXIWhUomyWBG/oY2/r/kLp6K/cmx9e/7DLpBuuGdLCA=
232239
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
233240
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
234241
go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
@@ -237,6 +244,8 @@ go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5J
237244
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
238245
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
239246
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
247+
go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
248+
go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
240249
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
241250
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
242251
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=

go.mod

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ require (
1616
go.etcd.io/etcd/client/v3 v3.5.21
1717
go.etcd.io/etcd/etcdutl/v3 v3.5.21
1818
go.uber.org/zap v1.27.0
19-
golang.org/x/sync v0.12.0
20-
google.golang.org/grpc v1.71.0
19+
golang.org/x/sync v0.13.0
20+
google.golang.org/grpc v1.72.0
2121
gotest.tools/v3 v3.5.2
2222
k8s.io/api v0.30.1
2323
k8s.io/apimachinery v0.30.1
@@ -46,6 +46,7 @@ require (
4646
github.com/aws/smithy-go v1.22.2 // indirect
4747
github.com/beorn7/perks v1.0.1 // indirect
4848
github.com/blang/semver/v4 v4.0.0 // indirect
49+
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
4950
github.com/cespare/xxhash/v2 v2.3.0 // indirect
5051
github.com/coreos/go-semver v0.3.1 // indirect
5152
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
@@ -70,6 +71,7 @@ require (
7071
github.com/google/go-cmp v0.7.0 // indirect
7172
github.com/google/gofuzz v1.2.0 // indirect
7273
github.com/google/uuid v1.6.0 // indirect
74+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
7375
github.com/imdario/mergo v0.3.16 // indirect
7476
github.com/inconshreveable/mousetrap v1.1.0 // indirect
7577
github.com/jonboulle/clockwork v0.2.2 // indirect
@@ -97,23 +99,26 @@ require (
9799
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
98100
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.51.0 // indirect
99101
go.opentelemetry.io/otel v1.35.0 // indirect
102+
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 // indirect
100103
go.opentelemetry.io/otel/metric v1.35.0 // indirect
104+
go.opentelemetry.io/otel/sdk v1.35.0 // indirect
101105
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
102106
go.opentelemetry.io/otel/trace v1.35.0 // indirect
107+
go.opentelemetry.io/proto/otlp v1.6.0 // indirect
103108
go.uber.org/multierr v1.11.0 // indirect
104-
golang.org/x/crypto v0.36.0 // indirect
109+
golang.org/x/crypto v0.37.0 // indirect
105110
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
106-
golang.org/x/net v0.38.0 // indirect
107-
golang.org/x/oauth2 v0.26.0 // indirect
108-
golang.org/x/sys v0.31.0 // indirect
109-
golang.org/x/term v0.30.0 // indirect
110-
golang.org/x/text v0.23.0 // indirect
111+
golang.org/x/net v0.39.0 // indirect
112+
golang.org/x/oauth2 v0.27.0 // indirect
113+
golang.org/x/sys v0.32.0 // indirect
114+
golang.org/x/term v0.31.0 // indirect
115+
golang.org/x/text v0.24.0 // indirect
111116
golang.org/x/time v0.5.0 // indirect
112117
golang.org/x/tools v0.30.0 // indirect
113118
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
114-
google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a // indirect
115-
google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a // indirect
116-
google.golang.org/protobuf v1.36.5 // indirect
119+
google.golang.org/genproto/googleapis/api v0.0.0-20250428153025-10db94c68c34 // indirect
120+
google.golang.org/genproto/googleapis/rpc v0.0.0-20250428153025-10db94c68c34 // indirect
121+
google.golang.org/protobuf v1.36.6 // indirect
117122
gopkg.in/inf.v0 v0.9.1 // indirect
118123
gopkg.in/yaml.v2 v2.4.0 // indirect
119124
gopkg.in/yaml.v3 v3.0.1 // indirect

0 commit comments

Comments
 (0)