Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 6 additions & 28 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,15 @@ jobs:
run: make lint
- name: Integration test
run: make integration-test coverage
- name: Archive coverage report
uses: actions/upload-artifact@v4
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
name: coverage
path: ${{ env.CODECOV_FILE }}

token: ${{ secrets.CODECOV_TOKEN }}
files: ${{ env.CODECOV_FILE }}
e2e:
name: E2E Test
if: github.actor!= 'dependabot-preview[bot]'
if: false
# if: github.actor!= 'dependabot-preview[bot]'
needs:
- verify
- image-build
Expand Down Expand Up @@ -108,25 +108,3 @@ jobs:
with:
name: coverage-e2e
path: ${{ env.CODECOV_FILE }}

coverage:
name: Coverage Report
if: github.actor!= 'dependabot-preview[bot]'
needs:
- verify
- e2e
runs-on: ubuntu-latest
steps:
- name: Download coverage report
uses: actions/download-artifact@v4
with:
name: coverage
- name: Download E2E coverage report
uses: actions/download-artifact@v4
with:
name: coverage-e2e
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage.xml,coverage.e2e.xml
25 changes: 14 additions & 11 deletions cmd/etcd-operator/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"flag"
"fmt"
"os"
"time"

"github.com/agoda-com/etcd-operator/pkg/backup"
"github.com/spf13/cobra"

corev1 "k8s.io/api/core/v1"
Expand All @@ -25,15 +25,16 @@ const (
)

type Config struct {
Pod client.ObjectKey
MetricsAddr string
HealthProbeAddr string
LeaderElection bool
WatchNamespaces []string
WatchSelector LabelSelector
Image string
ControllerImage string
BackupRetention time.Duration
Pod client.ObjectKey
MetricsAddr string
HealthProbeAddr string
LeaderElection bool
WatchNamespaces []string
WatchSelector LabelSelector
Image string
ControllerImage string
PriorityClassName string
BackupEnv map[string]string
}

func Command() *cobra.Command {
Expand All @@ -50,13 +51,15 @@ func Command() *cobra.Command {
Namespace: os.Getenv("POD_NAMESPACE"),
Name: os.Getenv("POD_NAME"),
},
BackupEnv: backup.LoadEnv(),
}
flags.StringSliceVar(&config.WatchNamespaces, "watch-namespaces", nil, "Namespaces to watch for resources.")
flags.Var(&config.WatchSelector, "watch-selector", "Selector to watch for resources.")
flags.StringVar(&config.PriorityClassName, "priority-class-name", "", "ETCD cluster pods priorityClassName")

flags.StringVar(&config.MetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flags.StringVar(&config.HealthProbeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flags.BoolVar(&config.LeaderElection, "leader-elect", false, "Enable leader election for controller manager. Enabling this will ensure there is only one active controller manager.")
flags.DurationVar(&config.BackupRetention, "backup-retention", 7*24*time.Hour, "How long to retain a backup objects.")

stdFlags := flag.NewFlagSet("etcd-operator", flag.ContinueOnError)
zapOptions := &zap.Options{}
Expand Down
18 changes: 15 additions & 3 deletions cmd/etcd-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (

"github.com/agoda-com/etcd-operator/pkg/cluster"
"github.com/agoda-com/etcd-operator/pkg/etcd"
"github.com/agoda-com/etcd-operator/pkg/metrics"
)

func main() {
Expand Down Expand Up @@ -97,14 +98,25 @@ func run(ctx context.Context, logger logr.Logger, kubeconfig *rest.Config, confi
return fmt.Errorf("tls cache: %w", err)
}

err = cluster.CreateControllerWithManager(mgr, tlsCache, cluster.Config{
Image: config.Image,
ControllerImage: config.ControllerImage,
err = cluster.SetupWithManager(mgr, tlsCache, cluster.Config{
Image: config.Image,
ControllerImage: config.ControllerImage,
PriorityClassName: config.PriorityClassName,
BackupEnv: config.BackupEnv,
})
if err != nil {
return fmt.Errorf("cluster controller: %w", err)
}

meterProvider, err := SetupTelemetry(ctx)
if err != nil {
return fmt.Errorf("metrics provider: %w", err)
}
err = metrics.SetupWithManager(mgr, meterProvider)
if err != nil {
return fmt.Errorf("metrics controller: %w", err)
}

err = mgr.AddHealthzCheck("ping", healthz.Ping)
if err != nil {
return err
Expand Down
48 changes: 48 additions & 0 deletions cmd/etcd-operator/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package main

import (
"context"
"os"
"time"

"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
metricsdk "go.opentelemetry.io/otel/sdk/metric"
"sigs.k8s.io/controller-runtime/pkg/log"
)

func SetupTelemetry(ctx context.Context) (metric.MeterProvider, error) {
logger := log.FromContext(ctx).WithName("metrics")

endpoint := os.Getenv("OTEL_EXPORTER_OLTP_ENDPOINT")
if endpoint != "" {
exporter, err := otlpmetricgrpc.New(ctx, otlpmetricgrpc.WithEndpoint(endpoint))
if err != nil {
return nil, err
}

provider := metricsdk.NewMeterProvider(
metricsdk.WithReader(metricsdk.NewPeriodicReader(exporter)),
)

logger.Info("enabled otlp grpc metrics", "endpoint", endpoint)

go func() {
<-ctx.Done()

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

err := provider.Shutdown(ctx)
if err != nil {
logger.Error(err, "shutdown metrics")
}
}()

return provider, nil
}

// fallback on noop provider if endpoint is not configured
return noop.NewMeterProvider(), nil
}
5 changes: 5 additions & 0 deletions cmd/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ require (
github.com/aws/smithy-go v1.22.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
Expand All @@ -68,6 +69,7 @@ require (
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down Expand Up @@ -97,9 +99,12 @@ require (
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
go.opentelemetry.io/otel v1.35.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 // indirect
go.opentelemetry.io/otel/metric v1.35.0 // indirect
go.opentelemetry.io/otel/sdk v1.35.0 // indirect
go.opentelemetry.io/otel/sdk/metric v1.35.0 // indirect
go.opentelemetry.io/otel/trace v1.35.0 // indirect
go.opentelemetry.io/proto/otlp v1.5.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.36.0 // indirect
golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect
Expand Down
9 changes: 9 additions & 0 deletions cmd/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cert-manager/cert-manager v1.15.0 h1:xVL8tzdQECMypoYQa9rv4DLjkn2pJXJLTqH4JUsxfko=
github.com/cert-manager/cert-manager v1.15.0/go.mod h1:Vxq6yNKAbgQeMtzu5gqU8n0vXDiZcGTa5LDyCJRbmXE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
Expand Down Expand Up @@ -116,6 +118,9 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
Expand Down Expand Up @@ -229,6 +234,8 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0/go.mod h1:azvtTADFQJA8mX80jIH/akaE7h+dbm/sVuaHqN13w74=
go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 h1:QcFwRrZLc82r8wODjvyCbP7Ifp3UANaBSmhDSFjnqSc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0/go.mod h1:CXIWhUomyWBG/oY2/r/kLp6K/cmx9e/7DLpBuuGdLCA=
go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE=
go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY=
Expand All @@ -237,6 +244,8 @@ go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5J
go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w=
go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs=
go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
Expand Down
14 changes: 7 additions & 7 deletions e2e/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ func TestCluster(t *testing.T) {
})

t.Run("scale", func(t *testing.T) {
scale(t, kcl, cluster, 5)
poll(t, kcl, cluster, 2*time.Minute, available)
Scale(t, kcl, cluster, 5)
Poll(t, kcl, cluster, 2*time.Minute, Available)

t.Logf("cluster %q scaled up to 5 members", cluster.Name)

scale(t, kcl, cluster, 3)
poll(t, kcl, cluster, time.Minute, available)
Scale(t, kcl, cluster, 3)
Poll(t, kcl, cluster, time.Minute, Available)

t.Logf("cluster %q scaled down to 3 members", cluster.Name)
})
Expand Down Expand Up @@ -60,8 +60,8 @@ func TestCluster(t *testing.T) {
}
t.Log("evict pod", key)

poll(t, kcl, cluster, 2*time.Minute, func(cluster *apiv1.EtcdCluster) bool {
if !available(cluster) {
Poll(t, kcl, cluster, 2*time.Minute, func(cluster *apiv1.EtcdCluster) bool {
if !Available(cluster) {
return false
}

Expand All @@ -85,7 +85,7 @@ func TestCluster(t *testing.T) {
}

version = strings.TrimPrefix(version, "v")
poll(t, kcl, cluster, 5*time.Minute, func(cluster *apiv1.EtcdCluster) bool {
Poll(t, kcl, cluster, 5*time.Minute, func(cluster *apiv1.EtcdCluster) bool {
if !conditions.StatusTrue(cluster.Status.Conditions, apiv1.ClusterAvailable) {
return false
}
Expand Down
15 changes: 10 additions & 5 deletions e2e/kube_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
autoscalingv1 "k8s.io/api/autoscaling/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -147,14 +148,14 @@ func createCluster(t testing.TB, kcl client.Client, timeout time.Duration, spec
key := client.ObjectKeyFromObject(cluster)
t.Logf("cluster %q created", key)

poll(t, kcl, cluster, timeout, available)
Poll(t, kcl, cluster, timeout, Available)

t.Logf("cluster %q available", key)

return cluster
}

func poll[T client.Object](t testing.TB, kcl client.Client, obj T, timeout time.Duration, f func(obj T) bool) {
func Poll[T client.Object](t testing.TB, kcl client.Client, obj T, timeout time.Duration, f func(obj T) bool) {
t.Helper()

ctx, cancel := context.WithTimeout(t.Context(), timeout)
Expand All @@ -173,11 +174,11 @@ func poll[T client.Object](t testing.TB, kcl client.Client, obj T, timeout time.
}
}

func available(cluster *apiv1.EtcdCluster) bool {
func Available(cluster *apiv1.EtcdCluster) bool {
return cluster.Status.AvailableReplicas == cluster.Spec.Replicas
}

func scale(t testing.TB, kcl client.Client, obj client.Object, replicas int32) {
func Scale(t testing.TB, kcl client.Client, obj client.Object, replicas int32) {
scale := &autoscalingv1.Scale{}
err := kcl.SubResource("scale").Get(t.Context(), obj, scale)
if err != nil {
Expand Down Expand Up @@ -226,7 +227,11 @@ func triggerCronJob(t testing.TB, kcl client.Client, key client.ObjectKey, timeo
t.Logf("created job %q", client.ObjectKeyFromObject(job))

err = wait.PollUntilContextCancel(ctx, 500*time.Millisecond, true, func(ctx context.Context) (done bool, err error) {
if err := kcl.Get(ctx, client.ObjectKeyFromObject(job), job); err != nil {
err = kcl.Get(ctx, client.ObjectKeyFromObject(job), job)
switch {
case apierrors.IsTooManyRequests(err):
return false, err
case err != nil:
return true, fmt.Errorf("get backup job: %w", err)
}

Expand Down
Loading