Commit a2c4db2

All metrics now use OTel; fixed a bug where managed metrics were sent multiple times
1 parent 3cc44c3 commit a2c4db2

5 files changed: +128 -64 lines

internal/controller/federatedmanagedmetric_controller.go

Lines changed: 16 additions & 5 deletions
@@ -148,13 +148,19 @@ func (r *FederatedManagedMetricReconciler) Reconcile(ctx context.Context, req ct
         return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli
     }

+    defer func() {
+        if err := metricClient.Close(ctx); err != nil {
+            l.Error(err, "Failed to close metric client during federated managed metric reconciliation", "metric", metric.Name)
+        }
+    }()
+
     // should this be the group fo the gvr?
     metricClient.SetMeter("managed")

     gaugeMetric, errGauge := metricClient.NewMetric(metric.Name)
     if errGauge != nil {
-        l.Error(errCli, fmt.Sprintf("federated metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
-        return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli
+        l.Error(errGauge, fmt.Sprintf("federated managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+        return ctrl.Result{RequeueAfter: RequeueAfterError}, errGauge
     }

     for _, queryConfig := range queryConfigs {
@@ -169,7 +175,7 @@ func (r *FederatedManagedMetricReconciler) Reconcile(ctx context.Context, req ct
         _, errMon := orchestrator.Handler.Monitor(ctx)

         if errMon != nil {
-            l.Error(errMon, fmt.Sprintf("federated metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+            l.Error(errMon, fmt.Sprintf("federated managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
             return ctrl.Result{RequeueAfter: RequeueAfterError}, errMon
         }

@@ -197,9 +203,14 @@ func (r *FederatedManagedMetricReconciler) Reconcile(ctx context.Context, req ct
     /*
         4. Re-queue the metric after the frequency or 2 minutes if an error occurred
     */
-    var requeueTime = metric.Spec.Interval.Duration
+    var requeueTime time.Duration
+    if errExport != nil {
+        requeueTime = RequeueAfterError
+    } else {
+        requeueTime = metric.Spec.Interval.Duration
+    }

-    l.Info(fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, requeueTime))
+    l.Info(fmt.Sprintf("federated managed metric '%s' re-queued for execution in %v\n", metric.Spec.Name, requeueTime))

     return ctrl.Result{
         Requeue: true,
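
For reference, the client lifecycle the controllers now follow, pulled out as a standalone sketch. It only uses calls that appear in this commit (clientoptl.NewMetricClient, SetMeter, NewMetric, Close); the reconcileMetricClient wrapper and the logr parameter are illustrative, not code from the repository.

import (
    "context"

    "github.com/go-logr/logr"

    "github.com/SAP/metrics-operator/internal/clientoptl"
    "github.com/SAP/metrics-operator/internal/common"
)

// reconcileMetricClient sketches the lifecycle added above: create the OTel
// client once per reconciliation and close it in a defer, so it is released
// even when the reconcile returns early with a requeue.
func reconcileMetricClient(ctx context.Context, l logr.Logger, creds common.DataSinkCredentials, name string) error {
    metricClient, err := clientoptl.NewMetricClient(ctx, creds.Host, creds.Path, creds.Token)
    if err != nil {
        return err
    }
    defer func() {
        if cerr := metricClient.Close(ctx); cerr != nil {
            l.Error(cerr, "failed to close metric client", "metric", name)
        }
    }()

    metricClient.SetMeter("managed")

    if _, err := metricClient.NewMetric(name); err != nil {
        return err
    }
    // ... record data points via the handler, then export with ExportMetrics(ctx)
    return nil
}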

internal/controller/federatedmetric_controller.go

Lines changed: 15 additions & 4 deletions
@@ -153,13 +153,19 @@ func (r *FederatedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Requ
         return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli
     }

+    defer func() {
+        if err := metricClient.Close(ctx); err != nil {
+            l.Error(err, "Failed to close metric client during federated metric reconciliation", "metric", metric.Name)
+        }
+    }()
+
     // should this be the group fo the gvr?
     metricClient.SetMeter("federated")

     gaugeMetric, errGauge := metricClient.NewMetric(metric.Name)
     if errGauge != nil {
-        l.Error(errCli, fmt.Sprintf("federated metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
-        return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli
+        l.Error(errGauge, fmt.Sprintf("federated metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+        return ctrl.Result{RequeueAfter: RequeueAfterError}, errGauge
     }

     for _, queryConfig := range queryConfigs {
@@ -202,9 +208,14 @@ func (r *FederatedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Requ
     /*
         4. Requeue the metric after the frequency or after 2 minutes if an error occurred
     */
-    var requeueTime = metric.Spec.Interval.Duration
+    var requeueTime time.Duration
+    if errExport != nil {
+        requeueTime = RequeueAfterError
+    } else {
+        requeueTime = metric.Spec.Interval.Duration
+    }

-    l.Info(fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, requeueTime))
+    l.Info(fmt.Sprintf("federated metric '%s' re-queued for execution in %v\n", metric.Spec.Name, requeueTime))

     return ctrl.Result{
         Requeue: true,
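
The requeue decision both federated controllers now share can be read as a tiny helper. It is purely illustrative (no such helper exists in this commit); RequeueAfterError is the backoff constant already used above, and errExport is the error returned by metricClient.ExportMetrics(ctx).

// nextRequeue picks the error backoff when the OTel export failed,
// otherwise the metric's configured interval.
func nextRequeue(errExport error, interval time.Duration) time.Duration {
    if errExport != nil {
        return RequeueAfterError
    }
    return interval
}

With such a helper the tail of Reconcile would requeue after nextRequeue(errExport, metric.Spec.Interval.Duration), which is exactly what the inline if/else above does.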

internal/controller/managedmetric_controller.go

Lines changed: 70 additions & 7 deletions
@@ -24,12 +24,14 @@ import (
     "time"

     apierrors "k8s.io/apimachinery/pkg/api/errors"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/client-go/rest"
     "k8s.io/client-go/tools/record"
     "sigs.k8s.io/controller-runtime/pkg/client"
     "sigs.k8s.io/controller-runtime/pkg/log"

+    "github.com/SAP/metrics-operator/internal/clientoptl"
     "github.com/SAP/metrics-operator/internal/common"
     "github.com/SAP/metrics-operator/internal/config"
     "github.com/SAP/metrics-operator/internal/orchestrator"
@@ -57,6 +59,22 @@ func (r *ManagedMetricReconciler) getRestConfig() *rest.Config {
     return r.inRestConfig
 }

+func (r *ManagedMetricReconciler) scheduleNextReconciliation(metric *v1alpha1.ManagedMetric) (ctrl.Result, error) {
+    elapsed := time.Since(metric.Status.Observation.Timestamp.Time)
+    return ctrl.Result{
+        Requeue: true,
+        RequeueAfter: metric.Spec.Interval.Duration - elapsed,
+    }, nil
+}
+
+func (r *ManagedMetricReconciler) shouldReconcile(metric *v1alpha1.ManagedMetric) bool {
+    if metric.Status.Observation.Timestamp.Time.IsZero() {
+        return true
+    }
+    elapsed := time.Since(metric.Status.Observation.Timestamp.Time)
+    return elapsed >= metric.Spec.Interval.Duration
+}
+
 // ManagedMetricReconciler reconciles a ManagedMetric object
 type ManagedMetricReconciler struct {
     inClient client.Client
@@ -96,6 +114,11 @@ func (r *ManagedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reques
         return ctrl.Result{RequeueAfter: RequeueAfterError}, errLoad
     }

+    // Check if enough time has passed since the last reconciliation
+    if !r.shouldReconcile(&metric) {
+        return r.scheduleNextReconciliation(&metric)
+    }
+
     /*
         1.1 Get the Secret that holds the Dynatrace credentials
     */
@@ -116,10 +139,34 @@ func (r *ManagedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reques
         return ctrl.Result{RequeueAfter: RequeueAfterError}, err
     }

+    /*
+        1.3 Create OTel metric client and gauge metric
+    */
+    metricClient, errCli := clientoptl.NewMetricClient(ctx, credentials.Host, credentials.Path, credentials.Token)
+    if errCli != nil {
+        l.Error(errCli, fmt.Sprintf("managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+        return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli
+    }
+
+    defer func() {
+        if err := metricClient.Close(ctx); err != nil {
+            l.Error(err, "Failed to close metric client during managed metric reconciliation", "metric", metric.Name)
+        }
+    }()
+
+    // Set meter name for managed metrics
+    metricClient.SetMeter("managed")
+
+    gaugeMetric, errGauge := metricClient.NewMetric(metric.Name)
+    if errGauge != nil {
+        l.Error(errGauge, fmt.Sprintf("managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+        return ctrl.Result{RequeueAfter: RequeueAfterError}, errGauge
+    }
+
     /*
         2. Create a new orchestrator
     */
-    orchestrator, errOrch := orchestrator.NewOrchestrator(credentials, queryConfig).WithManaged(metric)
+    orchestrator, errOrch := orchestrator.NewOrchestrator(credentials, queryConfig).WithManaged(metric, gaugeMetric)
     if errOrch != nil {
         l.Error(errOrch, "unable to create managed metric orchestrator monitor")
         r.Recorder.Event(&metric, "Warning", "OrchestratorCreation", "unable to create orchestrator")
@@ -133,6 +180,17 @@ func (r *ManagedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reques
         return ctrl.Result{RequeueAfter: RequeueAfterError}, errMon
     }

+    /*
+        2.1 Export metrics to data sink
+    */
+    errExport := metricClient.ExportMetrics(ctx)
+    if errExport != nil {
+        metric.Status.Ready = v1alpha1.StatusFalse
+        l.Error(errExport, fmt.Sprintf("managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError))
+    } else {
+        metric.Status.Ready = v1alpha1.StatusTrue
+    }
+
     /*
         3. Update the status of the metric with conditions and phase
     */
@@ -149,11 +207,16 @@ func (r *ManagedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reques
         r.Recorder.Event(&metric, "Normal", "MetricPending", result.Message)
     }

-    metric.Status.Ready = v1alpha1.StatusFalse
-    if result.Phase == v1alpha1.PhaseActive {
-        metric.Status.Ready = v1alpha1.StatusTrue
+    // Override Ready status if export failed
+    if errExport != nil {
+        metric.Status.Ready = v1alpha1.StatusFalse
+    }
+
+    // Update the observation timestamp to track when this reconciliation happened
+    metric.Status.Observation = v1alpha1.ManagedObservation{
+        Timestamp: metav1.Now(),
+        Resources: result.Observation.GetValue(),
     }
-    metric.Status.Observation = v1alpha1.ManagedObservation{Timestamp: result.Observation.GetTimestamp(), Resources: result.Observation.GetValue()}

     // conditions are not persisted until the status is updated
     errUp := r.inClient.Status().Update(ctx, &metric)
@@ -166,13 +229,13 @@ func (r *ManagedMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reques
         4. Requeue the metric after the frequency or after 2 minutes if an error occurred
     */
     var requeueTime time.Duration
-    if result.Error != nil {
+    if result.Error != nil || errExport != nil {
         requeueTime = RequeueAfterError
     } else {
         requeueTime = metric.Spec.Interval.Duration
     }

-    l.Info(fmt.Sprintf("managed metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, requeueTime))
+    l.Info(fmt.Sprintf("managed metric '%s' re-queued for execution in %v\n", metric.Spec.Name, requeueTime))

     return ctrl.Result{
         Requeue: true,
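
This file carries the fix for the "sending multiple times" half of the commit message: a managed metric is only processed again once its interval has elapsed since the last recorded observation; otherwise the request is requeued for the remainder of the interval. A minimal sketch of the same gate with plain time values (the real code reads them from metric.Status.Observation.Timestamp and metric.Spec.Interval):

import "time"

// shouldReconcile mirrors the gate above: reconcile immediately if the metric
// has never been observed, otherwise only once the interval has fully elapsed.
func shouldReconcile(lastObservation time.Time, interval time.Duration) bool {
    if lastObservation.IsZero() {
        return true
    }
    return time.Since(lastObservation) >= interval
}

// remainingDelay mirrors scheduleNextReconciliation: requeue for whatever is
// left of the interval instead of querying and exporting again right away.
func remainingDelay(lastObservation time.Time, interval time.Duration) time.Duration {
    return interval - time.Since(lastObservation)
}

Writing metav1.Now() into Status.Observation at the end of every pass is what makes this gate effective on the next reconcile event.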

internal/orchestrator/managedhandler.go

Lines changed: 25 additions & 41 deletions
@@ -15,23 +15,22 @@ import (
     rcli "sigs.k8s.io/controller-runtime/pkg/client"

     "github.com/SAP/metrics-operator/api/v1alpha1"
-    "github.com/SAP/metrics-operator/internal/client"
+    "github.com/SAP/metrics-operator/internal/clientoptl"
 )

 // ManagedHandler is used to monitor the metric
 type ManagedHandler struct {
     client rcli.Client
     dCli dynamic.Interface

-    metric v1alpha1.ManagedMetric
-    metricMeta client.MetricMetadata
+    metric v1alpha1.ManagedMetric
+    gaugeMetric *clientoptl.Metric

-    dtClient client.DynatraceClient
     clusterName *string
 }

 // NewManagedHandler creates a new ManagedHandler
-func NewManagedHandler(metric v1alpha1.ManagedMetric, metricMeta client.MetricMetadata, qc QueryConfig, dtClient client.DynatraceClient) (*ManagedHandler, error) {
+func NewManagedHandler(metric v1alpha1.ManagedMetric, qc QueryConfig, gaugeMetric *clientoptl.Metric) (*ManagedHandler, error) {
     dynamicClient, errCli := dynamic.NewForConfig(&qc.RestConfig)
     if errCli != nil {
         return nil, fmt.Errorf("could not create dynamic client: %w", errCli)
@@ -41,40 +40,46 @@ func NewManagedHandler(metric v1alpha1.ManagedMetric, metricMe
         client: qc.Client,
         dCli: dynamicClient,
         metric: metric,
-        metricMeta: metricMeta,
-        dtClient: dtClient,
+        gaugeMetric: gaugeMetric,
         clusterName: qc.ClusterName,
     }

     return handler, nil
 }

 func (h *ManagedHandler) sendStatusBasedMetricValue(ctx context.Context) (string, error) {
-    // add the Datapoint for the metric
-    h.metricMeta.AddDatapoint(1)
     resources, err := h.getResourcesStatus(ctx)
     if err != nil {
         return "", err
     }

     // data point split by dimensions
     for _, cr := range resources {
-        h.metricMeta.ClearDimensions()
-        _ = h.metricMeta.AddDimension("kind", cr.MangedResource.Kind)
-        _ = h.metricMeta.AddDimension("apiversion", cr.MangedResource.APIVersion)
+        // Create a new data point for each resource
+        dataPoint := clientoptl.NewDataPoint()
+        dataPoint.AddDimension("kind", cr.MangedResource.Kind)
+        dataPoint.AddDimension("apiversion", cr.MangedResource.APIVersion)
+
+        // Add cluster dimension if available
+        if h.clusterName != nil {
+            dataPoint.AddDimension(CLUSTER, *h.clusterName)
+        }

-        // TODO: add mcp name as well later
-        // b.dynaMetric.AddDimension("name", ...)
+        // Add GVK dimensions
+        dataPoint.AddDimension(KIND, h.metric.Spec.Kind)
+        dataPoint.AddDimension(GROUP, h.metric.Spec.Group)
+        dataPoint.AddDimension(VERSION, h.metric.Spec.Version)

+        // Add status conditions as dimensions
         for typ, state := range cr.Status {
-            dimErr := h.metricMeta.AddDimension(strings.ToLower(typ), strconv.FormatBool(state))
-            if dimErr != nil {
-                return "", dimErr
-            }
+            dataPoint.AddDimension(strings.ToLower(typ), strconv.FormatBool(state))
         }

-        // Send Metric
-        _, err = h.dtClient.SendMetric(ctx, h.metricMeta)
+        // Set the value to 1 for each resource
+        dataPoint.SetValue(1)
+
+        // Record the metric
+        err = h.gaugeMetric.RecordMetrics(ctx, dataPoint)
         if err != nil {
             return "", err
         }
@@ -88,27 +93,6 @@ func (h *ManagedHandler) sendStatusBasedMetricValue(ctx context.Context) (string

 // Monitor executes the monitoring of the metric
 func (h *ManagedHandler) Monitor(ctx context.Context) (MonitorResult, error) {
-
-    kindDimErr := h.metricMeta.AddDimension(KIND, h.metric.Spec.Kind)
-    if kindDimErr != nil {
-        return MonitorResult{}, fmt.Errorf("could not initialize '"+KIND+"' dimensions: %w", kindDimErr)
-    }
-    groupDimErr := h.metricMeta.AddDimension(GROUP, h.metric.Spec.Group)
-    if groupDimErr != nil {
-        return MonitorResult{}, fmt.Errorf("could not initialize '"+GROUP+"' dimensions: %w", groupDimErr)
-    }
-    versionDimErr := h.metricMeta.AddDimension(VERSION, h.metric.Spec.Version)
-    if versionDimErr != nil {
-        return MonitorResult{}, fmt.Errorf("could not initialize '"+VERSION+"' dimensions: %w", versionDimErr)
-    }
-
-    if h.clusterName != nil {
-        clusterDimErr := h.metricMeta.AddDimension(CLUSTER, *h.clusterName)
-        if clusterDimErr != nil {
-            return MonitorResult{}, fmt.Errorf("could not initialize '"+CLUSTER+"' dimensions: %w", clusterDimErr)
-        }
-    }
-
     result := MonitorResult{}
     resources, err := h.sendStatusBasedMetricValue(ctx)

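The handler now builds one OTel data point per managed resource instead of mutating a shared Dynatrace MetricMetadata and sending inside the loop. Below is a sketch of that flow using only the clientoptl calls visible in this diff; the dimension values are made up, and the actual export to the data sink still happens once per reconcile via metricClient.ExportMetrics(ctx) in the controller.

// recordResource is illustrative: one data point per resource, with GVK,
// cluster and status conditions carried as dimensions and a constant value of 1.
func recordResource(ctx context.Context, gauge *clientoptl.Metric) error {
    dp := clientoptl.NewDataPoint()
    dp.AddDimension("kind", "Deployment")    // from the resource's GVK
    dp.AddDimension("apiversion", "apps/v1")
    dp.AddDimension("ready", "true")         // one entry per status condition
    dp.SetValue(1)
    return gauge.RecordMetrics(ctx, dp)
}
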
internal/orchestrator/orchestrator.go

Lines changed: 2 additions & 7 deletions
@@ -7,8 +7,6 @@ import (
     rcli "sigs.k8s.io/controller-runtime/pkg/client"

     "github.com/SAP/metrics-operator/api/v1alpha1"
-    "github.com/SAP/metrics-operator/internal/client"
-
     "github.com/SAP/metrics-operator/internal/clientoptl"
     "github.com/SAP/metrics-operator/internal/common"
 )
@@ -60,12 +58,9 @@ func NewOrchestrator(creds common.DataSinkCredentials, qConfig QueryConfig) *Orc
 }

 // WithManaged creates a new Orchestrator with a ManagedMetric handler
-func (o *Orchestrator) WithManaged(managed v1alpha1.ManagedMetric) (*Orchestrator, error) {
-    dtClient := client.NewClient(o.credentials.Host, o.credentials.Path, o.credentials.Token)
-    metricMetadata := client.NewMetricMetadata(managed.Spec.Name, managed.Spec.Name, managed.Spec.Description)
-
+func (o *Orchestrator) WithManaged(managed v1alpha1.ManagedMetric, gaugeMetric *clientoptl.Metric) (*Orchestrator, error) {
     var err error
-    o.Handler, err = NewManagedHandler(managed, metricMetadata, o.queryConfig, dtClient)
+    o.Handler, err = NewManagedHandler(managed, o.queryConfig, gaugeMetric)
     return o, err
 }

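Putting the five files together, the managed-metric path now wires up roughly as below. Only calls shown in this commit are used; the runManagedMetric wrapper and its simplified error flow are illustrative, not code from the repository.

import (
    "context"

    "github.com/SAP/metrics-operator/api/v1alpha1"
    "github.com/SAP/metrics-operator/internal/clientoptl"
    "github.com/SAP/metrics-operator/internal/common"
    "github.com/SAP/metrics-operator/internal/orchestrator"
)

// runManagedMetric sketches the new wiring: the controller owns the OTel client
// and gauge metric, and the orchestrator only threads the gauge through to the handler.
func runManagedMetric(ctx context.Context, credentials common.DataSinkCredentials, queryConfig orchestrator.QueryConfig, metric v1alpha1.ManagedMetric) error {
    metricClient, err := clientoptl.NewMetricClient(ctx, credentials.Host, credentials.Path, credentials.Token)
    if err != nil {
        return err
    }
    defer func() { _ = metricClient.Close(ctx) }()

    metricClient.SetMeter("managed")
    gaugeMetric, err := metricClient.NewMetric(metric.Name)
    if err != nil {
        return err
    }

    orch, err := orchestrator.NewOrchestrator(credentials, queryConfig).WithManaged(metric, gaugeMetric)
    if err != nil {
        return err
    }
    if _, err := orch.Handler.Monitor(ctx); err != nil {
        return err
    }

    // every data point recorded by the handler is exported in one shot
    return metricClient.ExportMetrics(ctx)
}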