diff --git a/Makefile b/Makefile index d9d75fc..3354ab4 100644 --- a/Makefile +++ b/Makefile @@ -343,9 +343,9 @@ crossplane-install: # Install the Kubernetes provider using kubectl crossplane-provider-install: - kubectl apply -f crossplane/provider.yaml -n $(CROSSPLANE_NAMESPACE) + kubectl apply -f examples/crossplane/provider.yaml -n $(CROSSPLANE_NAMESPACE) kubectl wait --for=condition=Healthy provider/provider-helm --timeout=1m - kubectl apply -f crossplane/provider-config.yaml -n $(CROSSPLANE_NAMESPACE) + kubectl apply -f examples/crossplane/provider-config.yaml -n $(CROSSPLANE_NAMESPACE) @@ -353,4 +353,4 @@ crossplane-provider-install: .PHONY: helm-provider-sample crossplane-provider-sample: - kubectl apply -f crossplane/release.yaml -n $(CROSSPLANE_NAMESPACE) + kubectl apply -f examples/crossplane/release.yaml -n $(CROSSPLANE_NAMESPACE) diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go new file mode 100644 index 0000000..9993ec5 --- /dev/null +++ b/api/v1beta1/common_types.go @@ -0,0 +1,8 @@ +package v1beta1 + +const ( + // StatusTrue indicates the metric resource is considered ready/active. + StatusTrue = "True" + // StatusFalse indicates the metric resource is not ready/active. + StatusFalse = "False" +) diff --git a/examples/basic_metric.yaml b/examples/basic_metric.yaml index d2b862b..a34e895 100644 --- a/examples/basic_metric.yaml +++ b/examples/basic_metric.yaml @@ -8,7 +8,7 @@ spec: kind: Release group: helm.crossplane.io version: v1beta1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes --- apiVersion: metrics.cloud.sap/v1alpha1 kind: Metric @@ -20,7 +20,7 @@ spec: kind: Pod group: "" version: v1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes --- #apiVersion: metrics.cloud.sap/v1alpha1 #kind: Metric diff --git a/examples/crossplane/provider-config.yaml b/examples/crossplane/provider-config.yaml new file mode 100644 index 0000000..8a51904 --- /dev/null +++ b/examples/crossplane/provider-config.yaml @@ -0,0 +1,7 @@ +apiVersion: helm.crossplane.io/v1beta1 +kind: ProviderConfig +metadata: + name: helm-provider +spec: + credentials: + source: InjectedIdentity diff --git a/examples/crossplane/provider.yaml b/examples/crossplane/provider.yaml new file mode 100644 index 0000000..9a33af8 --- /dev/null +++ b/examples/crossplane/provider.yaml @@ -0,0 +1,32 @@ +apiVersion: pkg.crossplane.io/v1 +kind: Provider +metadata: + name: provider-helm +spec: + package: xpkg.upbound.io/crossplane-contrib/provider-helm:v0.17.0 + runtimeConfigRef: + apiVersion: pkg.crossplane.io/v1beta1 + kind: DeploymentRuntimeConfig + name: provider-helm +--- +apiVersion: pkg.crossplane.io/v1beta1 +kind: DeploymentRuntimeConfig +metadata: + name: provider-helm +spec: + serviceAccountTemplate: + metadata: + name: provider-helm +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: provider-helm-cluster-admin +subjects: + - kind: ServiceAccount + name: provider-helm + namespace: crossplane-system +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git a/examples/crossplane/release-oci.yaml b/examples/crossplane/release-oci.yaml new file mode 100644 index 0000000..7fd3d11 --- /dev/null +++ b/examples/crossplane/release-oci.yaml @@ -0,0 +1,75 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: wordpress2 +--- +apiVersion: helm.crossplane.io/v1beta1 +kind: Release +metadata: + name: wordpress-oci-example +spec: + # rollbackLimit: 3 + forProvider: + chart: + name: wordpress + repository: "oci://localhost:5000/helm-charts" + version: 15.2.5 + # pullSecretRef: + # name: oci-creds + # namespace: default + # url: "oci://localhost:5000/helm-charts/wordpress:9.3.19" + namespace: wordpress2 + # insecureSkipTLSVerify: true + # skipCreateNamespace: true + # wait: true + # skipCRDs: true + values: + service: + type: ClusterIP + set: + - name: param1 + value: value2 + # valuesFrom: + # - configMapKeyRef: + # key: values.yaml + # name: default-vals + # namespace: wordpress + # optional: false + # - secretKeyRef: + # key: svalues.yaml + # name: svals + # namespace: wordpress + # optional: false + # connectionDetails: + # - apiVersion: v1 + # kind: Service + # name: wordpress-example + # namespace: wordpress + # fieldPath: spec.clusterIP + # #fieldPath: status.loadBalancer.ingress[0].ip + # toConnectionSecretKey: ip + # - apiVersion: v1 + # kind: Service + # name: wordpress-example + # namespace: wordpress + # fieldPath: spec.ports[0].port + # toConnectionSecretKey: port + # - apiVersion: v1 + # kind: Secret + # name: wordpress-example + # namespace: wordpress + # fieldPath: data.wordpress-password + # toConnectionSecretKey: password + # - apiVersion: v1 + # kind: Secret + # name: manual-api-secret + # namespace: wordpress + # fieldPath: data.api-key + # toConnectionSecretKey: api-key + # # this secret created manually (not via Helm chart), so skip 'part of helm release' check + # skipPartOfReleaseCheck: true + # writeConnectionSecretToRef: + # name: wordpress-credentials + # namespace: crossplane-system + providerConfigRef: + name: helm-provider diff --git a/examples/crossplane/release.yaml b/examples/crossplane/release.yaml new file mode 100644 index 0000000..0d5e925 --- /dev/null +++ b/examples/crossplane/release.yaml @@ -0,0 +1,70 @@ +apiVersion: helm.crossplane.io/v1beta1 +kind: Release +metadata: + name: wordpress-example +spec: + # rollbackLimit: 3 + forProvider: + chart: + name: wordpress + repository: https://charts.bitnami.com/bitnami + version: 15.2.5 ## To use development versions, set ">0.0.0-0" + # pullSecretRef: + # name: museum-creds + # namespace: default + # url: "https://charts.bitnami.com/bitnami/wordpress-9.3.19.tgz" + namespace: wordpress + # insecureSkipTLSVerify: true + # skipCreateNamespace: true + # wait: true + # skipCRDs: true + values: + service: + type: ClusterIP + set: + - name: param1 + value: value2 + # valuesFrom: + # - configMapKeyRef: + # key: values.yaml + # name: default-vals + # namespace: wordpress + # optional: false + # - secretKeyRef: + # key: svalues.yaml + # name: svals + # namespace: wordpress + # optional: false + # connectionDetails: + # - apiVersion: v1 + # kind: Service + # name: wordpress-example + # namespace: wordpress + # fieldPath: spec.clusterIP + # #fieldPath: status.loadBalancer.ingress[0].ip + # toConnectionSecretKey: ip + # - apiVersion: v1 + # kind: Service + # name: wordpress-example + # namespace: wordpress + # fieldPath: spec.ports[0].port + # toConnectionSecretKey: port + # - apiVersion: v1 + # kind: Secret + # name: wordpress-example + # namespace: wordpress + # fieldPath: data.wordpress-password + # toConnectionSecretKey: password + # - apiVersion: v1 + # kind: Secret + # name: manual-api-secret + # namespace: wordpress + # fieldPath: data.api-key + # toConnectionSecretKey: api-key + # # this secret created manually (not via Helm chart), so skip 'part of helm release' check + # skipPartOfReleaseCheck: true + # writeConnectionSecretToRef: + # name: wordpress-credentials + # namespace: crossplane-system + providerConfigRef: + name: helm-provider diff --git a/examples/dev-core/metric.yaml b/examples/dev-core/metric.yaml index 33b3a55..cb22af6 100644 --- a/examples/dev-core/metric.yaml +++ b/examples/dev-core/metric.yaml @@ -8,5 +8,5 @@ spec: kind: LandscaperDeployment group: landscaper-service.gardener.cloud version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes --- diff --git a/examples/fed-managed-resoruces/crossplane-managed.yaml b/examples/fed-managed-resoruces/crossplane-managed.yaml index 55ae5af..240c94e 100644 --- a/examples/fed-managed-resoruces/crossplane-managed.yaml +++ b/examples/fed-managed-resoruces/crossplane-managed.yaml @@ -5,7 +5,7 @@ metadata: spec: name: xfed-managed description: crossplane managed resources - frequency: 1 # in minutes + checkInterval: "1m" # in minutes federateCaRef: name: federate-ca-sample namespace: default diff --git a/examples/federated-resources/crdsdeletion.yaml b/examples/federated-resources/crdsdeletion.yaml index 677b271..e3cae47 100644 --- a/examples/federated-resources/crdsdeletion.yaml +++ b/examples/federated-resources/crdsdeletion.yaml @@ -10,7 +10,7 @@ spec: resource: customresourcedefinitions version: v1 fieldSelector: "metadata.deletionTimestamp" - frequency: 1 # in minutes + checkInterval: "1m" # in minutes projections: - name: deletion fieldPath: "metadata.deletionTimestamp" diff --git a/examples/federated-resources/entitlements.yaml b/examples/federated-resources/entitlements.yaml index a02599f..1b17967 100644 --- a/examples/federated-resources/entitlements.yaml +++ b/examples/federated-resources/entitlements.yaml @@ -9,7 +9,7 @@ spec: group: account.btp.orchestrate.cloud.sap resource: entitlements version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes projections: - name: servicename fieldPath: "spec.forProvider.serviceName" diff --git a/examples/federated-resources/serviceinstances.yaml b/examples/federated-resources/serviceinstances.yaml index 1d21ff0..1afe537 100644 --- a/examples/federated-resources/serviceinstances.yaml +++ b/examples/federated-resources/serviceinstances.yaml @@ -9,7 +9,7 @@ spec: group: services.cloud.sap.com resource: serviceinstances version: v1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes federateCaRef: name: federate-ca-sample namespace: default diff --git a/examples/federated-resources/subaccounts.yaml b/examples/federated-resources/subaccounts.yaml index 8427860..ac634c8 100644 --- a/examples/federated-resources/subaccounts.yaml +++ b/examples/federated-resources/subaccounts.yaml @@ -9,7 +9,7 @@ spec: group: account.btp.orchestrate.cloud.sap resource: subaccounts version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes projections: - name: region fieldPath: "spec.forProvider.region" diff --git a/examples/managed_metric.yaml b/examples/managed_metric.yaml index 008be65..035165f 100644 --- a/examples/managed_metric.yaml +++ b/examples/managed_metric.yaml @@ -8,4 +8,4 @@ spec: kind: Release group: helm.crossplane.io version: v1beta1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes diff --git a/examples/remoteclusteraccess/metric_with_rca.yaml b/examples/remoteclusteraccess/metric_with_rca.yaml index 9d63ce4..867e239 100644 --- a/examples/remoteclusteraccess/metric_with_rca.yaml +++ b/examples/remoteclusteraccess/metric_with_rca.yaml @@ -8,7 +8,7 @@ spec: kind: ManagedControlPlane group: cola.cloud.sap version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes remoteClusterAccessRef: name: crate-cluster namespace: test-monitoring @@ -23,7 +23,7 @@ spec: kind: CloudOrchestrator group: cola.cloud.sap version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes remoteClusterAccessRef: name: crate-cluster namespace: test-monitoring @@ -38,7 +38,7 @@ spec: kind: DataPlane group: cola.cloud.sap version: v1alpha1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes remoteClusterAccessRef: name: crate-cluster namespace: test-monitoring diff --git a/examples/v1beta1/compmetric.yaml b/examples/v1beta1/compmetric.yaml index 54fbab3..57430ea 100644 --- a/examples/v1beta1/compmetric.yaml +++ b/examples/v1beta1/compmetric.yaml @@ -10,7 +10,7 @@ spec: resource: pods group: "" version: v1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes projections: - name: pod-namespace fieldPath: "metadata.namespace" diff --git a/examples/v1beta1/fedmetric.yaml b/examples/v1beta1/fedmetric.yaml index f54109a..6ecfca2 100644 --- a/examples/v1beta1/fedmetric.yaml +++ b/examples/v1beta1/fedmetric.yaml @@ -9,7 +9,7 @@ spec: group: pkg.crossplane.io resource: providers version: v1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes projections: - name: package fieldPath: "spec.package" diff --git a/examples/v1beta1/singlemetric.yaml b/examples/v1beta1/singlemetric.yaml index fb6e31c..2ffe397 100644 --- a/examples/v1beta1/singlemetric.yaml +++ b/examples/v1beta1/singlemetric.yaml @@ -9,7 +9,7 @@ spec: kind: Release group: helm.crossplane.io version: v1beta1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes --- apiVersion: metrics.cloud.sap/v1beta1 kind: SingleMetric @@ -22,7 +22,7 @@ spec: kind: Pod group: "" version: v1 - frequency: 1 # in minutes + checkInterval: "1m" # in minutes --- #apiVersion: metrics.cloud.sap/v1alpha1 #kind: Metric diff --git a/internal/clientlite/dtclient.go b/internal/clientlite/dtclient.go deleted file mode 100644 index edc0b8d..0000000 --- a/internal/clientlite/dtclient.go +++ /dev/null @@ -1,130 +0,0 @@ -package clientlite - -import ( - "bytes" - "context" - "fmt" - "net/http" - "net/url" - "path" - "strings" -) - -// MetricClient represents a client for sending metrics to Dynatrace -type MetricClient struct { - dynatraceURL string - apiToken string - httpClient *http.Client -} - -const ( - // MetricsEndpoint is the endpoint for sending metrics to Dynatrace - MetricsEndpoint = "/metrics/ingest" -) - -// NewMetricClient creates a new MetricClient -func NewMetricClient(baseURL, endpoint, apiToken string) *MetricClient { - // Create the base URL - dynatraceBaseURL := &url.URL{ - Scheme: "https", - Host: baseURL, - } - - fullPath := path.Join(endpoint, MetricsEndpoint) - - // Combine the base URL with the endpoint - fullURL := dynatraceBaseURL.ResolveReference(&url.URL{Path: fullPath}) - - return &MetricClient{ - dynatraceURL: fullURL.String(), - apiToken: apiToken, - httpClient: &http.Client{}, - } -} - -// Metric represents a single metric to be sent to Dynatrace -type Metric struct { - Name string - dimensions map[string]string - gaugeValue float64 -} - -// NewMetric creates a new Metric with the given name -func NewMetric(name string) *Metric { - return &Metric{ - Name: name, - dimensions: make(map[string]string), - } -} - -// AddDimension adds a dimension to the metric -func (m *Metric) AddDimension(key, value string) *Metric { - - if value == "" { - // dont add empty values - return m - } - - m.dimensions[key] = value - return m -} - -// SetGaugeValue sets the gauge value for the metric -func (m *Metric) SetGaugeValue(value float64) *Metric { - m.gaugeValue = value - return m -} - -// formatMetric formats a single metric into the Dynatrace line protocol -func (m *Metric) format() string { - // the general format of the payload is - // format,dataPoint timestamp - // The format of the timestamp is UTC milliseconds. The allowed range is between 1 hour into the past and 10 minutes into the future from now. Data points with timestamps outside of this range are rejected. - // - // If no timestamp is provided, the current timestamp of the server is used. - - dimPairs := make([]string, 0, len(m.dimensions)) - for k, v := range m.dimensions { - dimPairs = append(dimPairs, fmt.Sprintf("%s=\"%s\"", k, v)) // Note: need to add quotes for multi-word values, otherwise an exception is thrown - } - dimensions := strings.Join(dimPairs, ",") - - if dimensions != "" { - dimensions = "," + dimensions - } - - return fmt.Sprintf("%s%s gauge,%.2f", m.Name, dimensions, m.gaugeValue) -} - -// SendMetrics sends multiple metrics to Dynatrace -func (c *MetricClient) SendMetrics(ctx context.Context, metrics ...*Metric) error { - metricLines := make([]string, 0, len(metrics)) - - for _, metric := range metrics { - metricLines = append(metricLines, metric.format()) - } - - payload := strings.Join(metricLines, "\n") - - req, err := http.NewRequestWithContext(ctx, "POST", c.dynatraceURL, bytes.NewBufferString(payload)) - if err != nil { - return fmt.Errorf("error creating request: %w", err) - } - - req.Header.Set("Content-Type", "text/plain") - req.Header.Set("Authorization", "Api-Token "+c.apiToken) - - resp, err := c.httpClient.Do(req) - if err != nil { - return fmt.Errorf("error sending request: %w", err) - } - defer func() { - _ = resp.Body.Close() - }() - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return fmt.Errorf("unexpected status code: %d", resp.StatusCode) - } - - return nil -} diff --git a/internal/clientlite/dtclient_test.go b/internal/clientlite/dtclient_test.go deleted file mode 100644 index a7bdf9c..0000000 --- a/internal/clientlite/dtclient_test.go +++ /dev/null @@ -1,35 +0,0 @@ -package clientlite - -import ( - "context" - "testing" -) - -func TestSendMetric_Real(t *testing.T) { - t.Skip("skipping test") - dynatraceURL := "canary.eu21.apm.services.cloud.sap" - apiToken := "" - - cl := NewMetricClient(dynatraceURL, "e/1b9c6fb0-eb17-4fce-96b0-088cee0861b3/api/v2", apiToken) - - mr := NewMetric("xmy.metric"). - AddDimension("device", "device1"). - AddDimension("location", "new_york"). - SetGaugeValue(10) - - mr2 := NewMetric("xmy.metric"). - AddDimension("device", "device2"). - AddDimension("location", "new_york"). - SetGaugeValue(20) - - mr3 := NewMetric("xmy.metric"). - AddDimension("device", "device3"). - AddDimension("location", "new_york"). - SetGaugeValue(12) - - err := cl.SendMetrics(context.Background(), mr, mr2, mr3) - - if err != nil { - t.Errorf("Error sending metrics: %v\n", err) - } -} diff --git a/internal/controller/compoundmetric_controller.go b/internal/controller/compoundmetric_controller.go index 42d589c..5b8bf0f 100644 --- a/internal/controller/compoundmetric_controller.go +++ b/internal/controller/compoundmetric_controller.go @@ -32,6 +32,7 @@ import ( insight "github.com/SAP/metrics-operator/api/v1alpha1" "github.com/SAP/metrics-operator/api/v1beta1" + "github.com/SAP/metrics-operator/internal/clientoptl" // Added "github.com/SAP/metrics-operator/internal/common" orc "github.com/SAP/metrics-operator/internal/orchestrator" ) @@ -144,10 +145,30 @@ func (r *CompoundMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{RequeueAfter: RequeueAfterError}, err } + metricClient, errCli := clientoptl.NewMetricClient(ctx, credentials.Host, credentials.Path, credentials.Token) + if errCli != nil { + l.Error(errCli, fmt.Sprintf("compound metric '%s' failed to create OTel client, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // TODO: Update status? + return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli + } + defer func() { + if err := metricClient.Close(ctx); err != nil { + l.Error(err, "Failed to close metric client during compound metric reconciliation", "metric", metric.Name) + } + }() // Ensure exporter is shut down + + metricClient.SetMeter("compound") + + gaugeMetric, errGauge := metricClient.NewMetric(metric.Name) + if errGauge != nil { + l.Error(errGauge, fmt.Sprintf("compound metric '%s' failed to create OTel gauge, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // TODO: Update status? + return ctrl.Result{RequeueAfter: RequeueAfterError}, errGauge + } /* 2. Create a new orchestrator */ - orchestrator, errOrch := orc.NewOrchestrator(credentials, queryConfig).WithCompound(metric) + orchestrator, errOrch := orc.NewOrchestrator(credentials, queryConfig).WithCompound(metric, gaugeMetric) // Pass gaugeMetric if errOrch != nil { l.Error(errOrch, "unable to create compound metric orchestrator monitor") r.Recorder.Event(&metric, "Warning", "OrchestratorCreation", "unable to create orchestrator") @@ -157,10 +178,21 @@ func (r *CompoundMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reque result, errMon := orchestrator.Handler.Monitor(ctx) if errMon != nil { + metric.Status.Ready = v1beta1.StatusFalse l.Error(errMon, fmt.Sprintf("compound metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // Update status before returning + _ = r.getClient().Status().Update(ctx, &metric) // Best effort status update on error return ctrl.Result{RequeueAfter: RequeueAfterError}, errMon } + errExport := metricClient.ExportMetrics(ctx) + if errExport != nil { + metric.Status.Ready = v1beta1.StatusFalse + l.Error(errExport, fmt.Sprintf("compound metric '%s' failed to export, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + } else { + metric.Status.Ready = v1beta1.StatusTrue + } + /* 3. Update the status of the metric with conditions and phase */ @@ -179,7 +211,10 @@ func (r *CompoundMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reque cObs := result.Observation.(*v1beta1.MetricObservation) - metric.Status.Ready = boolToString(result.Phase == insight.PhaseActive) + // Override Ready status if export failed + if errExport != nil { + metric.Status.Ready = "False" + } metric.Status.Observation = v1beta1.MetricObservation{Timestamp: result.Observation.GetTimestamp(), Dimensions: cObs.Dimensions, LatestValue: cObs.LatestValue} // Update LastReconcileTime @@ -189,7 +224,7 @@ func (r *CompoundMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reque // conditions are not persisted until the status is updated errUp := r.getClient().Status().Update(ctx, &metric) if errUp != nil { - l.Error(errMon, fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + l.Error(errUp, fmt.Sprintf("compound metric '%s' failed to update status, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) return ctrl.Result{RequeueAfter: RequeueAfterError}, errUp } @@ -197,13 +232,13 @@ func (r *CompoundMetricReconciler) Reconcile(ctx context.Context, req ctrl.Reque 4. Requeue the metric after the frequency or after 2 minutes if an error occurred */ var requeueTime time.Duration - if result.Error != nil { + if result.Error != nil || errExport != nil { // Requeue faster on monitor or export error requeueTime = RequeueAfterError } else { requeueTime = metric.Spec.CheckInterval.Duration } - l.Info(fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, requeueTime)) + l.Info(fmt.Sprintf("compound metric '%s' re-queued for execution in %v\n", metric.Spec.Name, requeueTime)) return ctrl.Result{ Requeue: true, diff --git a/internal/controller/singlemetric_controller.go b/internal/controller/singlemetric_controller.go index 4a15be8..1db31a8 100644 --- a/internal/controller/singlemetric_controller.go +++ b/internal/controller/singlemetric_controller.go @@ -32,6 +32,7 @@ import ( insight "github.com/SAP/metrics-operator/api/v1alpha1" "github.com/SAP/metrics-operator/api/v1beta1" + "github.com/SAP/metrics-operator/internal/clientoptl" // Added "github.com/SAP/metrics-operator/internal/common" "github.com/SAP/metrics-operator/internal/config" orc "github.com/SAP/metrics-operator/internal/orchestrator" @@ -145,10 +146,30 @@ func (r *SingleMetricReconciler) Reconcile(ctx context.Context, req ctrl.Request return ctrl.Result{RequeueAfter: RequeueAfterError}, err } + metricClient, errCli := clientoptl.NewMetricClient(ctx, credentials.Host, credentials.Path, credentials.Token) + if errCli != nil { + l.Error(errCli, fmt.Sprintf("single metric '%s' failed to create OTel client, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // TODO: Update status? + return ctrl.Result{RequeueAfter: RequeueAfterError}, errCli + } + defer func() { + if err := metricClient.Close(ctx); err != nil { + l.Error(err, "Failed to close metric client during single metric reconciliation", "metric", metric.Name) + } + }() // Ensure exporter is shut down + + metricClient.SetMeter("single") + + gaugeMetric, errGauge := metricClient.NewMetric(metric.Name) + if errGauge != nil { + l.Error(errGauge, fmt.Sprintf("single metric '%s' failed to create OTel gauge, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // TODO: Update status? + return ctrl.Result{RequeueAfter: RequeueAfterError}, errGauge + } /* 2. Create a new orchestrator */ - orchestrator, errOrch := orc.NewOrchestrator(credentials, queryConfig).WithSingle(metric) + orchestrator, errOrch := orc.NewOrchestrator(credentials, queryConfig).WithSingle(metric, gaugeMetric) // Pass gaugeMetric if errOrch != nil { l.Error(errOrch, "unable to create single metric orchestrator monitor") r.Recorder.Event(&metric, "Warning", "OrchestratorCreation", "unable to create orchestrator") @@ -158,10 +179,21 @@ func (r *SingleMetricReconciler) Reconcile(ctx context.Context, req ctrl.Request result, errMon := orchestrator.Handler.Monitor(ctx) if errMon != nil { + metric.Status.Ready = v1beta1.StatusFalse l.Error(errMon, fmt.Sprintf("single metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + // Update status before returning + _ = r.getClient().Status().Update(ctx, &metric) // Best effort status update on error return ctrl.Result{RequeueAfter: RequeueAfterError}, errMon } + errExport := metricClient.ExportMetrics(ctx) + if errExport != nil { + metric.Status.Ready = v1beta1.StatusFalse + l.Error(errExport, fmt.Sprintf("single metric '%s' failed to export, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + } else { + metric.Status.Ready = v1beta1.StatusTrue + } + /* 3. Update the status of the metric with conditions and phase */ @@ -178,7 +210,10 @@ func (r *SingleMetricReconciler) Reconcile(ctx context.Context, req ctrl.Request r.Recorder.Event(&metric, "Normal", "MetricPending", result.Message) } - metric.Status.Ready = boolToString(result.Phase == insight.PhaseActive) + // Override Ready status if export failed + if errExport != nil { + metric.Status.Ready = v1beta1.StatusFalse + } metric.Status.Observation = v1beta1.MetricObservation{Timestamp: result.Observation.GetTimestamp(), LatestValue: result.Observation.GetValue()} // Update LastReconcileTime @@ -188,7 +223,7 @@ func (r *SingleMetricReconciler) Reconcile(ctx context.Context, req ctrl.Request // conditions are not persisted until the status is updated errUp := r.getClient().Status().Update(ctx, &metric) if errUp != nil { - l.Error(errMon, fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) + l.Error(errUp, fmt.Sprintf("single metric '%s' failed to update status, re-queued for execution in %v minutes\n", metric.Spec.Name, RequeueAfterError)) return ctrl.Result{RequeueAfter: RequeueAfterError}, errUp } @@ -196,13 +231,13 @@ func (r *SingleMetricReconciler) Reconcile(ctx context.Context, req ctrl.Request 4. Requeue the metric after the frequency or after 2 minutes if an error occurred */ var requeueTime time.Duration - if result.Error != nil { + if result.Error != nil || errExport != nil { // Requeue faster on monitor or export error requeueTime = RequeueAfterError } else { requeueTime = metric.Spec.CheckInterval.Duration } - l.Info(fmt.Sprintf("generic metric '%s' re-queued for execution in %v minutes\n", metric.Spec.Name, requeueTime)) + l.Info(fmt.Sprintf("single metric '%s' re-queued for execution in %v\n", metric.Spec.Name, requeueTime)) return ctrl.Result{ Requeue: true, diff --git a/internal/orchestrator/compoundhandler.go b/internal/orchestrator/compoundhandler.go index 95f579b..12a42a4 100644 --- a/internal/orchestrator/compoundhandler.go +++ b/internal/orchestrator/compoundhandler.go @@ -15,7 +15,7 @@ import ( "github.com/SAP/metrics-operator/api/v1alpha1" "github.com/SAP/metrics-operator/api/v1beta1" - "github.com/SAP/metrics-operator/internal/clientlite" + "github.com/SAP/metrics-operator/internal/clientoptl" // Added ) // CompoundHandler is used to monitor a compound metric @@ -25,65 +25,87 @@ type CompoundHandler struct { metric v1beta1.CompoundMetric - dtClient *clientlite.MetricClient + gaugeMetric *clientoptl.Metric // Changed from dtClient clusterName *string } // Monitor is used to monitor the metric +// +//nolint:gocyclo func (h *CompoundHandler) Monitor(ctx context.Context) (MonitorResult, error) { - mrTotal := h.createGvrBaseMetric() - - if h.clusterName != nil { - mrTotal.AddDimension(CLUSTER, *h.clusterName) - } - + // Metric creation and export are handled by the controller. + // This handler focuses on fetching resources, grouping, and recording data points. result := MonitorResult{Observation: &v1beta1.MetricObservation{Timestamp: metav1.Now()}} - list, err := h.getResources(ctx) - if err != nil { - return MonitorResult{}, fmt.Errorf("could not retrieve target resource(s) %w", err) + list, errGet := h.getResources(ctx) + if errGet != nil { + result.Error = errGet + result.Phase = v1alpha1.PhaseFailed + result.Reason = "GetResourcesFailed" + result.Message = fmt.Sprintf("failed to retrieve target resource(s): %s", errGet.Error()) + return result, nil // Return error state, but not the error itself to controller } groups := h.extractProjectionGroupsFrom(list) - var dimensions []v1beta1.Dimension - - clMetrics := make([]*clientlite.Metric, 0, len(groups)+1) - clMetrics = append(clMetrics, mrTotal) + dataPoints := make([]*clientoptl.DataPoint, 0, len(groups)) + var recordErrors []error for _, group := range groups { + groupCount := len(group) + dataPoint := clientoptl.NewDataPoint().SetValue(int64(groupCount)) - mrGroup := h.createGvrBaseMetric() - mrGroup.SetGaugeValue(float64(len(group))) - clMetrics = append(clMetrics, mrGroup) + // Add base dimensions only if they have a non-empty value + if h.metric.Spec.Target.Resource != "" { + dataPoint.AddDimension(RESOURCE, h.metric.Spec.Target.Resource) + } + if h.metric.Spec.Target.Group != "" { + dataPoint.AddDimension(GROUP, h.metric.Spec.Target.Group) + } + if h.metric.Spec.Target.Version != "" { + dataPoint.AddDimension(VERSION, h.metric.Spec.Target.Version) + } + if h.clusterName != nil && *h.clusterName != "" { + dataPoint.AddDimension(CLUSTER, *h.clusterName) + } + // Add projected dimensions for this specific group for _, pField := range group { - if pField.error == nil { - mrGroup.AddDimension(pField.name, pField.value) - dimensions = append(dimensions, v1beta1.Dimension{Name: pField.name, Value: pField.value}) + // Add projected dimension only if the value is non-empty and no error occurred + if pField.error == nil && pField.value != "" { + dataPoint.AddDimension(pField.name, pField.value) + } else { + // Optionally log or handle projection errors + recordErrors = append(recordErrors, fmt.Errorf("projection error for %s: %w", pField.name, pField.error)) } } - + dataPoints = append(dataPoints, dataPoint) } - err = h.dtClient.SendMetrics(ctx, clMetrics...) + // Record all collected data points + errRecord := h.gaugeMetric.RecordMetrics(ctx, dataPoints...) + if errRecord != nil { + recordErrors = append(recordErrors, errRecord) + } - if err != nil { - result.Error = err + // Update result based on errors during projection or recording + if len(recordErrors) > 0 { + // Combine errors for reporting + combinedError := fmt.Errorf("errors during metric recording: %v", recordErrors) + result.Error = combinedError result.Phase = v1alpha1.PhaseFailed - result.Reason = v1alpha1.ReasonSendMetricFailed - result.Message = fmt.Sprintf("failed to send metric value to data sink. %s", err.Error()) + result.Reason = "RecordMetricFailed" + result.Message = fmt.Sprintf("failed to record metric value(s): %s", combinedError.Error()) } else { result.Phase = v1alpha1.PhaseActive result.Reason = v1alpha1.ReasonMonitoringActive - result.Message = fmt.Sprintf("metric is monitoring resource '%s'", h.metric.Spec.Target.String()) - - if dimensions != nil { - result.Observation = &v1beta1.MetricObservation{Timestamp: metav1.Now(), Dimensions: []v1beta1.Dimension{{Name: dimensions[0].Name, Value: strconv.Itoa(len(list.Items))}}} - } + result.Message = fmt.Sprintf("metric values recorded for resource '%s'", h.metric.Spec.Target.String()) + // Observation might need adjustment depending on how compound results should be represented in status + result.Observation = &v1beta1.MetricObservation{Timestamp: metav1.Now(), LatestValue: strconv.Itoa(len(list.Items))} // Report total count for now } + // Return the result, error indicates failure in Monitor execution, not necessarily metric export failure (handled by controller) return result, nil } @@ -127,12 +149,7 @@ func (h *CompoundHandler) extractProjectionGroupsFrom(list *unstructured.Unstruc return groups } -func (h *CompoundHandler) createGvrBaseMetric() *clientlite.Metric { - return clientlite.NewMetric(h.metric.Name). - AddDimension(RESOURCE, h.metric.Spec.Target.Resource). - AddDimension(GROUP, h.metric.Spec.Target.Group). - AddDimension(VERSION, h.metric.Spec.Target.Version) -} +// Removed createGvrBaseMetric as it's clientlite specific func (h *CompoundHandler) getResources(ctx context.Context) (*unstructured.UnstructuredList, error) { var options = metav1.ListOptions{} @@ -161,7 +178,7 @@ func (h *CompoundHandler) getResources(ctx context.Context) (*unstructured.Unstr } // NewCompoundHandler creates a new CompoundHandler -func NewCompoundHandler(metric v1beta1.CompoundMetric, qc QueryConfig, dtClient *clientlite.MetricClient) (*CompoundHandler, error) { +func NewCompoundHandler(metric v1beta1.CompoundMetric, qc QueryConfig, gaugeMetric *clientoptl.Metric) (*CompoundHandler, error) { // Changed dtClient to gaugeMetric dynamicClient, errCli := dynamic.NewForConfig(&qc.RestConfig) if errCli != nil { return nil, errCli @@ -176,7 +193,7 @@ func NewCompoundHandler(metric v1beta1.CompoundMetric, qc QueryConfig, dtClient metric: metric, dCli: dynamicClient, discoClient: disco, - dtClient: dtClient, + gaugeMetric: gaugeMetric, // Changed dtClient to gaugeMetric clusterName: qc.ClusterName, } diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go index a00065b..f43aa13 100644 --- a/internal/orchestrator/orchestrator.go +++ b/internal/orchestrator/orchestrator.go @@ -9,7 +9,8 @@ import ( v1 "github.com/SAP/metrics-operator/api/v1alpha1" "github.com/SAP/metrics-operator/api/v1beta1" "github.com/SAP/metrics-operator/internal/client" - "github.com/SAP/metrics-operator/internal/clientlite" + + // "github.com/SAP/metrics-operator/internal/clientlite" // Removed "github.com/SAP/metrics-operator/internal/clientoptl" "github.com/SAP/metrics-operator/internal/common" ) @@ -51,11 +52,12 @@ func (o *Orchestrator) WithGeneric(metric v1.Metric) (*Orchestrator, error) { } // WithSingle creates a new Orchestrator with a SingleMetric handler -func (o *Orchestrator) WithSingle(metric v1beta1.SingleMetric) (*Orchestrator, error) { - dtClient := clientlite.NewMetricClient(o.credentials.Host, o.credentials.Path, o.credentials.Token) +func (o *Orchestrator) WithSingle(metric v1beta1.SingleMetric, gaugeMetric *clientoptl.Metric) (*Orchestrator, error) { // Added gaugeMetric parameter + // dtClient creation removed, as it's handled by the controller var err error - o.Handler, err = NewSingleHandler(metric, o.queryConfig, dtClient) + // Pass gaugeMetric instead of dtClient + o.Handler, err = NewSingleHandler(metric, o.queryConfig, gaugeMetric) return o, err } @@ -70,11 +72,12 @@ func (o *Orchestrator) WithManaged(managed v1.ManagedMetric) (*Orchestrator, err } // WithCompound creates a new Orchestrator with a CompoundMetric handler -func (o *Orchestrator) WithCompound(metric v1beta1.CompoundMetric) (*Orchestrator, error) { - dtClient := clientlite.NewMetricClient(o.credentials.Host, o.credentials.Path, o.credentials.Token) +func (o *Orchestrator) WithCompound(metric v1beta1.CompoundMetric, gaugeMetric *clientoptl.Metric) (*Orchestrator, error) { // Added gaugeMetric parameter + // dtClient creation removed, as it's handled by the controller var err error - o.Handler, err = NewCompoundHandler(metric, o.queryConfig, dtClient) + // Pass gaugeMetric instead of dtClient + o.Handler, err = NewCompoundHandler(metric, o.queryConfig, gaugeMetric) return o, err } diff --git a/internal/orchestrator/singlehandler.go b/internal/orchestrator/singlehandler.go index f70354d..f82541b 100644 --- a/internal/orchestrator/singlehandler.go +++ b/internal/orchestrator/singlehandler.go @@ -13,7 +13,7 @@ import ( "github.com/SAP/metrics-operator/api/v1alpha1" "github.com/SAP/metrics-operator/api/v1beta1" - "github.com/SAP/metrics-operator/internal/clientlite" + "github.com/SAP/metrics-operator/internal/clientoptl" // Added ) // SingleHandler is used to monitor a single metric @@ -23,42 +23,60 @@ type SingleHandler struct { metric v1beta1.SingleMetric - dtClient *clientlite.MetricClient + gaugeMetric *clientoptl.Metric // Changed from dtClient clusterName *string } // Monitor is used to monitor the metric func (h *SingleHandler) Monitor(ctx context.Context) (MonitorResult, error) { - mrTotal := h.createGvkBaseMetric() - - if h.clusterName != nil { - mrTotal.AddDimension(CLUSTER, *h.clusterName) - } + // Metric creation and export are handled by the controller. + // This handler focuses on fetching the value and recording it. result := MonitorResult{} - list, err := h.getResources(ctx) - if err != nil { - return MonitorResult{}, fmt.Errorf("could not retrieve target resource(s) %w", err) + list, errGet := h.getResources(ctx) + if errGet != nil { + result.Error = errGet + result.Phase = v1alpha1.PhaseFailed + result.Reason = "GetResourcesFailed" + result.Message = fmt.Sprintf("failed to retrieve target resource(s): %s", errGet.Error()) + return result, nil // Return error state, but not the error itself to controller } primaryCount := len(list.Items) - mrTotal.SetGaugeValue(float64(primaryCount)) + // Create DataPoint and record it + dataPoint := clientoptl.NewDataPoint().SetValue(int64(primaryCount)) - errMetric := h.dtClient.SendMetrics(ctx, mrTotal) + // Add dimensions only if they have a non-empty value + if h.metric.Spec.Target.Group != "" { + dataPoint.AddDimension(GROUP, h.metric.Spec.Target.Group) + } + if h.metric.Spec.Target.Version != "" { + dataPoint.AddDimension(VERSION, h.metric.Spec.Target.Version) + } + if h.metric.Spec.Target.Kind != "" { + dataPoint.AddDimension(KIND, h.metric.Spec.Target.Kind) + } + if h.clusterName != nil && *h.clusterName != "" { + dataPoint.AddDimension(CLUSTER, *h.clusterName) + } + + errRecord := h.gaugeMetric.RecordMetrics(ctx, dataPoint) - if errMetric != nil { - result.Error = err + if errRecord != nil { + result.Error = errRecord result.Phase = v1alpha1.PhaseFailed - result.Reason = "SendMetricFailed" - result.Message = fmt.Sprintf("failed to send metric value to data sink. %s", errMetric.Error()) + result.Reason = "RecordMetricFailed" + result.Message = fmt.Sprintf("failed to record metric value: %s", errRecord.Error()) } else { result.Phase = v1alpha1.PhaseActive result.Reason = "MonitoringActive" - result.Message = fmt.Sprintf("metric is monitoring resource '%s'", h.metric.GvkToString()) + result.Message = fmt.Sprintf("metric value recorded for resource '%s'", h.metric.GvkToString()) result.Observation = &v1beta1.MetricObservation{Timestamp: metav1.Now(), LatestValue: strconv.Itoa(primaryCount)} } + + // Return the result, error indicates failure in Monitor execution, not necessarily metric export failure (handled by controller) return result, nil } @@ -92,15 +110,10 @@ func (h *SingleHandler) getResources(ctx context.Context) (*unstructured.Unstruc return list, nil } -func (h *SingleHandler) createGvkBaseMetric() *clientlite.Metric { - return clientlite.NewMetric(h.metric.Name). - AddDimension(GROUP, h.metric.Spec.Target.Group). - AddDimension(VERSION, h.metric.Spec.Target.Version). - AddDimension(KIND, h.metric.Spec.Target.Kind) -} +// Removed createGvkBaseMetric as it's clientlite specific // NewSingleHandler creates a new SingleHandler -func NewSingleHandler(metric v1beta1.SingleMetric, qc QueryConfig, dtClient *clientlite.MetricClient) (*SingleHandler, error) { +func NewSingleHandler(metric v1beta1.SingleMetric, qc QueryConfig, gaugeMetric *clientoptl.Metric) (*SingleHandler, error) { // Changed dtClient to gaugeMetric dynamicClient, errCli := dynamic.NewForConfig(&qc.RestConfig) if errCli != nil { return nil, errCli @@ -115,7 +128,7 @@ func NewSingleHandler(metric v1beta1.SingleMetric, qc QueryConfig, dtClient *cli metric: metric, dCli: dynamicClient, discoClient: disco, - dtClient: dtClient, + gaugeMetric: gaugeMetric, // Changed dtClient to gaugeMetric clusterName: qc.ClusterName, }