Skip to content

Commit 4ca548f

Browse files
committed
[OSPRH-21020] Change rabbitmq to use podmonitors
RabbitMQ can be deployed with multiple replicas, and each replica exports different metrics. A ScrapeConfig that uses the service as its target therefore does not scrape all metrics. A PodMonitor finds all RabbitMQ pods based on a label, so every pod gets scraped every time.
1 parent bffdcca commit 4ca548f

File tree

5 files changed

+324
-103
lines changed

5 files changed

+324
-103
lines changed

api/v1beta1/conditions.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ const (
3939
// ScrapeConfigReadyCondition Status=True condition which indicates if the ScrapeConfig is configured and operational
4040
ScrapeConfigReadyCondition condition.Type = "ScrapeConfigReady"
4141

42+
// PodMonitorReadyCondition Status=True condition which indicates if the PodMonitor is configured and operational
43+
PodMonitorReadyCondition condition.Type = "PodMonitorReady"
44+
4245
// PrometheusReadyCondition Status=True condition which indicates if the Prometheus watch is operational
4346
PrometheusReadyCondition condition.Type = "PrometheusReady"
4447

@@ -169,6 +172,15 @@ const (
169172
// ScrapeConfigUnableToOwnMessage
170173
ScrapeConfigUnableToOwnMessage = "Error occured when trying to own %s"
171174

175+
//
176+
// PodMonitorReady condition messages
177+
//
178+
// PodMonitorReadyInitMessage
179+
PodMonitorReadyInitMessage = "PodMonitor not started"
180+
181+
// PodMonitorUnableToOwnMessage
182+
PodMonitorUnableToOwnMessage = "Error occured when trying to own %s"
183+
172184
//
173185
// PrometheusReady condition messages
174186
//

controllers/metricstorage_controller.go

Lines changed: 114 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ func (r *MetricStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reques
202202
condition.UnknownCondition(condition.ReadyCondition, condition.InitReason, condition.ReadyInitMessage),
203203
condition.UnknownCondition(telemetryv1.MonitoringStackReadyCondition, condition.InitReason, telemetryv1.MonitoringStackReadyInitMessage),
204204
condition.UnknownCondition(telemetryv1.ScrapeConfigReadyCondition, condition.InitReason, telemetryv1.ScrapeConfigReadyInitMessage),
205+
condition.UnknownCondition(telemetryv1.PodMonitorReadyCondition, condition.InitReason, telemetryv1.PodMonitorReadyInitMessage),
205206
condition.UnknownCondition(telemetryv1.DashboardPrometheusRuleReadyCondition, condition.InitReason, telemetryv1.DashboardPrometheusRuleReadyInitMessage),
206207
condition.UnknownCondition(telemetryv1.DashboardPluginReadyCondition, condition.InitReason, telemetryv1.DashboardPluginReadyInitMessage),
207208
condition.UnknownCondition(telemetryv1.DashboardDatasourceReadyCondition, condition.InitReason, telemetryv1.DashboardDatasourceReadyInitMessage),
@@ -247,8 +248,6 @@ func (r *MetricStorageReconciler) reconcileDelete(
247248
return ctrl.Result{}, nil
248249
}
249250

250-
// TODO: call the function appropriately
251-
//
252251
//nolint:all
253252
func (r *MetricStorageReconciler) reconcileUpdate(
254253
ctx context.Context,
@@ -262,6 +261,10 @@ func (r *MetricStorageReconciler) reconcileUpdate(
262261
if err != nil {
263262
return ctrl.Result{}, err
264263
}
264+
err = r.deleteRabbitMQScrapeConfig(ctx, instance)
265+
if err != nil {
266+
return ctrl.Result{}, err
267+
}
265268

266269
Log.Info(fmt.Sprintf("Reconciled Service '%s' update successfully", instance.Name))
267270

@@ -295,6 +298,33 @@ func (r *MetricStorageReconciler) deleteOldServiceMonitors(
295298
return nil
296299
}
297300

301+
// Delete RabbitMQ ScrapeConfig
302+
// A ScrapeConfig for RabbitMQ was last used at the beginning of FR4
303+
func (r *MetricStorageReconciler) deleteRabbitMQScrapeConfig(
304+
ctx context.Context,
305+
instance *telemetryv1.MetricStorage,
306+
) error {
307+
namespacedName := types.NamespacedName{
308+
Name: fmt.Sprintf("%s-rabbitmq", telemetry.ServiceName),
309+
Namespace: instance.Namespace,
310+
}
311+
scrapeConfig := &monv1alpha1.ScrapeConfig{}
312+
err := r.Get(ctx, namespacedName, scrapeConfig)
313+
if err != nil {
314+
if k8s_errors.IsNotFound(err) {
315+
return nil
316+
}
317+
return err
318+
}
319+
if object.CheckOwnerRefExist(instance.UID, scrapeConfig.OwnerReferences) {
320+
err = r.Delete(ctx, scrapeConfig)
321+
if err != nil {
322+
return err
323+
}
324+
}
325+
return nil
326+
}
327+
298328
func (r *MetricStorageReconciler) reconcileNormal(
299329
ctx context.Context,
300330
instance *telemetryv1.MetricStorage,
@@ -453,6 +483,11 @@ func (r *MetricStorageReconciler) reconcileNormal(
453483
return res, err
454484
}
455485

486+
// Deploy PodMonitors
487+
if res, err := r.createPodMonitors(ctx, instance, eventHandler); err != nil {
488+
return res, err
489+
}
490+
456491
if !instance.Spec.DashboardsEnabled {
457492
if res, err := metricstorage.DeleteDashboardObjects(ctx, instance, helper); err != nil {
458493
return res, err
@@ -622,6 +657,14 @@ func (r *MetricStorageReconciler) reconcileNormal(
622657
// when job passed, mark NetworkAttachmentsReadyCondition ready
623658
instance.Status.Conditions.MarkTrue(condition.NetworkAttachmentsReadyCondition, condition.NetworkAttachmentsReadyMessage)
624659

660+
// Handle service update
661+
ctrlResult, err := r.reconcileUpdate(ctx, instance, helper)
662+
if err != nil {
663+
return ctrlResult, err
664+
} else if (ctrlResult != ctrl.Result{}) {
665+
return ctrlResult, nil
666+
}
667+
625668
if instance.Status.Conditions.AllSubConditionIsTrue() {
626669
instance.Status.Conditions.MarkTrue(
627670
condition.ReadyCondition, condition.ReadyMessage)
@@ -684,6 +727,31 @@ func (r *MetricStorageReconciler) prometheusEndpointSecret(
684727
return nil
685728
}
686729

730+
func (r *MetricStorageReconciler) createPodMonitor(
731+
ctx context.Context,
732+
instance *telemetryv1.MetricStorage,
733+
log logr.Logger,
734+
desiredPodMonitor *monv1.PodMonitor,
735+
) error {
736+
podMonitor := &monv1.PodMonitor{
737+
ObjectMeta: metav1.ObjectMeta{
738+
Name: desiredPodMonitor.Name,
739+
Namespace: instance.Namespace,
740+
},
741+
}
742+
op, err := controllerutil.CreateOrPatch(ctx, r.Client, podMonitor, func() error {
743+
desiredPodMonitor.Spec.DeepCopyInto(&podMonitor.Spec)
744+
podMonitor.Labels = desiredPodMonitor.Labels
745+
err := controllerutil.SetControllerReference(instance, podMonitor, r.Scheme)
746+
return err
747+
})
748+
749+
if err == nil && op != controllerutil.OperationResultNone {
750+
log.Info(fmt.Sprintf("PodMonitor %s successfully changed - operation: %s", podMonitor.GetName(), string(op)))
751+
}
752+
return err
753+
}
754+
687755
func (r *MetricStorageReconciler) createServiceScrapeConfig(
688756
ctx context.Context,
689757
instance *telemetryv1.MetricStorage,
@@ -711,6 +779,50 @@ func (r *MetricStorageReconciler) createServiceScrapeConfig(
711779
return err
712780
}
713781

782+
func (r *MetricStorageReconciler) createPodMonitors(
783+
ctx context.Context,
784+
instance *telemetryv1.MetricStorage,
785+
eventHandler handler.EventHandler,
786+
) (ctrl.Result, error) {
787+
Log := r.GetLogger(ctx)
788+
err := r.ensureWatches(ctx, "podmonitors.monitoring.rhobs", &monv1.PodMonitor{}, eventHandler)
789+
if err != nil {
790+
instance.Status.Conditions.MarkFalse(telemetryv1.PodMonitorReadyCondition,
791+
condition.Reason("Can't own PodMonitor resource. The Cluster Observability Operator probably isn't installed"),
792+
condition.SeverityError,
793+
telemetryv1.PodMonitorUnableToOwnMessage, err)
794+
Log.Info("Can't own PodMonitor resource. The Cluster Observability Operator probably isn't installed")
795+
return ctrl.Result{RequeueAfter: telemetryv1.PauseBetweenWatchAttempts}, nil
796+
}
797+
798+
// PodMonitors for RabbitMQ monitoring
799+
// NOTE: We're watching Rabbits and reconciling with each of their change
800+
// that should keep the PodMonitors always up to date.
801+
rabbitList := &rabbitmqv1.RabbitmqClusterList{}
802+
listOpts := []client.ListOption{
803+
client.InNamespace(instance.GetNamespace()),
804+
}
805+
err = r.List(ctx, rabbitList, listOpts...)
806+
if err != nil && !k8s_errors.IsNotFound(err) {
807+
return ctrl.Result{}, err
808+
}
809+
for _, rabbit := range rabbitList.Items {
810+
desiredPodMonitor := metricstorage.RabbitMQPodMonitor(
811+
instance,
812+
serviceLabels,
813+
rabbit.Name,
814+
instance.Spec.PrometheusTLS.Enabled(),
815+
)
816+
err = r.createPodMonitor(ctx, instance, Log, desiredPodMonitor)
817+
if err != nil {
818+
return ctrl.Result{}, err
819+
}
820+
}
821+
822+
instance.Status.Conditions.MarkTrue(telemetryv1.PodMonitorReadyCondition, condition.ReadyMessage)
823+
return ctrl.Result{}, nil
824+
}
825+
714826
func (r *MetricStorageReconciler) createScrapeConfigs(
715827
ctx context.Context,
716828
instance *telemetryv1.MetricStorage,
@@ -760,36 +872,6 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
760872
return ctrl.Result{}, err
761873
}
762874

763-
// ScrapeConfigs for RabbitMQ monitoring
764-
// NOTE: We're watching Rabbits and reconciling with each of their change
765-
// that should keep the targets inside the ScrapeConfig always
766-
// up to date.
767-
rabbitList := &rabbitmqv1.RabbitmqClusterList{}
768-
listOpts := []client.ListOption{
769-
client.InNamespace(instance.GetNamespace()),
770-
}
771-
err = r.List(ctx, rabbitList, listOpts...)
772-
if err != nil && !k8s_errors.IsNotFound(err) {
773-
return ctrl.Result{}, err
774-
}
775-
rabbitTargets := []string{}
776-
for _, rabbit := range rabbitList.Items {
777-
rabbitServerName := fmt.Sprintf("%s.%s.svc", rabbit.Name, rabbit.Namespace)
778-
rabbitTargets = append(rabbitTargets, net.JoinHostPort(rabbitServerName, strconv.Itoa(metricstorage.RabbitMQPrometheusPort)))
779-
}
780-
rabbitCfgName := fmt.Sprintf("%s-rabbitmq", telemetry.ServiceName)
781-
desiredScrapeConfig = metricstorage.ScrapeConfig(
782-
instance,
783-
serviceLabels,
784-
rabbitTargets,
785-
instance.Spec.PrometheusTLS.Enabled(),
786-
)
787-
err = r.createServiceScrapeConfig(ctx, instance, Log, "RabbitMQ",
788-
rabbitCfgName, desiredScrapeConfig)
789-
if err != nil {
790-
return ctrl.Result{}, err
791-
}
792-
793875
// mysqld exporter
794876
ceilometerNamespacedName := types.NamespacedName{
795877
Name: ceilometer.ServiceName,

0 commit comments

Comments
 (0)