feat: Add placement status metrics #1077
```diff
@@ -94,8 +94,8 @@ func (r *Reconciler) handleDelete(ctx context.Context, crp *fleetv1beta1.Cluster
 		return ctrl.Result{}, err
 	}
-	metrics.FleetPlacementComplete.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
-	metrics.FleetPlacementStatus.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
+	metrics.FleetPlacementCompleteLastTimeStampSeconds.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
+	metrics.FleetPlacementStatusLastTimeStampSeconds.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
 	controllerutil.RemoveFinalizer(crp, fleetv1beta1.ClusterResourcePlacementCleanupFinalizer)
 	if err := r.Client.Update(ctx, crp); err != nil {
 		klog.ErrorS(err, "Failed to remove crp finalizer", "clusterResourcePlacement", crpKObj)
```
```diff
@@ -178,8 +178,8 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster
 			klog.ErrorS(updateErr, "Failed to update the status", "clusterResourcePlacement", crpKObj)
 			return ctrl.Result{}, controller.NewUpdateIgnoreConflictError(updateErr)
 		}
-		metrics.FleetPlacementStatus.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
-		metrics.FleetPlacementStatus.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), scheduleCondition.Type, string(scheduleCondition.Status)).SetToCurrentTime()
+		metrics.FleetPlacementStatusLastTimeStampSeconds.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
+		metrics.FleetPlacementStatusLastTimeStampSeconds.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), scheduleCondition.Type, string(scheduleCondition.Status)).SetToCurrentTime()
 		return ctrl.Result{}, err
 	}
```

Contributor: Here you can call …
```diff
@@ -226,12 +226,12 @@ func (r *Reconciler) handleUpdate(ctx context.Context, crp *fleetv1beta1.Cluster
 			klog.V(2).InfoS("Placement has finished the rollout process and reached the desired status", "clusterResourcePlacement", crpKObj, "generation", crp.Generation)
 			r.Recorder.Event(crp, corev1.EventTypeNormal, "PlacementRolloutCompleted", "Placement has finished the rollout process and reached the desired status")
 		}
-		metrics.FleetPlacementComplete.WithLabelValues(crp.Name, "true").SetToCurrentTime()
+		metrics.FleetPlacementCompleteLastTimeStampSeconds.WithLabelValues(crp.Name, "true").SetToCurrentTime()
 		// We don't need to requeue any request now by watching the binding changes
 		return ctrl.Result{}, nil
 	}
-	metrics.FleetPlacementComplete.WithLabelValues(crp.Name, "false").SetToCurrentTime()
+	metrics.FleetPlacementCompleteLastTimeStampSeconds.WithLabelValues(crp.Name, "false").SetToCurrentTime()
 	if !isClusterScheduled {
 		// Note:
```
```diff
@@ -1042,27 +1042,26 @@ func isCRPScheduled(crp *fleetv1beta1.ClusterResourcePlacement) bool {
 }
 
 func emitPlacementStatusMetric(crp *fleetv1beta1.ClusterResourcePlacement) {
-	metrics.FleetPlacementStatus.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
 	// Check CRP Scheduled condition
 	status := "nil"
 	cond := crp.GetCondition(string(fleetv1beta1.ClusterResourcePlacementScheduledConditionType))
 	if !condition.IsConditionStatusTrue(cond, crp.Generation) {
 		if cond != nil {
 			status = string(cond.Status)
 		}
-		metrics.FleetPlacementStatus.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), string(fleetv1beta1.ClusterResourcePlacementScheduledConditionType), status).SetToCurrentTime()
+		metrics.FleetPlacementStatusLastTimeStampSeconds.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), string(fleetv1beta1.ClusterResourcePlacementScheduledConditionType), status).SetToCurrentTime()
 		return
 	}
 
 	// Check CRP expected conditions
 	expectedCondTypes := determineExpectedCRPAndResourcePlacementStatusCondType(crp)
 	for _, condType := range expectedCondTypes {
-		cond := crp.GetCondition(string(condType.ClusterResourcePlacementConditionType()))
+		cond = crp.GetCondition(string(condType.ClusterResourcePlacementConditionType()))
 		if !condition.IsConditionStatusTrue(cond, crp.Generation) {
 			if cond != nil {
 				status = string(cond.Status)
 			}
-			metrics.FleetPlacementStatus.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), string(condType.ClusterResourcePlacementConditionType()), status).SetToCurrentTime()
+			metrics.FleetPlacementStatusLastTimeStampSeconds.WithLabelValues(crp.Name, strconv.FormatInt(crp.Generation, 10), string(condType.ClusterResourcePlacementConditionType()), status).SetToCurrentTime()
 			return
 		}
 	}
```
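For context, the diff assumes both gauges are defined and registered in the metrics package. A minimal sketch under that assumption; the metric names, help strings, label keys, and the registration path are guesses, and only the Go identifiers and label arity come from the diff itself:

```go
// Hypothetical sketch of the metric definitions this diff relies on; the real
// definitions live in the fleet metrics package and may differ.
package metrics

import (
	"github.com/prometheus/client_golang/prometheus"
	ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)

var (
	// Last time (unix seconds) a placement was observed complete ("true")
	// or not yet complete ("false").
	FleetPlacementCompleteLastTimeStampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleet_placement_complete_last_timestamp_seconds", // assumed name
		Help: "Last timestamp at which a placement was complete or incomplete.",
	}, []string{"name", "complete"})

	// Last time (unix seconds) an unmet condition was observed, keyed by
	// CRP generation, condition type, and condition status. The label keys
	// are assumed; the arity matches the WithLabelValues calls above.
	FleetPlacementStatusLastTimeStampSeconds = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "fleet_placement_status_last_timestamp_seconds", // assumed name
		Help: "Last timestamp of the first unmet placement condition.",
	}, []string{"name", "generation", "conditionType", "status"})
)

func init() {
	// Register with controller-runtime's global registry so the gauges are
	// exposed on the manager's /metrics endpoint.
	ctrlmetrics.Registry.MustRegister(
		FleetPlacementCompleteLastTimeStampSeconds,
		FleetPlacementStatusLastTimeStampSeconds,
	)
}
```

The older FleetPlacementComplete and FleetPlacementStatus identifiers appear only on removed lines, so this PR presumably renames them to the timestamped variants.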
Contributor: There are two options: … Personally, I prefer option 2.

Contributor (author): I personally prefer option 1. The completed metric makes it a lot easier to query whether the CRP is completed or not for dashboards. If in the future we add more final conditions (like Available and ReportDiff), which we may or may not do, we would have to keep changing the alert to include those cases. The second metric was just to provide more information for the alert, while the first helps with dashboards and is easier to filter through the logs.

Contributor: The current status metric never has a "good" status. How are we going to use this metric? I feel this metric is not independent: we always need to use it along with the completed metric. We first need to verify that the CRP is still incomplete, and only then refer to this metric to see why it is still incomplete.

Contributor: Agree with Wantong; they seem coupled. I am not sure how to join them, though, given the different timestamps.

Contributor (author): Talked to Ryan. We will couple them.
Contributor: I wonder why the deletes were moved earlier instead of being replaced in place at line 105?

Contributor: I asked to move these earlier. If the pod stops after the finalizer is removed but before the metrics are removed, the series would be left behind and never deleted.
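To make that failure mode concrete, here is a minimal sketch of the ordering, taken from the handleDelete hunk above with error handling abbreviated. Deleting the series first means a crash between the two steps is harmless: the CRP and its finalizer still exist, so the next reconcile retries the cleanup.

```go
// Delete all metric series for this CRP while the finalizer still guards the
// object; a crash here just means the next reconcile repeats the delete.
metrics.FleetPlacementCompleteLastTimeStampSeconds.DeletePartialMatch(prometheus.Labels{"name": crp.Name})
metrics.FleetPlacementStatusLastTimeStampSeconds.DeletePartialMatch(prometheus.Labels{"name": crp.Name})

// Only then release the object. Once this update persists, the CRP can be
// garbage-collected, and nothing would ever re-run the metric cleanup.
controllerutil.RemoveFinalizer(crp, fleetv1beta1.ClusterResourcePlacementCleanupFinalizer)
if err := r.Client.Update(ctx, crp); err != nil {
	return ctrl.Result{}, err
}
```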