Skip to content

Commit 061d1d7

Browse files
authored
Add metrics for the current plan actions (#879)
1 parent f25a7d1 commit 061d1d7

File tree

4 files changed

+40
-10
lines changed

4 files changed

+40
-10
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
- Add core containers names to follow their terminations
1313
- Add ArangoJob and Apps Operator
1414
- Use Go 1.17
15+
- Add metrics for the plan actions
1516

1617
## [1.2.6](https://github.com/arangodb/kube-arangodb/tree/1.2.6) (2021-12-15)
1718
- Add ArangoBackup backoff functionality

pkg/apis/deployment/v1/plan.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,11 @@ func (a Action) SetImage(image string) Action {
301301
return a
302302
}
303303

304+
// IsStarted returns true if the action has been started already.
305+
func (a Action) IsStarted() bool {
306+
return !a.StartTime.IsZero()
307+
}
308+
304309
// AsPlan converts a list of actions into a Plan.
305310
func AsPlan(a []Action) Plan {
306311
return a

pkg/apis/deployment/v2alpha1/plan.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,11 @@ func (a Action) SetImage(image string) Action {
301301
return a
302302
}
303303

304+
// IsStarted returns true if the action has been started already.
305+
func (a Action) IsStarted() bool {
306+
return !a.StartTime.IsZero()
307+
}
308+
304309
// AsPlan converts a list of actions into a Plan.
305310
func AsPlan(a []Action) Plan {
306311
return a

pkg/deployment/reconcile/plan_executor.go

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@
1717
//
1818
// Copyright holder is ArangoDB GmbH, Cologne, Germany
1919
//
20-
// Author Ewout Prangsma
21-
// Author Tomasz Mielech
22-
//
2320

2421
package reconcile
2522

@@ -28,22 +25,23 @@ import (
2825
"fmt"
2926
"time"
3027

31-
"github.com/arangodb/kube-arangodb/pkg/metrics"
32-
33-
"github.com/arangodb/kube-arangodb/pkg/util/errors"
34-
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
35-
3628
"github.com/rs/zerolog"
3729
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3830

3931
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
32+
"github.com/arangodb/kube-arangodb/pkg/metrics"
33+
"github.com/arangodb/kube-arangodb/pkg/util/errors"
4034
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
35+
inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
4136
)
4237

4338
var (
4439
actionsGeneratedMetrics = metrics.MustRegisterCounterVec(reconciliationComponent, "actions_generated", "Number of actions added to the plan", metrics.DeploymentName, metrics.ActionName, metrics.ActionPriority)
4540
actionsSucceededMetrics = metrics.MustRegisterCounterVec(reconciliationComponent, "actions_succeeded", "Number of succeeded actions", metrics.DeploymentName, metrics.ActionName, metrics.ActionPriority)
4641
actionsFailedMetrics = metrics.MustRegisterCounterVec(reconciliationComponent, "actions_failed", "Number of failed actions", metrics.DeploymentName, metrics.ActionName, metrics.ActionPriority)
42+
actionsCurrentPlan = metrics.MustRegisterGaugeVec(reconciliationComponent, "actions_current",
43+
"The current number of the plan actions are being performed",
44+
metrics.DeploymentName, "group", "member", "name", "priority")
4745
)
4846

4947
type planner interface {
@@ -179,23 +177,40 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
179177

180178
done, abort, recall, err := d.executeAction(ctx, log, planAction, action)
181179
if err != nil {
180+
// The Plan will be cleaned up, so no actions will be in the queue.
181+
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
182+
planAction.Type.String(), pg.Type()).Set(0.0)
183+
182184
actionsFailedMetrics.WithLabelValues(d.context.GetName(), planAction.Type.String(), pg.Type()).Inc()
183185
return nil, false, errors.WithStack(err)
184186
}
185187

186188
if abort {
189+
// The Plan will be cleaned up, so no actions will be in the queue.
190+
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
191+
planAction.Type.String(), pg.Type()).Set(0.0)
192+
187193
actionsFailedMetrics.WithLabelValues(d.context.GetName(), planAction.Type.String(), pg.Type()).Inc()
188194
return nil, true, nil
189195
}
190196

191197
if done {
198+
if planAction.IsStarted() {
199+
// The metric below was incremented in a previous iteration, so it must be decremented now.
200+
// If the action was first started in this iteration, the metric was never incremented, so there is nothing to decrement.
201+
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
202+
planAction.Type.String(), pg.Type()).Dec()
203+
}
204+
192205
actionsSucceededMetrics.WithLabelValues(d.context.GetName(), planAction.Type.String(), pg.Type()).Inc()
193206
if len(plan) > 1 {
194207
plan = plan[1:]
195208
if plan[0].MemberID == api.MemberIDPreviousAction {
196209
plan[0].MemberID = action.MemberID()
197210
}
198211
} else {
212+
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
213+
planAction.Type.String(), pg.Type()).Set(0.0)
199214
plan = nil
200215
}
201216

@@ -218,7 +233,11 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
218233
return nil, false, errors.WithStack(err)
219234
}
220235
} else {
221-
if plan[0].StartTime.IsZero() {
236+
if !plan[0].IsStarted() {
237+
// The action has been started in this iteration, but it is not finished yet.
238+
actionsCurrentPlan.WithLabelValues(d.context.GetName(), planAction.Group.AsRole(), planAction.MemberID,
239+
planAction.Type.String(), pg.Type()).Inc()
240+
222241
now := metav1.Now()
223242
plan[0].StartTime = &now
224243
}
@@ -229,7 +248,7 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
229248
}
230249

231250
func (d *Reconciler) executeAction(ctx context.Context, log zerolog.Logger, planAction api.Action, action Action) (done, abort, callAgain bool, err error) {
232-
if planAction.StartTime.IsZero() {
251+
if !planAction.IsStarted() {
233252
// Not started yet
234253
ready, err := action.Start(ctx)
235254
if err != nil {

0 commit comments

Comments
 (0)