1717//
1818// Copyright holder is ArangoDB GmbH, Cologne, Germany
1919//
20- // Author Ewout Prangsma
21- // Author Tomasz Mielech
22- //
2320
2421package reconcile
2522
@@ -28,22 +25,23 @@ import (
2825 "fmt"
2926 "time"
3027
31- "github.com/arangodb/kube-arangodb/pkg/metrics"
32-
33- "github.com/arangodb/kube-arangodb/pkg/util/errors"
34- inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
35-
3628 "github.com/rs/zerolog"
3729 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3830
3931 api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1"
32+ "github.com/arangodb/kube-arangodb/pkg/metrics"
33+ "github.com/arangodb/kube-arangodb/pkg/util/errors"
4034 "github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
35+ inspectorInterface "github.com/arangodb/kube-arangodb/pkg/util/k8sutil/inspector"
4136)
4237
4338var (
4439 actionsGeneratedMetrics = metrics .MustRegisterCounterVec (reconciliationComponent , "actions_generated" , "Number of actions added to the plan" , metrics .DeploymentName , metrics .ActionName , metrics .ActionPriority )
4540 actionsSucceededMetrics = metrics .MustRegisterCounterVec (reconciliationComponent , "actions_succeeded" , "Number of succeeded actions" , metrics .DeploymentName , metrics .ActionName , metrics .ActionPriority )
4641 actionsFailedMetrics = metrics .MustRegisterCounterVec (reconciliationComponent , "actions_failed" , "Number of failed actions" , metrics .DeploymentName , metrics .ActionName , metrics .ActionPriority )
42+ actionsCurrentPlan = metrics .MustRegisterGaugeVec (reconciliationComponent , "actions_current" ,
43+ "The current number of the plan actions are being performed" ,
44+ metrics .DeploymentName , "group" , "member" , "name" , "priority" )
4745)
4846
4947type planner interface {
@@ -179,23 +177,40 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
179177
180178 done , abort , recall , err := d .executeAction (ctx , log , planAction , action )
181179 if err != nil {
180+ // The Plan will be cleaned up, so no actions will be in the queue.
181+ actionsCurrentPlan .WithLabelValues (d .context .GetName (), planAction .Group .AsRole (), planAction .MemberID ,
182+ planAction .Type .String (), pg .Type ()).Set (0.0 )
183+
182184 actionsFailedMetrics .WithLabelValues (d .context .GetName (), planAction .Type .String (), pg .Type ()).Inc ()
183185 return nil , false , errors .WithStack (err )
184186 }
185187
186188 if abort {
189+ // The Plan will be cleaned up, so no actions will be in the queue.
190+ actionsCurrentPlan .WithLabelValues (d .context .GetName (), planAction .Group .AsRole (), planAction .MemberID ,
191+ planAction .Type .String (), pg .Type ()).Set (0.0 )
192+
187193 actionsFailedMetrics .WithLabelValues (d .context .GetName (), planAction .Type .String (), pg .Type ()).Inc ()
188194 return nil , true , nil
189195 }
190196
191197 if done {
198+ if planAction .IsStarted () {
199+ // The below metrics was increased in the previous iteration, so it should be decreased now.
200+ // If the action hasn't been started in this iteration then the metrics have not been increased.
201+ actionsCurrentPlan .WithLabelValues (d .context .GetName (), planAction .Group .AsRole (), planAction .MemberID ,
202+ planAction .Type .String (), pg .Type ()).Dec ()
203+ }
204+
192205 actionsSucceededMetrics .WithLabelValues (d .context .GetName (), planAction .Type .String (), pg .Type ()).Inc ()
193206 if len (plan ) > 1 {
194207 plan = plan [1 :]
195208 if plan [0 ].MemberID == api .MemberIDPreviousAction {
196209 plan [0 ].MemberID = action .MemberID ()
197210 }
198211 } else {
212+ actionsCurrentPlan .WithLabelValues (d .context .GetName (), planAction .Group .AsRole (), planAction .MemberID ,
213+ planAction .Type .String (), pg .Type ()).Set (0.0 )
199214 plan = nil
200215 }
201216
@@ -218,7 +233,11 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
218233 return nil , false , errors .WithStack (err )
219234 }
220235 } else {
221- if plan [0 ].StartTime .IsZero () {
236+ if ! plan [0 ].IsStarted () {
237+ // The action has been started in this iteration, but it is not finished yet.
238+ actionsCurrentPlan .WithLabelValues (d .context .GetName (), planAction .Group .AsRole (), planAction .MemberID ,
239+ planAction .Type .String (), pg .Type ()).Inc ()
240+
222241 now := metav1 .Now ()
223242 plan [0 ].StartTime = & now
224243 }
@@ -229,7 +248,7 @@ func (d *Reconciler) executePlan(ctx context.Context, cachedStatus inspectorInte
229248}
230249
231250func (d * Reconciler ) executeAction (ctx context.Context , log zerolog.Logger , planAction api.Action , action Action ) (done , abort , callAgain bool , err error ) {
232- if planAction .StartTime . IsZero () {
251+ if ! planAction .IsStarted () {
233252 // Not started yet
234253 ready , err := action .Start (ctx )
235254 if err != nil {
0 commit comments