Skip to content

Commit 0d37711

Browse files
committed
kvserver: move bq.enqueueAdd update to be outside of defer
Previously, we updated bq.enqueueAdd inside the defer statement of addInternal. This was incorrect because we may return queued = true for a replica already processing and was marked for requeue. That replica would later be requeued in finishProcessingReplica, incrementing the metric again, lead to double counting.
1 parent 6268773 commit 0d37711

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

pkg/kv/kvserver/queue.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -387,11 +387,21 @@ type queueConfig struct {
387387
// replicas that have been destroyed but not GCed.
388388
processDestroyedReplicas bool
389389
// processTimeout returns the timeout for processing a replica.
390-
processTimeoutFunc queueProcessTimeoutFunc
391-
enqueueAdd *metric.Counter
390+
processTimeoutFunc queueProcessTimeoutFunc
391+
// enqueueAdd is a counter of replicas that were successfully added to the
392+
// queue.
393+
enqueueAdd *metric.Counter
394+
// enqueueFailedPrecondition is a counter of replicas that failed the
395+
// precondition checks and were therefore not added to the queue.
392396
enqueueFailedPrecondition *metric.Counter
393-
enqueueNoAction *metric.Counter
394-
enqueueUnexpectedError *metric.Counter
397+
// enqueueNoAction is a counter of replicas that had ShouldQueue determine no
398+
// action was needed and were therefore not added to the queue.
399+
enqueueNoAction *metric.Counter
400+
// enqueueUnexpectedError is a counter of replicas that were expected to be
401+
// enqueued (either had ShouldQueue return true or the caller explicitly
402+
// requested to be added to the queue directly), but failed to be enqueued
403+
// during the enqueue process (such as Async was rated limited).
404+
enqueueUnexpectedError *metric.Counter
395405
// successes is a counter of replicas processed successfully.
396406
successes *metric.Counter
397407
// failures is a counter of replicas which failed processing.
@@ -787,6 +797,14 @@ func (bq *baseQueue) updateMetricsOnEnqueueUnexpectedError() {
787797
}
788798
}
789799

800+
// updateMetricsOnEnqueueAdd updates the metrics when a replica is successfully
801+
// added to the queue.
802+
func (bq *baseQueue) updateMetricsOnEnqueueAdd() {
803+
if bq.enqueueAdd != nil {
804+
bq.enqueueAdd.Inc(1)
805+
}
806+
}
807+
790808
func (bq *baseQueue) maybeAdd(ctx context.Context, repl replicaInQueue, now hlc.ClockTimestamp) {
791809
ctx = repl.AnnotateCtx(ctx)
792810
ctx = bq.AnnotateCtx(ctx)
@@ -887,9 +905,7 @@ func (bq *baseQueue) addInternal(
887905
cb processCallback,
888906
) (added bool, err error) {
889907
defer func() {
890-
if added && bq.enqueueAdd != nil {
891-
bq.enqueueAdd.Inc(1)
892-
}
908+
// INVARIANT: added => err == nil.
893909
if err != nil {
894910
cb.onEnqueueResult(-1 /* indexOnHeap */, err)
895911
bq.updateMetricsOnEnqueueUnexpectedError()
@@ -985,6 +1001,10 @@ func (bq *baseQueue) addInternal(
9851001
default:
9861002
// No need to signal again.
9871003
}
1004+
// Note that we are bumping enqueueAdd here instead of during defer to avoid
1005+
// treating requeuing a processing replica as newly added. They will be
1006+
// re-added to the queue later which will double count them.
1007+
bq.updateMetricsOnEnqueueAdd()
9881008
// Note: it may already be dropped or dropped afterwards.
9891009
cb.onEnqueueResult(item.index /*indexOnHeap*/, nil)
9901010
return true, nil

0 commit comments

Comments
 (0)