Skip to content

Commit 47425ed

Browse files
committed
kvserver: move bq.enqueueAdd update to be outside of defer
Previously, we updated bq.enqueueAdd inside the defer statement of addInternal. This was incorrect because we may return queued = true for a replica already processing and was marked for requeue. That replica would later be requeued in finishProcessingReplica, incrementing the metric again, lead to double counting.
1 parent fc26f6c commit 47425ed

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

pkg/kv/kvserver/queue.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,21 @@ type queueConfig struct {
388388
// replicas that have been destroyed but not GCed.
389389
processDestroyedReplicas bool
390390
// processTimeout returns the timeout for processing a replica.
391-
processTimeoutFunc queueProcessTimeoutFunc
392-
enqueueAdd *metric.Counter
391+
processTimeoutFunc queueProcessTimeoutFunc
392+
// enqueueAdd is a counter of replicas that were successfully added to the
393+
// queue.
394+
enqueueAdd *metric.Counter
395+
// enqueueFailedPrecondition is a counter of replicas that failed the
396+
// precondition checks and were therefore not added to the queue.
393397
enqueueFailedPrecondition *metric.Counter
394-
enqueueNoAction *metric.Counter
395-
enqueueUnexpectedError *metric.Counter
398+
// enqueueNoAction is a counter of replicas that had ShouldQueue determine no
399+
// action was needed and were therefore not added to the queue.
400+
enqueueNoAction *metric.Counter
401+
// enqueueUnexpectedError is a counter of replicas that were expected to be
402+
// enqueued (either had ShouldQueue return true or the caller explicitly
403+
// requested to be added to the queue directly), but failed to be enqueued
404+
// during the enqueue process (such as Async was rated limited).
405+
enqueueUnexpectedError *metric.Counter
396406
// successes is a counter of replicas processed successfully.
397407
successes *metric.Counter
398408
// failures is a counter of replicas which failed processing.
@@ -791,6 +801,14 @@ func (bq *baseQueue) updateMetricsOnEnqueueUnexpectedError() {
791801
}
792802
}
793803

804+
// updateMetricsOnEnqueueAdd updates the metrics when a replica is successfully
805+
// added to the queue.
806+
func (bq *baseQueue) updateMetricsOnEnqueueAdd() {
807+
if bq.enqueueAdd != nil {
808+
bq.enqueueAdd.Inc(1)
809+
}
810+
}
811+
794812
func (bq *baseQueue) maybeAdd(ctx context.Context, repl replicaInQueue, now hlc.ClockTimestamp) {
795813
ctx = repl.AnnotateCtx(ctx)
796814
ctx = bq.AnnotateCtx(ctx)
@@ -891,9 +909,7 @@ func (bq *baseQueue) addInternal(
891909
cb processCallback,
892910
) (added bool, err error) {
893911
defer func() {
894-
if added && bq.enqueueAdd != nil {
895-
bq.enqueueAdd.Inc(1)
896-
}
912+
// INVARIANT: added => err == nil.
897913
if err != nil {
898914
cb.onEnqueueResult(-1 /* indexOnHeap */, err)
899915
bq.updateMetricsOnEnqueueUnexpectedError()
@@ -989,6 +1005,10 @@ func (bq *baseQueue) addInternal(
9891005
default:
9901006
// No need to signal again.
9911007
}
1008+
// Note that we are bumping enqueueAdd here instead of during defer to avoid
1009+
// treating requeuing a processing replica as newly added. They will be
1010+
// re-added to the queue later which will double count them.
1011+
bq.updateMetricsOnEnqueueAdd()
9921012
// Note: it may already be dropped or dropped afterwards.
9931013
cb.onEnqueueResult(item.index /*indexOnHeap*/, nil)
9941014
return true, nil

0 commit comments

Comments
 (0)