Skip to content

Commit 8e9e4c1

Browse files
committed
kvserver: move priority inversion check before applyChange
Previously, we checked for priority inversion before planning errors, which meant we could return requeue = true even when a planning error occurred. This commit changes it so that planning errors should take higher precedence over a priority inversion error. rq.processOneChange now returns early if there is a planning error and only check for priority inversion right before applying a change.
1 parent 9ab3dc0 commit 8e9e4c1

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

pkg/kv/kvserver/replicate_queue.go

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -926,24 +926,6 @@ func (rq *replicateQueue) processOneChange(
926926
change, err := rq.planner.PlanOneChange(
927927
ctx, repl, desc, conf, plan.PlannerOptions{Scatter: scatter})
928928

929-
if PriorityInversionRequeue.Get(&rq.store.cfg.Settings.SV) {
930-
if inversion, shouldRequeue := allocatorimpl.CheckPriorityInversion(priorityAtEnqueue, change.Action); inversion {
931-
if priorityInversionLogEveryN.ShouldLog() {
932-
log.KvDistribution.Infof(ctx,
933-
"priority inversion during process: shouldRequeue = %t action=%s, priority=%v, enqueuePriority=%v",
934-
shouldRequeue, change.Action, change.Action.Priority(), priorityAtEnqueue)
935-
}
936-
if shouldRequeue {
937-
// Return true to requeue the range. Return the error to ensure it is
938-
// logged and tracked in replicate queue bq.failures metrics. See
939-
// replicateQueue.process for details.
940-
return true /*requeue*/, maybeAnnotateDecommissionErr(
941-
errors.Errorf("requing due to priority inversion: action=%s, priority=%v, enqueuePriority=%v",
942-
change.Action, change.Action.Priority(), priorityAtEnqueue), change.Action)
943-
}
944-
}
945-
}
946-
947929
// When there is an error planning a change, return the error immediately
948930
// and do not requeue. It is unlikely that the range or storepool state
949931
// will change quickly enough in order to not get the same error and
@@ -968,6 +950,28 @@ func (rq *replicateQueue) processOneChange(
968950
return false, nil
969951
}
970952

953+
// At this point, planning returned no error, and we're not doing a dry run.
954+
// Check for priority inversion if enabled. If detected, we may requeue the
955+
// replica to return an error early to requeue the range instead to avoid
956+
// starving other higher priority work.
957+
if PriorityInversionRequeue.Get(&rq.store.cfg.Settings.SV) {
958+
if inversion, shouldRequeue := allocatorimpl.CheckPriorityInversion(priorityAtEnqueue, change.Action); inversion {
959+
if priorityInversionLogEveryN.ShouldLog() {
960+
log.KvDistribution.Infof(ctx,
961+
"priority inversion during process: shouldRequeue = %t action=%s, priority=%v, enqueuePriority=%v",
962+
shouldRequeue, change.Action, change.Action.Priority(), priorityAtEnqueue)
963+
}
964+
if shouldRequeue {
965+
// Return true to requeue the range. Return the error to ensure it is
966+
// logged and tracked in replicate queue bq.failures metrics. See
967+
// replicateQueue.process for details.
968+
return true /*requeue*/, maybeAnnotateDecommissionErr(
969+
errors.Errorf("requing due to priority inversion: action=%s, priority=%v, enqueuePriority=%v",
970+
change.Action, change.Action.Priority(), priorityAtEnqueue), change.Action)
971+
}
972+
}
973+
}
974+
971975
// Track the metrics generated during planning. These are not updated
972976
// directly during planning to avoid pushing the dryRun flag into every
973977
// function.

0 commit comments

Comments
 (0)