Commit ccdd264

craig[bot], sumeerbhola, and tbg committed
Merge #156586

156586: mmaprototype: improve modeling of pending changes r=tbg a=sumeerbhola

The goal was to fix a known bug where, in a multi-store setting in which a range (and lease) is being moved from one local store to another local store, a partial undo could leave 2 leaseholders in the internal state of the allocator.

As part of fixing this, we elaborate on why we are modeling a single "decision" as a set of pendingReplicaChanges, one per replica, and on the deficiencies of this modeling.

To address the bug, we sacrifice the universal undo behavior of a pendingReplicaChange. Instead, once some subset of the changes on a range is seen to be enacted, we set a no-rollback bit on the rangeState for the remaining set of pendingReplicaChanges. These can no longer be undone, and must only be discarded or considered subsumed. This discarding happens when processing a new StoreLeaseholderMsg.

As part of this change, we also shorten the GC duration of the remaining pending changes once some subset has been observed to be enacted. This should be generally beneficial, since for range moves it is the first change (the replica addition) that is time consuming, and is what caused us to use a slow (5min) GC duration.

Informs #156754

Epic: CRDB-55052

Release note: None

Co-authored-by: sumeerbhola <[email protected]>
Co-authored-by: Tobias Grieger <[email protected]>
2 parents 061d9a3 + 0e33c11 commit ccdd264
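
To make the no-rollback mechanism described in the commit message concrete, here is a minimal, self-contained Go sketch. Only the pendingChangeNoRollback field mirrors the patch; the other fields, the GC-deadline bookkeeping, and the helper function are hypothetical stand-ins for the real mmaprototype structures, not the actual implementation.

package main

import (
	"fmt"
	"time"
)

type ChangeID int64

// rangeState is a simplified stand-in; pendingChangeNoRollback mirrors the
// field introduced by this commit.
type rangeState struct {
	pendingChangeIDs        []ChangeID
	pendingChangeNoRollback bool
}

// markSubsetEnacted removes the changes observed as enacted. If only a
// subset was enacted, the survivors can no longer be undone, so the
// no-rollback bit is set and their GC deadline is shortened.
func markSubsetEnacted(
	rs *rangeState, enacted map[ChangeID]bool, now time.Time,
	gcDeadline map[ChangeID]time.Time, shortGC time.Duration,
) {
	var remaining []ChangeID
	for _, id := range rs.pendingChangeIDs {
		if !enacted[id] {
			remaining = append(remaining, id)
		}
	}
	if len(remaining) < len(rs.pendingChangeIDs) && len(remaining) > 0 {
		rs.pendingChangeNoRollback = true
		for _, id := range remaining {
			gcDeadline[id] = now.Add(shortGC)
		}
	}
	rs.pendingChangeIDs = remaining
}

func main() {
	rs := &rangeState{pendingChangeIDs: []ChangeID{1, 2}}
	gc := map[ChangeID]time.Time{}
	// Change 1 (say, the slow replica addition) is observed as enacted;
	// change 2 survives with the no-rollback bit set and a short GC deadline.
	markSubsetEnacted(rs, map[ChangeID]bool{1: true}, time.Now(), gc, time.Minute)
	fmt.Println(rs.pendingChangeNoRollback, rs.pendingChangeIDs) // true [2]
}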

File tree

11 files changed: +1163 −223 lines changed


pkg/kv/kvserver/allocator/mmaprototype/allocator.go

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,8 @@ type Allocator interface {
   // Calls to AdjustPendingChangesDisposition must be correctly sequenced with
   // full state updates from the local node provided in
   // ProcessNodeLoadResponse.
+  //
+  // REQUIRES: len(changes) > 0 and all changes are to the same range.
   AdjustPendingChangesDisposition(changes []ChangeID, success bool)

   // RegisterExternalChanges informs this allocator about yet to complete
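
A hedged usage sketch of the tightened contract: the caller must pass a non-empty slice of change IDs that all belong to a single range. Allocator and ChangeID are the interface types above; the per-range grouping map and the function itself are hypothetical illustrations, not code from this patch.

// Hypothetical caller honoring the new REQUIRES clause: change IDs are
// grouped per range before the call, and empty groups are skipped.
func reportDispositions(a Allocator, idsByRange map[int64][]ChangeID, success bool) {
	for _, ids := range idsByRange {
		if len(ids) == 0 {
			continue // REQUIRES: len(changes) > 0
		}
		// All IDs in one slice belong to the same range, per the contract.
		a.AdjustPendingChangesDisposition(ids, success)
	}
}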

pkg/kv/kvserver/allocator/mmaprototype/allocator_state.go

Lines changed: 40 additions & 12 deletions
@@ -546,9 +546,8 @@ func (a *allocatorState) rebalanceStores(
   }
   leaseChanges := MakeLeaseTransferChanges(
     rangeID, rstate.replicas, rstate.load, addTarget, removeTarget)
-  if valid, reason := a.cs.preCheckOnApplyReplicaChanges(leaseChanges[:]); !valid {
-    panic(fmt.Sprintf("pre-check failed for lease transfer %v: due to %v",
-      leaseChanges, reason))
+  if err := a.cs.preCheckOnApplyReplicaChanges(leaseChanges[:]); err != nil {
+    panic(errors.Wrapf(err, "pre-check failed for lease transfer %v", leaseChanges))
   }
   pendingChanges := a.cs.createPendingChanges(leaseChanges[:]...)
   changes = append(changes, PendingRangeChange{

@@ -764,9 +763,9 @@
   }
   replicaChanges := makeRebalanceReplicaChanges(
     rangeID, rstate.replicas, rstate.load, addTarget, removeTarget)
-  if valid, reason := a.cs.preCheckOnApplyReplicaChanges(replicaChanges[:]); !valid {
-    panic(fmt.Sprintf("pre-check failed for replica changes: %v due to %v for %v",
-      replicaChanges, reason, rangeID))
+  if err = a.cs.preCheckOnApplyReplicaChanges(replicaChanges[:]); err != nil {
+    panic(errors.Wrapf(err, "pre-check failed for replica changes: %v for %v",
+      replicaChanges, rangeID))
   }
   pendingChanges := a.cs.createPendingChanges(replicaChanges[:]...)
   changes = append(changes, PendingRangeChange{
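
Both hunks apply the same refactor: preCheckOnApplyReplicaChanges now returns an error instead of a (valid bool, reason string) pair, so call sites can wrap context onto it with errors.Wrapf. Below is a standalone sketch of that pattern; the ReplicaChange struct and the validity predicate are invented placeholders for the real cluster-state check, not the patch's implementation.

package main

import (
	"fmt"

	"github.com/cockroachdb/errors"
)

type ReplicaChange struct{ StoreID int64 }

// preCheckOnApplyReplicaChanges (simplified stand-in): returns a descriptive
// error for the first inapplicable change, or nil if all pass.
func preCheckOnApplyReplicaChanges(
	changes []ReplicaChange, applicable func(ReplicaChange) bool,
) error {
	for i, c := range changes {
		if !applicable(c) {
			return errors.Errorf("change %d (store s%d) is not applicable", i, c.StoreID)
		}
	}
	return nil
}

func main() {
	changes := []ReplicaChange{{StoreID: 1}, {StoreID: 2}}
	if err := preCheckOnApplyReplicaChanges(changes, func(c ReplicaChange) bool {
		return c.StoreID != 2 // pretend s2 fails validation
	}); err != nil {
		// Mirrors the call sites above, which panic with the wrapped error.
		fmt.Println(errors.Wrapf(err, "pre-check failed for replica changes %v", changes))
	}
}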
@@ -819,27 +818,56 @@ func (a *allocatorState) ProcessStoreLoadMsg(ctx context.Context, msg *StoreLoad
 func (a *allocatorState) AdjustPendingChangesDisposition(changeIDs []ChangeID, success bool) {
   a.mu.Lock()
   defer a.mu.Unlock()
+  // NB: It is possible that some of the changeIDs have already been enacted
+  // via StoreLeaseholderMsg, and even been garbage collected. So no
+  // assumption can be made about whether these changeIDs will be found in the
+  // allocator's state.
   if !success {
+    // Gather the changes that are found and need to be undone.
     replicaChanges := make([]ReplicaChange, 0, len(changeIDs))
     for _, changeID := range changeIDs {
       change, ok := a.cs.pendingChanges[changeID]
       if !ok {
+        continue
+      }
+      rs, ok := a.cs.ranges[change.rangeID]
+      if !ok {
+        panic(errors.AssertionFailedf("range %v not found in cluster state", change.rangeID))
+      }
+      if rs.pendingChangeNoRollback {
+        // All the changes are to the same range, so return.
         return
       }
       replicaChanges = append(replicaChanges, change.ReplicaChange)
     }
-    if valid, reason := a.cs.preCheckOnUndoReplicaChanges(replicaChanges); !valid {
-      log.KvDistribution.Infof(context.Background(), "did not undo change %v: due to %v", changeIDs, reason)
+    if len(replicaChanges) == 0 {
+      return
+    }
+    // Check that we can undo these changes. If not, log and return.
+    if err := a.cs.preCheckOnUndoReplicaChanges(replicaChanges); err != nil {
+      // TODO(sumeer): we should be able to panic here, once the interface
+      // contract says that all the proposed changes must be included in
+      // changeIDs. Without that contract, there may be a pair of changes
+      // (remove replica and lease from s1), (add replica and lease to s2),
+      // and the caller can provide the first changeID only, and the undo
+      // would cause two leaseholders. The pre-check would catch that here.
+      log.KvDistribution.Infof(context.Background(), "did not undo change %v: due to %v", changeIDs, err)
       return
     }
   }

   for _, changeID := range changeIDs {
-    // We set !requireFound, since a StoreLeaseholderMsg that happened after
-    // the pending change was created and before this call to
+    // We set !requireFound, since some of these pending changes may no longer
+    // exist in the allocator's state. For example, a StoreLeaseholderMsg that
+    // happened after the pending change was created and before this call to
     // AdjustPendingChangesDisposition may have already removed the pending
     // change.
     if success {
+      // TODO(sumeer): this code is implicitly assuming that all the changes
+      // on the rangeState are being enacted. And that is true of the current
+      // callers. We should explicitly state the assumption in the interface.
+      // Because if only some are being enacted, we ought to set
+      // pendingChangeNoRollback, and we don't bother to.
       a.cs.pendingChangeEnacted(changeID, a.cs.ts.Now(), false)
     } else {
       a.cs.undoPendingChange(changeID, false)
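
The two-leaseholder hazard named in the TODO above is easiest to see with a toy example. In the sketch below (all types invented purely for illustration), a lease transfer is modeled as the pair of changes from the comment; undoing only the removal half after the addition half has applied leaves both stores marked as leaseholders.

package main

import "fmt"

// change is a toy model: a store either gains (+1) or loses (-1) the lease.
type change struct {
	store      string
	leaseDelta int
}

func apply(leaseholders map[string]bool, c change) {
	leaseholders[c.store] = c.leaseDelta > 0
}

func main() {
	leaseholders := map[string]bool{"s1": true}
	// One "decision", modeled as two pendingReplicaChanges:
	pair := []change{
		{store: "s1", leaseDelta: -1}, // remove replica and lease from s1
		{store: "s2", leaseDelta: +1}, // add replica and lease to s2
	}
	for _, c := range pair {
		apply(leaseholders, c)
	}
	// Partial undo of only the first change, as in the TODO's scenario:
	leaseholders["s1"] = true
	fmt.Println(leaseholders) // map[s1:true s2:true]: two leaseholders
}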
@@ -852,10 +880,10 @@ func (a *allocatorState) AdjustPendingChangesDisposition(changeIDs []ChangeID, s
 func (a *allocatorState) RegisterExternalChanges(changes []ReplicaChange) []ChangeID {
   a.mu.Lock()
   defer a.mu.Unlock()
-  if valid, reason := a.cs.preCheckOnApplyReplicaChanges(changes); !valid {
+  if err := a.cs.preCheckOnApplyReplicaChanges(changes); err != nil {
     a.mmaMetrics.ExternalFailedToRegister.Inc(1)
     log.KvDistribution.Infof(context.Background(),
-      "did not register external changes: due to %v", reason)
+      "did not register external changes: due to %v", err)
     return nil
   } else {
     a.mmaMetrics.ExternaRegisterSuccess.Inc(1)
