Skip to content

Commit c8a218a

Browse files
authored
Merge pull request #152460 from iskettaneh/backport25.2-144303
release-25.2: storepool: lock individual storepool details instead of all details
2 parents 6958680 + 080d967 commit c8a218a

16 files changed

+373
-196
lines changed

pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -480,60 +480,68 @@ func mockStorePool(
480480
decommissionedStoreIDs []roachpb.StoreID,
481481
suspectedStoreIDs []roachpb.StoreID,
482482
) {
483-
storePool.DetailsMu.Lock()
484-
defer storePool.DetailsMu.Unlock()
485-
486483
liveNodeSet := map[roachpb.NodeID]livenesspb.NodeLivenessStatus{}
487-
storePool.DetailsMu.StoreDetails = map[roachpb.StoreID]*storepool.StoreDetail{}
488484
for _, storeID := range aliveStoreIDs {
489485
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_LIVE
490-
detail := storePool.GetStoreDetailLocked(storeID)
486+
detail := storePool.GetStoreDetail(storeID)
487+
detail.Lock()
491488
detail.Desc = &roachpb.StoreDescriptor{
492489
StoreID: storeID,
493490
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
494491
}
492+
detail.Unlock()
495493
}
496494
for _, storeID := range unavailableStoreIDs {
497495
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_UNAVAILABLE
498-
detail := storePool.GetStoreDetailLocked(storeID)
496+
detail := storePool.GetStoreDetail(storeID)
497+
detail.Lock()
499498
detail.Desc = &roachpb.StoreDescriptor{
500499
StoreID: storeID,
501500
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
502501
}
502+
detail.Unlock()
503503
}
504504
for _, storeID := range deadStoreIDs {
505505
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_DEAD
506-
detail := storePool.GetStoreDetailLocked(storeID)
506+
detail := storePool.GetStoreDetail(storeID)
507+
detail.Lock()
507508
detail.Desc = &roachpb.StoreDescriptor{
508509
StoreID: storeID,
509510
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
510511
}
512+
detail.Unlock()
511513
}
512514
for _, storeID := range decommissioningStoreIDs {
513515
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_DECOMMISSIONING
514-
detail := storePool.GetStoreDetailLocked(storeID)
516+
detail := storePool.GetStoreDetail(storeID)
517+
detail.Lock()
515518
detail.Desc = &roachpb.StoreDescriptor{
516519
StoreID: storeID,
517520
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
518521
}
522+
detail.Unlock()
519523
}
520524
for _, storeID := range decommissionedStoreIDs {
521525
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_DECOMMISSIONED
522-
detail := storePool.GetStoreDetailLocked(storeID)
526+
detail := storePool.GetStoreDetail(storeID)
527+
detail.Lock()
523528
detail.Desc = &roachpb.StoreDescriptor{
524529
StoreID: storeID,
525530
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
526531
}
532+
detail.Unlock()
527533
}
528534

529535
for _, storeID := range suspectedStoreIDs {
530536
liveNodeSet[roachpb.NodeID(storeID)] = livenesspb.NodeLivenessStatus_LIVE
531-
detail := storePool.GetStoreDetailLocked(storeID)
537+
detail := storePool.GetStoreDetail(storeID)
538+
detail.Lock()
532539
detail.LastUnavailable = storePool.Clock().Now()
533540
detail.Desc = &roachpb.StoreDescriptor{
534541
StoreID: storeID,
535542
Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
536543
}
544+
detail.Unlock()
537545
}
538546

539547
// Set the node liveness function using the set we constructed.
@@ -1380,16 +1388,20 @@ func TestAllocatorRebalanceDeadNodes(t *testing.T) {
13801388
}
13811389

13821390
// Initialize 8 stores: where store 6 is the target for rebalancing.
1383-
sp.DetailsMu.Lock()
1384-
sp.GetStoreDetailLocked(1).Desc.Capacity = ranges(100)
1385-
sp.GetStoreDetailLocked(2).Desc.Capacity = ranges(100)
1386-
sp.GetStoreDetailLocked(3).Desc.Capacity = ranges(100)
1387-
sp.GetStoreDetailLocked(4).Desc.Capacity = ranges(100)
1388-
sp.GetStoreDetailLocked(5).Desc.Capacity = ranges(100)
1389-
sp.GetStoreDetailLocked(6).Desc.Capacity = ranges(0)
1390-
sp.GetStoreDetailLocked(7).Desc.Capacity = ranges(100)
1391-
sp.GetStoreDetailLocked(8).Desc.Capacity = ranges(100)
1392-
sp.DetailsMu.Unlock()
1391+
updateDescCapacity := func(storeID roachpb.StoreID, capacity roachpb.StoreCapacity) {
1392+
sd := sp.GetStoreDetail(storeID)
1393+
sd.Lock()
1394+
sd.Desc.Capacity = capacity
1395+
sd.Unlock()
1396+
}
1397+
updateDescCapacity(1, ranges(100))
1398+
updateDescCapacity(2, ranges(100))
1399+
updateDescCapacity(3, ranges(100))
1400+
updateDescCapacity(4, ranges(100))
1401+
updateDescCapacity(5, ranges(100))
1402+
updateDescCapacity(6, ranges(0))
1403+
updateDescCapacity(7, ranges(100))
1404+
updateDescCapacity(8, ranges(100))
13931405

13941406
// Each test case should describe a repair situation which has a lower
13951407
// priority than the previous test case.

pkg/kv/kvserver/allocator/storepool/override_store_pool.go

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -114,38 +114,31 @@ func (o *OverrideStorePool) DecommissioningReplicas(
114114
func (o *OverrideStorePool) GetStoreList(
115115
filter StoreFilter,
116116
) (StoreList, int, ThrottledStoreReasons) {
117-
o.sp.DetailsMu.Lock()
118-
defer o.sp.DetailsMu.Unlock()
119-
120117
var storeIDs roachpb.StoreIDSlice
121-
for storeID := range o.sp.DetailsMu.StoreDetails {
118+
o.sp.Details.StoreDetails.Range(func(storeID roachpb.StoreID, _ *StoreDetailMu) bool {
122119
storeIDs = append(storeIDs, storeID)
123-
}
124-
return o.sp.getStoreListFromIDsLocked(storeIDs, o.overrideNodeLivenessFn, filter)
120+
return true
121+
})
122+
return o.sp.getStoreListFromIDs(storeIDs, o.overrideNodeLivenessFn, filter)
125123
}
126124

127125
// GetStoreListFromIDs implements the AllocatorStorePool interface.
128126
func (o *OverrideStorePool) GetStoreListFromIDs(
129127
storeIDs roachpb.StoreIDSlice, filter StoreFilter,
130128
) (StoreList, int, ThrottledStoreReasons) {
131-
o.sp.DetailsMu.Lock()
132-
defer o.sp.DetailsMu.Unlock()
133-
return o.sp.getStoreListFromIDsLocked(storeIDs, o.overrideNodeLivenessFn, filter)
129+
return o.sp.getStoreListFromIDs(storeIDs, o.overrideNodeLivenessFn, filter)
134130
}
135131

136132
// GetStoreListForTargets implements the AllocatorStorePool interface.
137133
func (o *OverrideStorePool) GetStoreListForTargets(
138134
candidates []roachpb.ReplicationTarget, filter StoreFilter,
139135
) (StoreList, int, ThrottledStoreReasons) {
140-
o.sp.DetailsMu.Lock()
141-
defer o.sp.DetailsMu.Unlock()
142-
143136
storeIDs := make(roachpb.StoreIDSlice, 0, len(candidates))
144137
for _, tgt := range candidates {
145138
storeIDs = append(storeIDs, tgt.StoreID)
146139
}
147140

148-
return o.sp.getStoreListFromIDsLocked(storeIDs, o.overrideNodeLivenessFn, filter)
141+
return o.sp.getStoreListFromIDs(storeIDs, o.overrideNodeLivenessFn, filter)
149142
}
150143

151144
// LiveAndDeadReplicas implements the AllocatorStorePool interface.

pkg/kv/kvserver/allocator/storepool/override_store_pool_test.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,11 @@ func TestOverrideStorePoolGetStoreList(t *testing.T) {
330330
livenessOverrides[decommissioningStore.Node.NodeID] = livenesspb.NodeLivenessStatus_DECOMMISSIONING
331331

332332
// Set suspectedStore as suspected.
333-
testStorePool.DetailsMu.Lock()
334-
testStorePool.DetailsMu.StoreDetails[suspectedStore.StoreID].LastUnavailable = testStorePool.clock.Now()
335-
testStorePool.DetailsMu.Unlock()
333+
val, ok := testStorePool.Details.StoreDetails.Load(suspectedStore.StoreID)
334+
require.True(t, ok)
335+
val.Lock()
336+
val.LastUnavailable = testStorePool.clock.Now()
337+
val.Unlock()
336338

337339
// No filter or limited set of store IDs.
338340
require.NoError(t, verifyStoreList(

0 commit comments

Comments
 (0)