Skip to content

Commit 44552e1

Browse files
committed
storepool: try to use RLock in StoreDetailMu.status() when possible
Before this commit, we used to take the write lock when querying the StoreDetailMu.status(). The reason of taking the write lock is that the StoreDetailMu.status() updates the lastUnavailable timestamp. This commit attempts to take the read lock, and only upgrades it to a write lock if we end up needing to update the lastUnavailable timestamp. References: #143145 Release note: None
1 parent ff236a6 commit 44552e1

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

pkg/kv/kvserver/allocator/storepool/store_pool.go

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,7 @@ func (sd *StoreDetailMu) status(
148148
nl NodeLivenessFunc,
149149
suspectDuration time.Duration,
150150
) storeStatus {
151-
sd.Lock()
152-
defer sd.Unlock()
151+
sd.RLock() // all exist paths will RUnlock() the lock.
153152
// During normal operation, we expect the state transitions for stores to look like the following:
154153
//
155154
// +-----------------------+
@@ -171,19 +170,37 @@ func (sd *StoreDetailMu) status(
171170
// heartbeat
172171
//
173172

173+
// updateLastUnavailableAndReturnStatusRLocked upgrades the read lock to a
174+
// write lock, updates the LastUnavailable timestamp, returns the store
175+
// status, and unlocks the write lock.
176+
updateLastUnavailableAndReturnStatusRLocked := func(
177+
lastUnavailable hlc.Timestamp, returnStatus storeStatus,
178+
) storeStatus {
179+
sd.RUnlock()
180+
sd.Lock()
181+
defer sd.Unlock()
182+
sd.LastUnavailable = lastUnavailable
183+
return returnStatus
184+
}
185+
186+
// returnStatusRLocked unlocks the read lock and returns the store status.
187+
returnStatusRLocked := func(returnStatus storeStatus) storeStatus {
188+
defer sd.RUnlock()
189+
return returnStatus
190+
}
191+
174192
// The store is considered dead if it hasn't been updated via gossip
175193
// within the liveness threshold. Note that LastUpdatedTime is set
176194
// when the store detail is created and will have a non-zero value
177195
// even before the first gossip arrives for a store.
178196
deadAsOf := sd.LastUpdatedTime.AddDuration(deadThreshold)
179197
if now.After(deadAsOf) {
180-
sd.LastUnavailable = now
181-
return storeStatusDead
198+
return updateLastUnavailableAndReturnStatusRLocked(now, storeStatusDead)
182199
}
183200
// If there's no descriptor (meaning no gossip ever arrived for this
184201
// store), return unavailable.
185202
if sd.Desc == nil {
186-
return storeStatusUnknown
203+
return returnStatusRLocked(storeStatusUnknown)
187204
}
188205

189206
// Even if the store has been updated via gossip, we still rely on
@@ -193,34 +210,31 @@ func (sd *StoreDetailMu) status(
193210
// dead -> decommissioning -> unknown -> draining -> suspect -> available.
194211
switch nl(sd.Desc.Node.NodeID) {
195212
case livenesspb.NodeLivenessStatus_DEAD, livenesspb.NodeLivenessStatus_DECOMMISSIONED:
196-
sd.LastUnavailable = now
197-
return storeStatusDead
213+
return updateLastUnavailableAndReturnStatusRLocked(now, storeStatusDead)
198214
case livenesspb.NodeLivenessStatus_DECOMMISSIONING:
199-
return storeStatusDecommissioning
215+
return returnStatusRLocked(storeStatusDecommissioning)
200216
case livenesspb.NodeLivenessStatus_UNAVAILABLE:
201-
sd.LastUnavailable = now
202-
return storeStatusUnknown
217+
return updateLastUnavailableAndReturnStatusRLocked(now, storeStatusUnknown)
203218
case livenesspb.NodeLivenessStatus_UNKNOWN:
204-
return storeStatusUnknown
219+
return returnStatusRLocked(storeStatusUnknown)
205220
case livenesspb.NodeLivenessStatus_DRAINING:
206-
sd.LastUnavailable = now
207-
return storeStatusDraining
221+
return updateLastUnavailableAndReturnStatusRLocked(now, storeStatusDraining)
208222
}
209223

210224
// A store is throttled if it has missed receiving snapshots recently.
211225
if sd.ThrottledUntil.After(now) {
212-
return storeStatusThrottled
226+
return returnStatusRLocked(storeStatusThrottled)
213227
}
214228

215229
// Check whether the store is currently suspect. We measure that by
216230
// looking at the time it was last unavailable making sure we have not seen any
217231
// failures for a period of time defined by StoreSuspectDuration.
218232
if sd.LastUnavailable.AddDuration(suspectDuration).After(now) {
219-
return storeStatusSuspect
233+
return returnStatusRLocked(storeStatusSuspect)
220234
}
221235

222236
// Clear out the LastUnavailable once we return available status.
223-
return storeStatusAvailable
237+
return returnStatusRLocked(storeStatusAvailable)
224238
}
225239

226240
// localityWithString maintains a string representation of each locality along

0 commit comments

Comments
 (0)