Skip to content

Commit 01a6aab

Browse files
craig[bot]knz
andcommitted
108401: server: avoid a race in the server controller r=lidorcarmel a=knz Co-authored with `@lidorcarmel` Prior to this patch, the Go race detector was complaining about racy concurrent accesses to `serverStateUsingChannels.server`, via `(*serverStateUsingChannels) getServer()` and `(*channelOrchestrator) startControlledServer()`. These racy accesses happened to be safe because the writes and reads to that field were correctly ordered around updates to an atomic bool (the `started` field). However, the race detector is not sufficiently sophisticated to detect this ordering and satisfy itself that the state transition can only happen once. In order to silence the race detector (with no change in correctness), this patch replaces the atomic bool by a mutex, whose access semantics are properly understood by the race detector. This change incidentally makes the code slightly easier to read and understand. Supersedes cockroachdb#108371. Fixes cockroachdb#107930. Epic: CRDB-28893 Co-authored-by: Raphael 'kena' Poss <[email protected]>
2 parents 6aa4615 + 26d007b commit 01a6aab

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

pkg/server/server_controller_channel_orchestrator.go

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,14 @@ type serverStateUsingChannels struct {
7474
// features that label data with the current tenant name.
7575
nc *roachpb.TenantNameContainer
7676

77-
// server is the server that is being controlled.
78-
server orchestratedServer
77+
startedMu struct {
78+
syncutil.Mutex
79+
80+
// server is the server that is being controlled.
81+
// This is only set when the corresponding orchestratedServer
82+
// instance is ready.
83+
server orchestratedServer
84+
}
7985

8086
// startedOrStopped is closed when the server has either started or
8187
// stopped. This can be used to wait for a server start.
@@ -85,10 +91,6 @@ type serverStateUsingChannels struct {
8591
// during server creation if any.
8692
startErr error
8793

88-
// started is marked true when the server has started. This can
89-
// be used to observe the start state without waiting.
90-
started syncutil.AtomicBool
91-
9294
// requestImmediateStop can be called to request a server to stop
9395
// ungracefully.
9496
// It can be called multiple times.
@@ -106,7 +108,9 @@ var _ serverState = (*serverStateUsingChannels)(nil)
106108

107109
// getServer is part of the serverState interface.
108110
func (s *serverStateUsingChannels) getServer() (orchestratedServer, bool) {
109-
return s.server, s.started.Get()
111+
s.startedMu.Lock()
112+
defer s.startedMu.Unlock()
113+
return s.startedMu.server, s.startedMu.server != nil
110114
}
111115

112116
// nameContainer is part of the serverState interface.
@@ -154,14 +158,13 @@ func (o *channelOrchestrator) makeServerStateForSystemTenant(
154158
closeCtx, cancelFn := context.WithCancel(context.Background())
155159
st := &serverStateUsingChannels{
156160
nc: nc,
157-
server: systemSrv,
158161
startedOrStoppedCh: closedChan,
159162
requestImmediateStop: cancelFn,
160163
requestGracefulStop: cancelFn,
161164
stoppedCh: closeCtx.Done(),
162165
}
163166

164-
st.started.Set(true)
167+
st.startedMu.server = systemSrv
165168
return st
166169
}
167170

@@ -301,7 +304,11 @@ func (o *channelOrchestrator) startControlledServer(
301304
// the goroutine above to call tenantStopper.Stop() and
302305
// terminate.
303306
state.requestImmediateStop()
304-
state.started.Set(false)
307+
func() {
308+
state.startedMu.Lock()
309+
defer state.startedMu.Unlock()
310+
state.startedMu.server = nil
311+
}()
305312
close(stoppedCh)
306313
if !startedOrStoppedChAlreadyClosed {
307314
state.startErr = errors.New("server stop before successful start")
@@ -413,9 +420,12 @@ func (o *channelOrchestrator) startControlledServer(
413420
}
414421

415422
// Indicate the server has started.
416-
state.server = tenantServer
417423
startedOrStoppedChAlreadyClosed = true
418-
state.started.Set(true)
424+
func() {
425+
state.startedMu.Lock()
426+
defer state.startedMu.Unlock()
427+
state.startedMu.server = tenantServer
428+
}()
419429
close(startedOrStoppedCh)
420430

421431
// Wait for a request to shut down.

0 commit comments

Comments
 (0)