Skip to content

Commit 3c493b2

Browse files
authored
backend, logger: fix data race in UT (#443)
1 parent 8191da9 commit 3c493b2

File tree

8 files changed

+188
-72
lines changed

8 files changed

+188
-72
lines changed

pkg/manager/logger/manager_test.go

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -184,49 +184,54 @@ func readLogFiles(t *testing.T, dir string) []os.FileInfo {
184184
func TestLogConcurrently(t *testing.T) {
185185
dir := t.TempDir()
186186
fileName := filepath.Join(dir, "proxy.log")
187-
cfg := &config.Config{
188-
Log: config.Log{
189-
Encoder: "tidb",
190-
LogOnline: config.LogOnline{
191-
Level: "info",
192-
LogFile: config.LogFile{
193-
Filename: fileName,
194-
MaxSize: 1,
195-
MaxDays: 2,
196-
MaxBackups: 3,
187+
188+
newCfg := func() *config.Config {
189+
return &config.Config{
190+
Log: config.Log{
191+
Encoder: "tidb",
192+
LogOnline: config.LogOnline{
193+
Level: "info",
194+
LogFile: config.LogFile{
195+
Filename: fileName,
196+
MaxSize: 1,
197+
MaxDays: 2,
198+
MaxBackups: 3,
199+
},
197200
},
198201
},
199-
},
202+
}
200203
}
201204

202-
lg, ch := setupLogManager(t, cfg)
205+
lg, ch := setupLogManager(t, newCfg())
203206
var wg waitgroup.WaitGroup
204207
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
205208
for i := 0; i < 5; i++ {
206209
wg.Run(func() {
207210
for ctx.Err() == nil {
208-
lg = lg.Named("test_name")
209-
lg.Info("test_info")
210-
lg.Warn("test_warn")
211-
lg.Error("test_error")
212-
lg = lg.With(zap.String("with", "test_with"))
213-
lg.Info("test_info")
214-
lg.Warn("test_warn")
215-
lg.Error("test_error")
211+
namedLg := lg.Named("test_name")
212+
namedLg.Info("test_info")
213+
namedLg.Warn("test_warn")
214+
namedLg.Error("test_error")
215+
withLg := namedLg.With(zap.String("with", "test_with"))
216+
withLg.Info("test_info")
217+
withLg.Warn("test_warn")
218+
withLg.Error("test_error")
216219
}
217220
})
218221
}
219222
wg.Run(func() {
220-
newCfg := cfg.Clone()
221223
for ctx.Err() == nil {
222-
newCfg.Log.LogFile.MaxDays = int(rand.Int31n(10))
223-
ch <- newCfg
224+
cfg := newCfg()
225+
cfg.Log.LogFile.MaxDays = int(rand.Int31n(10))
226+
ch <- cfg
224227
time.Sleep(10 * time.Millisecond)
225-
newCfg.Log.LogFile.MaxBackups = int(rand.Int31n(10))
226-
ch <- newCfg
228+
cfg = newCfg()
229+
cfg.Log.LogFile.MaxBackups = int(rand.Int31n(10))
230+
ch <- cfg
227231
time.Sleep(10 * time.Millisecond)
228-
newCfg.Log.LogFile.MaxSize = int(rand.Int31n(10))
229-
ch <- newCfg
232+
cfg = newCfg()
233+
cfg.Log.LogFile.MaxSize = int(rand.Int31n(10))
234+
ch <- cfg
230235
time.Sleep(10 * time.Millisecond)
231236
}
232237
})

pkg/proxy/backend/authenticator.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package backend
55

66
import (
77
"bytes"
8+
"context"
89
"crypto/tls"
910
"encoding/binary"
1011
"fmt"
@@ -82,9 +83,9 @@ func (auth *Authenticator) verifyBackendCaps(logger *zap.Logger, backendCapabili
8283
return nil
8384
}
8485

85-
type backendIOGetter func(ctx ConnContext, auth *Authenticator, resp *pnet.HandshakeResp) (*pnet.PacketIO, error)
86+
type backendIOGetter func(ctx context.Context, cctx ConnContext, resp *pnet.HandshakeResp) (*pnet.PacketIO, error)
8687

87-
func (auth *Authenticator) handshakeFirstTime(logger *zap.Logger, cctx ConnContext, clientIO *pnet.PacketIO, handshakeHandler HandshakeHandler,
88+
func (auth *Authenticator) handshakeFirstTime(ctx context.Context, logger *zap.Logger, cctx ConnContext, clientIO *pnet.PacketIO, handshakeHandler HandshakeHandler,
8889
getBackendIO backendIOGetter, frontendTLSConfig, backendTLSConfig *tls.Config) error {
8990
clientIO.ResetSequence()
9091

@@ -159,7 +160,7 @@ func (auth *Authenticator) handshakeFirstTime(logger *zap.Logger, cctx ConnConte
159160
RECONNECT:
160161

161162
// In case of testing, backendIO is passed manually that we don't want to bother with the routing logic.
162-
backendIO, err := getBackendIO(cctx, auth, clientResp)
163+
backendIO, err := getBackendIO(ctx, cctx, clientResp)
163164
if err != nil {
164165
return err
165166
}

pkg/proxy/backend/backend_conn_mgr.go

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ func (cfg *BCConfig) check() {
100100
// - If it retries after each command: the latency will be unacceptable afterwards if it always fails.
101101
// - If it stops receiving signals: the previous new backend may be abnormal but the next new backend may be good.
102102
type BackendConnManager struct {
103-
// processLock makes redirecting and command processing exclusive.
103+
// processLock makes all processes exclusive.
104104
processLock sync.Mutex
105105
wg waitgroup.WaitGroup
106106
// signalReceived is used to notify the signal processing goroutine.
@@ -110,10 +110,11 @@ type BackendConnManager struct {
110110
eventReceiver unsafe.Pointer
111111
config *BCConfig
112112
logger *zap.Logger
113-
// It will be set to nil after migration.
113+
// Redirect() sets it without lock. It will be set to nil after migration.
114114
redirectInfo atomic.Pointer[router.BackendInst]
115115
// redirectResCh is used to notify the event receiver asynchronously.
116-
redirectResCh chan *redirectResult
116+
redirectResCh chan *redirectResult
117+
// GracefulClose() sets it without lock.
117118
closeStatus atomic.Int32
118119
checkBackendTicker *time.Ticker
119120
// cancelFunc is used to cancel the signal processing goroutine.
@@ -163,9 +164,13 @@ func (mgr *BackendConnManager) Connect(ctx context.Context, clientIO *pnet.Packe
163164
defer mgr.processLock.Unlock()
164165

165166
mgr.backendTLS = backendTLSConfig
166-
167167
mgr.clientIO = clientIO
168-
err := mgr.authenticator.handshakeFirstTime(mgr.logger.Named("authenticator"), mgr, clientIO, mgr.handshakeHandler, mgr.getBackendIO, frontendTLSConfig, backendTLSConfig)
168+
169+
if mgr.closeStatus.Load() >= statusNotifyClose {
170+
mgr.quitSource = SrcProxyQuit
171+
return errors.New("graceful shutdown before connecting")
172+
}
173+
err := mgr.authenticator.handshakeFirstTime(ctx, mgr.logger.Named("authenticator"), mgr, clientIO, mgr.handshakeHandler, mgr.getBackendIO, frontendTLSConfig, backendTLSConfig)
169174
if err != nil {
170175
src := Error2Source(err)
171176
mgr.handshakeHandler.OnHandshake(mgr, mgr.ServerAddr(), err, src)
@@ -188,15 +193,15 @@ func (mgr *BackendConnManager) Connect(ctx context.Context, clientIO *pnet.Packe
188193
return nil
189194
}
190195

191-
func (mgr *BackendConnManager) getBackendIO(cctx ConnContext, auth *Authenticator, resp *pnet.HandshakeResp) (*pnet.PacketIO, error) {
196+
func (mgr *BackendConnManager) getBackendIO(ctx context.Context, cctx ConnContext, resp *pnet.HandshakeResp) (*pnet.PacketIO, error) {
192197
r, err := mgr.handshakeHandler.GetRouter(cctx, resp)
193198
if err != nil {
194199
return nil, errors.Wrap(ErrProxyErr, err)
195200
}
196201
// Reasons to wait:
197202
// - The TiDB instances may not be initialized yet
198203
// - One TiDB may be just shut down and another is just started but not ready yet
199-
bctx, cancel := context.WithTimeout(context.Background(), mgr.config.ConnectTimeout)
204+
bctx, cancel := context.WithTimeout(ctx, mgr.config.ConnectTimeout)
200205
selector := r.GetBackendSelector()
201206
startTime := time.Now()
202207
var addr string
@@ -259,23 +264,25 @@ func (mgr *BackendConnManager) getBackendIO(cctx ConnContext, auth *Authenticato
259264
// ExecuteCmd forwards messages between the client and the backend.
260265
// If it finds that the session is ready for redirection, it migrates the session.
261266
func (mgr *BackendConnManager) ExecuteCmd(ctx context.Context, request []byte) (err error) {
267+
mgr.processLock.Lock()
262268
defer func() {
263269
mgr.setQuitSourceByErr(err)
264270
mgr.handshakeHandler.OnTraffic(mgr)
271+
mgr.processLock.Unlock()
265272
}()
266273
if len(request) < 1 {
267274
err = mysql.ErrMalformPacket
268275
return
269276
}
270277
cmd := pnet.Command(request[0])
271278
startTime := time.Now()
272-
mgr.processLock.Lock()
273-
defer mgr.processLock.Unlock()
274279

275-
switch mgr.closeStatus.Load() {
276-
case statusClosing, statusClosed:
280+
// Once the request is accepted, it's treated in the transaction, so we don't check graceful shutdown here.
281+
if mgr.closeStatus.Load() >= statusClosing {
277282
return
278283
}
284+
// The query may last over CheckBackendInterval. In this case we don't need to check the backend after the query.
285+
mgr.checkBackendTicker.Stop()
279286
defer mgr.resetCheckBackendTicker()
280287
waitingRedirect := mgr.redirectInfo.Load() != nil
281288
var holdRequest bool
@@ -400,8 +407,7 @@ func (mgr *BackendConnManager) processSignals(ctx context.Context) {
400407
// tryRedirect tries to migrate the session if the session is redirect-able.
401408
// NOTE: processLock should be held before calling this function.
402409
func (mgr *BackendConnManager) tryRedirect(ctx context.Context) {
403-
switch mgr.closeStatus.Load() {
404-
case statusNotifyClose, statusClosing, statusClosed:
410+
if mgr.closeStatus.Load() >= statusNotifyClose || ctx.Err() != nil {
405411
return
406412
}
407413
backendInst := mgr.redirectInfo.Load()
@@ -442,6 +448,10 @@ func (mgr *BackendConnManager) tryRedirect(ctx context.Context) {
442448
}
443449
return
444450
}
451+
if ctx.Err() != nil {
452+
rs.err = ctx.Err()
453+
return
454+
}
445455
if rs.err = mgr.updateAuthInfoFromSessionStates(hack.Slice(sessionStates)); rs.err != nil {
446456
return
447457
}
@@ -492,8 +502,7 @@ func (mgr *BackendConnManager) updateAuthInfoFromSessionStates(sessionStates []b
492502
// Note that the caller requires the function to be non-blocking.
493503
func (mgr *BackendConnManager) Redirect(backendInst router.BackendInst) bool {
494504
// NOTE: BackendConnManager may be closing concurrently because of no lock.
495-
switch mgr.closeStatus.Load() {
496-
case statusNotifyClose, statusClosing, statusClosed:
505+
if mgr.closeStatus.Load() >= statusNotifyClose {
497506
return false
498507
}
499508
mgr.redirectInfo.Store(&backendInst)
@@ -523,12 +532,13 @@ func (mgr *BackendConnManager) notifyRedirectResult(ctx context.Context, rs *red
523532

524533
// GracefulClose waits for the end of the transaction and closes the session.
525534
func (mgr *BackendConnManager) GracefulClose() {
526-
mgr.closeStatus.Store(statusNotifyClose)
527-
mgr.signalReceived <- signalTypeGracefulClose
535+
if mgr.closeStatus.CompareAndSwap(statusActive, statusNotifyClose) {
536+
mgr.signalReceived <- signalTypeGracefulClose
537+
}
528538
}
529539

530540
func (mgr *BackendConnManager) tryGracefulClose(ctx context.Context) {
531-
if mgr.closeStatus.Load() != statusNotifyClose {
541+
if mgr.closeStatus.Load() != statusNotifyClose || ctx.Err() != nil {
532542
return
533543
}
534544
if !mgr.cmdProcessor.finishedTxn() {
@@ -539,17 +549,16 @@ func (mgr *BackendConnManager) tryGracefulClose(ctx context.Context) {
539549
if err := mgr.clientIO.GracefulClose(); err != nil {
540550
mgr.logger.Warn("graceful close client IO error", zap.Stringer("client_addr", mgr.clientIO.RemoteAddr()), zap.Error(err))
541551
}
542-
mgr.closeStatus.Store(statusClosing)
552+
mgr.closeStatus.CompareAndSwap(statusNotifyClose, statusClosing)
543553
}
544554

545555
func (mgr *BackendConnManager) checkBackendActive() {
546-
switch mgr.closeStatus.Load() {
547-
case statusClosing, statusClosed:
548-
return
549-
}
550-
551556
mgr.processLock.Lock()
552557
defer mgr.processLock.Unlock()
558+
559+
if mgr.closeStatus.Load() >= statusNotifyClose {
560+
return
561+
}
553562
backendIO := mgr.backendIO.Load()
554563
if !backendIO.IsPeerActive() {
555564
mgr.logger.Info("backend connection is closed, close client connection",
@@ -558,7 +567,7 @@ func (mgr *BackendConnManager) checkBackendActive() {
558567
if err := mgr.clientIO.GracefulClose(); err != nil {
559568
mgr.logger.Warn("graceful close client IO error", zap.Stringer("client_addr", mgr.clientIO.RemoteAddr()), zap.Error(err))
560569
}
561-
mgr.closeStatus.Store(statusClosing)
570+
mgr.closeStatus.CompareAndSwap(statusActive, statusClosing)
562571
}
563572
}
564573

@@ -618,6 +627,17 @@ func (mgr *BackendConnManager) Value(key any) any {
618627

619628
// Close releases all resources.
620629
func (mgr *BackendConnManager) Close() error {
630+
// BackendConnMgr may close even before connecting, so protect the members with a lock.
631+
mgr.processLock.Lock()
632+
defer func() {
633+
mgr.processLock.Unlock()
634+
// Wait out of the lock to avoid deadlock.
635+
mgr.wg.Wait()
636+
}()
637+
if mgr.closeStatus.Load() >= statusClosed {
638+
return nil
639+
}
640+
621641
mgr.closeStatus.Store(statusClosing)
622642
if mgr.checkBackendTicker != nil {
623643
mgr.checkBackendTicker.Stop()
@@ -626,18 +646,16 @@ func (mgr *BackendConnManager) Close() error {
626646
mgr.cancelFunc()
627647
mgr.cancelFunc = nil
628648
}
629-
mgr.wg.Wait()
630649

650+
// OnConnClose may read ServerAddr(), so call it before closing backendIO.
631651
handErr := mgr.handshakeHandler.OnConnClose(mgr, mgr.quitSource)
632652

633653
var connErr error
634654
var addr string
635-
mgr.processLock.Lock()
636655
if backendIO := mgr.backendIO.Swap(nil); backendIO != nil {
637656
addr = backendIO.RemoteAddr().String()
638657
connErr = backendIO.Close()
639658
}
640-
mgr.processLock.Unlock()
641659

642660
eventReceiver := mgr.getEventReceiver()
643661
if eventReceiver != nil {
@@ -690,16 +708,20 @@ func (mgr *BackendConnManager) setQuitSourceByErr(err error) {
690708
mgr.quitSource = Error2Source(err)
691709
}
692710

711+
// UpdateLogger add fields to the logger.
712+
// Note: it should be called within the lock.
693713
func (mgr *BackendConnManager) UpdateLogger(fields ...zap.Field) {
694714
mgr.logger = mgr.logger.With(fields...)
695715
}
696716

697717
// ConnInfo returns detailed info of the connection, which should not be logged too many times.
698718
func (mgr *BackendConnManager) ConnInfo() []zap.Field {
719+
mgr.processLock.Lock()
699720
var fields []zap.Field
700721
if mgr.authenticator != nil {
701722
fields = mgr.authenticator.ConnInfo()
702723
}
724+
mgr.processLock.Unlock()
703725
fields = append(fields, zap.String("backend_addr", mgr.ServerAddr()))
704726
return fields
705727
}

0 commit comments

Comments
 (0)