Skip to content

Commit fcfa0c8

Browse files
Fix deadlock in semi-sync monitor (vitessio#18276)
Signed-off-by: Manan Gupta <[email protected]>
1 parent 9f6b874 commit fcfa0c8

File tree

2 files changed

+66
-7
lines changed

2 files changed

+66
-7
lines changed

go/vt/vttablet/tabletmanager/semisyncmonitor/monitor.go

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ type Monitor struct {
6666
// the semisync_heartbeat table.
6767
clearTicks *timer.Timer
6868

69+
// timerMu protects operations on the timers to prevent deadlocks
70+
// This must be acquired before mu if both are needed.
71+
timerMu sync.Mutex
72+
6973
// mu protects the fields below.
7074
mu sync.Mutex
7175
appPool *dbconnpool.ConnectionPool
@@ -123,8 +127,13 @@ func CreateTestSemiSyncMonitor(db *fakesqldb.DB, exporter *servenv.Exporter) *Mo
123127

124128
// Open starts the monitor.
125129
func (m *Monitor) Open() {
130+
// First acquire the timer mutex to prevent deadlock - we acquire in a consistent order
131+
m.timerMu.Lock()
132+
defer m.timerMu.Unlock()
133+
126134
m.mu.Lock()
127135
defer m.mu.Unlock()
136+
128137
// The check for config being nil is only requried for tests.
129138
if m.isOpen || m.config == nil || m.config.DB == nil {
130139
// If we are already open, then there is nothing to do
@@ -145,16 +154,19 @@ func (m *Monitor) Open() {
145154

146155
// Close stops the monitor.
147156
func (m *Monitor) Close() {
148-
m.mu.Lock()
149-
defer m.mu.Unlock()
150-
if !m.isOpen {
151-
// If we are already closed, then there is nothing to do
152-
return
153-
}
154-
m.isOpen = false
157+
// First acquire the timer mutex to prevent deadlock - we acquire in a consistent order
158+
m.timerMu.Lock()
159+
defer m.timerMu.Unlock()
155160
log.Info("SemiSync Monitor: closing")
161+
// We close the ticks before we acquire the mu mutex to prevent deadlock.
162+
// The timer Close should not be called while holding a mutex that the function
163+
// the timer runs also acquires.
156164
m.clearTicks.Stop()
157165
m.ticks.Stop()
166+
// Acquire the mu mutex to update the isOpen field.
167+
m.mu.Lock()
168+
defer m.mu.Unlock()
169+
m.isOpen = false
158170
m.appPool.Close()
159171
}
160172

go/vt/vttablet/tabletmanager/semisyncmonitor/monitor_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,53 @@ func TestWaitUntilSemiSyncUnblocked(t *testing.T) {
619619
require.True(t, m.isClosed())
620620
}
621621

622+
// TestDeadlockOnClose tests the deadlock that can occur when calling Close().
623+
// Look at https://github.com/vitessio/vitess/issues/18275 for more details.
624+
func TestDeadlockOnClose(t *testing.T) {
625+
db := fakesqldb.New(t)
626+
defer db.Close()
627+
params := db.ConnParams()
628+
cp := *params
629+
dbc := dbconfigs.NewTestDBConfigs(cp, cp, "")
630+
config := &tabletenv.TabletConfig{
631+
DB: dbc,
632+
SemiSyncMonitor: tabletenv.SemiSyncMonitorConfig{
633+
// Extremely low interval to trigger the deadlock quickly.
634+
// This makes the monitor try and write that it is still blocked quite aggressively.
635+
Interval: 10 * time.Millisecond,
636+
},
637+
}
638+
m := NewMonitor(config, exporter)
639+
640+
// Set up for semisync to be blocked
641+
db.SetNeverFail(true)
642+
db.AddQuery(semiSyncWaitSessionsRead, sqltypes.MakeTestResult(sqltypes.MakeTestFields("variable_value", "varchar"), "1"))
643+
644+
// Open the monitor
645+
m.Open()
646+
defer m.Close()
647+
648+
// We will now try to close and open the monitor multiple times to see if we can trigger a deadlock.
649+
finishCh := make(chan int)
650+
go func() {
651+
count := 100
652+
for i := 0; i < count; i++ {
653+
m.Close()
654+
m.Open()
655+
time.Sleep(20 * time.Millisecond)
656+
}
657+
close(finishCh)
658+
}()
659+
660+
select {
661+
case <-finishCh:
662+
// The test finished without deadlocking.
663+
case <-time.After(5 * time.Second):
664+
// The test timed out, which means we deadlocked.
665+
t.Fatalf("Deadlock occurred while closing the monitor")
666+
}
667+
}
668+
622669
// TestSemiSyncMonitor tests the semi-sync monitor as a black box.
623670
// It only calls the exported methods to see they work as intended.
624671
func TestSemiSyncMonitor(t *testing.T) {

0 commit comments

Comments
 (0)