Skip to content

Commit f99c3c0

Browse files
authored
vip, server: optimize failover for graceful shutdown with VIP (#625)
1 parent e3d35cb commit f99c3c0

File tree

6 files changed

+90
-13
lines changed

6 files changed

+90
-13
lines changed

pkg/balance/metricsreader/backend_reader.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ const (
3939
readerOwnerKeyPrefix = "/tiproxy/metric_reader"
4040
readerOwnerKeySuffix = "owner"
4141
// sessionTTL is the session's TTL in seconds for backend reader owner election.
42-
sessionTTL = 30
42+
sessionTTL = 15
4343
// backendMetricPath is the path of backend HTTP API to read metrics.
4444
backendMetricPath = "/metrics"
4545
// ownerMetricPath is the path of reading backend metrics from the backend reader owner.

pkg/manager/elect/election.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ func (m *election) watchOwner(ctx context.Context, session *concurrency.Session,
264264
func (m *election) Close() {
265265
if m.cancel != nil {
266266
m.cancel()
267+
m.cancel = nil
267268
}
268269
m.wg.Wait()
269270
}

pkg/manager/elect/election_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ func TestElectOwner(t *testing.T) {
5656
_, err := elec.GetOwnerID(context.Background())
5757
require.Error(t, err)
5858
}
59+
// double closing elections is allowed
60+
{
61+
elec := ts.getElection("3")
62+
elec.Close()
63+
}
5964
}
6065

6166
func TestEtcdServerDown(t *testing.T) {

pkg/manager/vip/manager.go

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package vip
66
import (
77
"context"
88
"net"
9+
"sync/atomic"
910

1011
"github.com/pingcap/tiproxy/lib/config"
1112
"github.com/pingcap/tiproxy/pkg/manager/elect"
@@ -17,23 +18,25 @@ const (
1718
// vipKey is the key in etcd for VIP election.
1819
vipKey = "/tiproxy/vip/owner"
1920
// sessionTTL is the session's TTL in seconds for VIP election.
20-
sessionTTL = 5
21+
// The etcd client keeps alive every TTL/3 seconds.
22+
// The TTL determines the failover time so it should be short.
23+
sessionTTL = 3
2124
)
2225

2326
type VIPManager interface {
2427
Start(context.Context, *clientv3.Client) error
25-
OnElected()
26-
OnRetired()
28+
Resign()
2729
Close()
2830
}
2931

3032
var _ VIPManager = (*vipManager)(nil)
3133

3234
type vipManager struct {
33-
operation NetworkOperation
34-
cfgGetter config.ConfigGetter
35-
election elect.Election
36-
lg *zap.Logger
35+
operation NetworkOperation
36+
cfgGetter config.ConfigGetter
37+
election elect.Election
38+
delOnRetire atomic.Bool
39+
lg *zap.Logger
3740
}
3841

3942
func NewVIPManager(lg *zap.Logger, cfgGetter config.ConfigGetter) (*vipManager, error) {
@@ -55,6 +58,7 @@ func NewVIPManager(lg *zap.Logger, cfgGetter config.ConfigGetter) (*vipManager,
5558
return nil, err
5659
}
5760
vm.operation = operation
61+
vm.delOnRetire.Store(true)
5862
return vm, nil
5963
}
6064

@@ -81,6 +85,16 @@ func (vm *vipManager) Start(ctx context.Context, etcdCli *clientv3.Client) error
8185
}
8286

8387
func (vm *vipManager) OnElected() {
88+
vm.addVIP()
89+
}
90+
91+
func (vm *vipManager) OnRetired() {
92+
if vm.delOnRetire.Load() {
93+
vm.delVIP()
94+
}
95+
}
96+
97+
func (vm *vipManager) addVIP() {
8498
hasIP, err := vm.operation.HasIP()
8599
if err != nil {
86100
vm.lg.Error("checking addresses failed", zap.Error(err))
@@ -101,7 +115,7 @@ func (vm *vipManager) OnElected() {
101115
vm.lg.Info("adding VIP success")
102116
}
103117

104-
func (vm *vipManager) OnRetired() {
118+
func (vm *vipManager) delVIP() {
105119
hasIP, err := vm.operation.HasIP()
106120
if err != nil {
107121
vm.lg.Error("checking addresses failed", zap.Error(err))
@@ -118,9 +132,19 @@ func (vm *vipManager) OnRetired() {
118132
vm.lg.Info("deleting VIP success")
119133
}
120134

121-
func (vm *vipManager) Close() {
122-
// The OnRetired() will be called when the election is closed.
135+
// Resign stops compaign but does not delete the VIP.
136+
// It's called before graceful wait to avoid that the VIP is deleted before another member adds VIP.
137+
func (vm *vipManager) Resign() {
138+
vm.delOnRetire.Store(false)
123139
if vm.election != nil {
124140
vm.election.Close()
141+
vm.election = nil
125142
}
126143
}
144+
145+
// Close stops compaign and deletes the VIP.
146+
// It's called after graceful wait to ensure the VIP is finally deleted.
147+
func (vm *vipManager) Close() {
148+
vm.Resign()
149+
vm.delVIP()
150+
}

pkg/manager/vip/manager_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func TestNetworkOperation(t *testing.T) {
123123
cfgGetter: newMockConfigGetter(config.HA{VirtualIP: "10.10.10.10/24", Interface: "eth0"}),
124124
operation: operation,
125125
}
126+
vm.delOnRetire.Store(true)
126127
vm.election = newMockElection(ch, vm)
127128
childCtx, cancel := context.WithCancel(context.Background())
128129
vm.election.Start(childCtx)
@@ -142,3 +143,45 @@ func TestNetworkOperation(t *testing.T) {
142143
cancel()
143144
vm.Close()
144145
}
146+
147+
func TestResign(t *testing.T) {
148+
tests := []struct {
149+
hasIP bool
150+
expectedLog string
151+
}{
152+
{
153+
hasIP: false,
154+
expectedLog: "do nothing",
155+
},
156+
{
157+
hasIP: true,
158+
expectedLog: "deleting VIP success",
159+
},
160+
}
161+
162+
for i, test := range tests {
163+
lg, text := logger.CreateLoggerForTest(t)
164+
operation := newMockNetworkOperation()
165+
vm := &vipManager{
166+
lg: lg,
167+
cfgGetter: newMockConfigGetter(config.HA{VirtualIP: "10.10.10.10/24", Interface: "eth0"}),
168+
operation: operation,
169+
}
170+
vm.delOnRetire.Store(true)
171+
operation.hasIP.Store(test.hasIP)
172+
173+
// resign doesn't delete vip
174+
vm.Resign()
175+
if test.hasIP {
176+
vm.OnRetired()
177+
}
178+
time.Sleep(100 * time.Millisecond)
179+
require.False(t, strings.Contains(text.String(), test.expectedLog), "case %d", i)
180+
181+
// close deletes vip
182+
vm.Close()
183+
require.Eventually(t, func() bool {
184+
return strings.Contains(text.String(), test.expectedLog)
185+
}, 3*time.Second, 10*time.Millisecond, "case %d", i)
186+
}
187+
}

pkg/server/server.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,17 @@ func (s *Server) Close() error {
209209
metrics.ServerEventCounter.WithLabelValues(metrics.EventClose).Inc()
210210

211211
errs := make([]error, 0, 4)
212-
// Remove the VIP before closing the SQL server so that clients connect to other nodes.
212+
// Resign the VIP owner before graceful wait so that clients connect to other nodes.
213213
if s.vipManager != nil && !reflect.ValueOf(s.vipManager).IsNil() {
214-
s.vipManager.Close()
214+
s.vipManager.Resign()
215215
}
216216
if s.proxy != nil {
217217
errs = append(errs, s.proxy.Close())
218218
}
219+
// Delete VIP after graceful wait.
220+
if s.vipManager != nil && !reflect.ValueOf(s.vipManager).IsNil() {
221+
s.vipManager.Close()
222+
}
219223
if s.apiServer != nil {
220224
errs = append(errs, s.apiServer.Close())
221225
}

0 commit comments

Comments
 (0)