@@ -6,7 +6,6 @@ package elect
66import (
77 "context"
88 "strings"
9- "sync/atomic"
109 "time"
1110
1211 "github.com/pingcap/tiproxy/lib/util/errors"
@@ -39,8 +38,6 @@ type Election interface {
3938 Start (context.Context )
4039 // ID returns the member ID.
4140 ID () string
42- // IsOwner returns whether the member is the owner.
43- IsOwner () bool
4441 // GetOwnerID gets the owner ID.
4542 GetOwnerID (ctx context.Context ) (string , error )
4643 // Close stops campaigning for the owner.
@@ -75,7 +72,6 @@ type election struct {
7572 trimedKey string
7673 lg * zap.Logger
7774 etcdCli * clientv3.Client
78- elec atomic.Pointer [concurrency.Election ]
7975 wg waitgroup.WaitGroup
8076 cancel context.CancelFunc
8177 member Member
@@ -133,19 +129,17 @@ func (m *election) initSession(ctx context.Context) (*concurrency.Session, error
133129 return session , err
134130}
135131
136- func (m * election ) IsOwner () bool {
137- ownerID , err := m .GetOwnerID (context .Background ())
138- if err != nil {
139- return false
140- }
141- return ownerID == m .id
142- }
143-
144132func (m * election ) campaignLoop (ctx context.Context ) {
145133 session , err := m .initSession (ctx )
146134 if err != nil {
147135 return
148136 }
137+ isOwner := false
138+ defer func () {
139+ if isOwner {
140+ m .onRetired ()
141+ }
142+ }()
149143 for {
150144 select {
151145 case <- session .Done ():
@@ -172,34 +166,44 @@ func (m *election) campaignLoop(ctx context.Context) {
172166 continue
173167 }
174168
169+ // Retire only after the etcd server can be connected again so that there is always an owner.
170+ // It's acceptable for multiple members to act as the owner, but not for no member to act as the owner.
171+ // E.g. at least one member needs to bind the VIP.
172+ if isOwner {
173+ m .onRetired ()
174+ isOwner = false
175+ }
176+
175177 elec := concurrency .NewElection (session , m .key )
176178 if err = elec .Campaign (ctx , m .id ); err != nil {
177179 m .lg .Info ("failed to campaign" , zap .Error (err ))
178180 continue
179181 }
180182
181- ownerID , err := m .GetOwnerID (ctx )
182- if err != nil || ownerID != m .id {
183+ kv , err := m .getOwnerInfo (ctx )
184+ if err != nil {
185+ m .lg .Warn ("failed to get owner info" , zap .Error (err ))
186+ continue
187+ }
188+ if hack .String (kv .Value ) != m .id {
189+ m .lg .Warn ("owner id mismatches" , zap .String ("owner" , hack .String (kv .Value )))
183190 continue
184191 }
185192
186- m .onElected (elec )
187- // NOTICE: watchOwner won't revoke the lease.
188- m .watchOwner (ctx , session , ownerID )
189- m .onRetired ()
193+ m .onElected ()
194+ isOwner = true
195+ m .watchOwner (ctx , session , hack .String (kv .Key ))
190196 }
191197}
192198
193- func (m * election ) onElected (elec * concurrency. Election ) {
199+ func (m * election ) onElected () {
194200 m .member .OnElected ()
195- m .elec .Store (elec )
196201 metrics .OwnerGauge .WithLabelValues (m .trimedKey ).Set (1 )
197202 m .lg .Info ("elected as the owner" )
198203}
199204
200205func (m * election ) onRetired () {
201206 m .member .OnRetired ()
202- m .elec .Store (nil )
203207 // Delete the metric so that it doesn't show on Grafana.
204208 metrics .OwnerGauge .MetricVec .DeletePartialMatch (map [string ]string {metrics .LblType : m .trimedKey })
205209 m .lg .Info ("the owner retires" )
@@ -218,17 +222,25 @@ func (m *election) revokeLease(leaseID clientv3.LeaseID) {
218222
219223// GetOwnerID is similar to concurrency.Election.Leader() but it doesn't need a concurrency.Election.
220224func (m * election ) GetOwnerID (ctx context.Context ) (string , error ) {
225+ kv , err := m .getOwnerInfo (ctx )
226+ if err != nil {
227+ return "" , err
228+ }
229+ return hack .String (kv .Value ), nil
230+ }
231+
232+ func (m * election ) getOwnerInfo (ctx context.Context ) (* mvccpb.KeyValue , error ) {
221233 if m .etcdCli == nil {
222- return "" , concurrency .ErrElectionNoLeader
234+ return nil , concurrency .ErrElectionNoLeader
223235 }
224236 kvs , err := etcd .GetKVs (ctx , m .etcdCli , m .key , clientv3 .WithFirstCreate (), m .cfg .Timeout , m .cfg .RetryIntvl , m .cfg .RetryCnt )
225237 if err != nil {
226- return "" , err
238+ return nil , err
227239 }
228240 if len (kvs ) == 0 {
229- return "" , concurrency .ErrElectionNoLeader
241+ return nil , concurrency .ErrElectionNoLeader
230242 }
231- return hack . String ( kvs [0 ]. Value ) , nil
243+ return kvs [0 ], nil
232244}
233245
234246func (m * election ) watchOwner (ctx context.Context , session * concurrency.Session , key string ) {
0 commit comments