Skip to content

Commit 54a7ac4

Browse files
authored
Improve the failover efficiency (#362)
1 parent e043a82 commit 54a7ac4

File tree

2 files changed

+15
-11
lines changed

2 files changed

+15
-11
lines changed

controller/cluster.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -138,27 +138,30 @@ func (c *ClusterChecker) increaseFailureCount(shardIndex int, node store.Node) i
138138
}
139139

140140
log := logger.Get().With(
141+
zap.String("cluster_name", c.clusterName),
141142
zap.String("id", node.ID()),
142143
zap.Bool("is_master", node.IsMaster()),
143144
zap.String("addr", node.Addr()))
144-
if count%c.options.maxFailureCount == 0 {
145+
if count%c.options.maxFailureCount == 0 || count > c.options.maxFailureCount {
145146
cluster, err := c.clusterStore.GetCluster(c.ctx, c.namespace, c.clusterName)
146147
if err != nil {
147-
log.Error("Failed to get the clusterName info", zap.Error(err))
148+
log.Error("Failed to get the cluster info", zap.Error(err))
148149
return count
149150
}
150151
newMasterID, err := cluster.PromoteNewMaster(c.ctx, shardIndex, node.ID(), "")
151-
if err == nil {
152-
// the node must be healthy to be elected as the new master,
153-
// so an elected node can be treated as normal.
154-
c.resetFailureCount(newMasterID)
155-
err = c.clusterStore.UpdateCluster(c.ctx, c.namespace, cluster)
156-
}
157152
if err != nil {
158153
log.Error("Failed to promote the new master", zap.Error(err))
159-
} else {
160-
log.With(zap.String("new_master_id", newMasterID)).Info("Promote the new master")
154+
return count
155+
}
156+
err = c.clusterStore.UpdateCluster(c.ctx, c.namespace, cluster)
157+
if err != nil {
158+
log.Error("Failed to update the cluster", zap.Error(err))
159+
return count
161160
}
161+
// the node must be healthy to be elected as the new master,
162+
// so an elected node can be treated as normal.
163+
c.resetFailureCount(newMasterID)
164+
log.With(zap.String("new_master_id", newMasterID)).Info("Promote the new master")
162165
}
163166
return count
164167
}
@@ -216,6 +219,7 @@ func (c *ClusterChecker) parallelProbeNodes(ctx context.Context, cluster *store.
216219
go func(shardIdx int, n store.Node) {
217220
defer wg.Done()
218221
log := logger.Get().With(
222+
zap.String("cluster_name", c.clusterName),
219223
zap.String("id", n.ID()),
220224
zap.Bool("is_master", n.IsMaster()),
221225
zap.String("addr", n.Addr()),

store/cluster_node.go

100644 → 100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ const (
4646
dialTimeout = 3200 * time.Millisecond
4747
readTimeout = 3 * time.Second
4848
writeTimeout = 3 * time.Second
49-
minIdleConns = 3
49+
minIdleConns = 10
5050
)
5151

5252
var (

0 commit comments

Comments
 (0)