Skip to content

Commit 436979f

Browse files
feat(options): Clean failing timeout implementation (#3472)
* Fix hard code of failing timeout 1. if not set failing time limit, default is 15 seconds. * feat: Complete configurable FailingTimeoutSeconds implementation --------- Co-authored-by: Shino Wu <[email protected]>
1 parent 6220024 commit 436979f

File tree

5 files changed

+130
-25
lines changed

5 files changed

+130
-25
lines changed

options.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,11 @@ type Options struct {
231231
// UnstableResp3 enables Unstable mode for Redis Search module with RESP3.
232232
// When unstable mode is enabled, the client will use RESP3 protocol and only be able to use RawResult
233233
UnstableResp3 bool
234+
235+
// FailingTimeoutSeconds is the timeout in seconds for marking a cluster node as failing.
236+
// When a node is marked as failing, it will be avoided for this duration.
237+
// Default is 15 seconds.
238+
FailingTimeoutSeconds int
234239
}
235240

236241
func (opt *Options) init() {

osscluster.go

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,11 @@ type ClusterOptions struct {
124124

125125
// UnstableResp3 enables Unstable mode for Redis Search module with RESP3.
126126
UnstableResp3 bool
127+
128+
// FailingTimeoutSeconds is the timeout in seconds for marking a cluster node as failing.
129+
// When a node is marked as failing, it will be avoided for this duration.
130+
// Default is 15 seconds.
131+
FailingTimeoutSeconds int
127132
}
128133

129134
func (opt *ClusterOptions) init() {
@@ -180,6 +185,10 @@ func (opt *ClusterOptions) init() {
180185
if opt.NewClient == nil {
181186
opt.NewClient = NewClient
182187
}
188+
189+
if opt.FailingTimeoutSeconds == 0 {
190+
opt.FailingTimeoutSeconds = 15
191+
}
183192
}
184193

185194
// ParseClusterURL parses a URL into ClusterOptions that can be used to connect to Redis.
@@ -284,6 +293,7 @@ func setupClusterQueryParams(u *url.URL, o *ClusterOptions) (*ClusterOptions, er
284293
o.PoolTimeout = q.duration("pool_timeout")
285294
o.ConnMaxLifetime = q.duration("conn_max_lifetime")
286295
o.ConnMaxIdleTime = q.duration("conn_max_idle_time")
296+
o.FailingTimeoutSeconds = q.int("failing_timeout_seconds")
287297

288298
if q.err != nil {
289299
return nil, q.err
@@ -330,20 +340,21 @@ func (opt *ClusterOptions) clientOptions() *Options {
330340
WriteTimeout: opt.WriteTimeout,
331341
ContextTimeoutEnabled: opt.ContextTimeoutEnabled,
332342

333-
PoolFIFO: opt.PoolFIFO,
334-
PoolSize: opt.PoolSize,
335-
PoolTimeout: opt.PoolTimeout,
336-
MinIdleConns: opt.MinIdleConns,
337-
MaxIdleConns: opt.MaxIdleConns,
338-
MaxActiveConns: opt.MaxActiveConns,
339-
ConnMaxIdleTime: opt.ConnMaxIdleTime,
340-
ConnMaxLifetime: opt.ConnMaxLifetime,
341-
ReadBufferSize: opt.ReadBufferSize,
342-
WriteBufferSize: opt.WriteBufferSize,
343-
DisableIdentity: opt.DisableIdentity,
344-
DisableIndentity: opt.DisableIdentity,
345-
IdentitySuffix: opt.IdentitySuffix,
346-
TLSConfig: opt.TLSConfig,
343+
PoolFIFO: opt.PoolFIFO,
344+
PoolSize: opt.PoolSize,
345+
PoolTimeout: opt.PoolTimeout,
346+
MinIdleConns: opt.MinIdleConns,
347+
MaxIdleConns: opt.MaxIdleConns,
348+
MaxActiveConns: opt.MaxActiveConns,
349+
ConnMaxIdleTime: opt.ConnMaxIdleTime,
350+
ConnMaxLifetime: opt.ConnMaxLifetime,
351+
ReadBufferSize: opt.ReadBufferSize,
352+
WriteBufferSize: opt.WriteBufferSize,
353+
DisableIdentity: opt.DisableIdentity,
354+
DisableIndentity: opt.DisableIdentity,
355+
IdentitySuffix: opt.IdentitySuffix,
356+
FailingTimeoutSeconds: opt.FailingTimeoutSeconds,
357+
TLSConfig: opt.TLSConfig,
347358
// If ClusterSlots is populated, then we probably have an artificial
348359
// cluster whose nodes are not in clustering mode (otherwise there isn't
349360
// much use for ClusterSlots config). This means we cannot execute the
@@ -432,7 +443,7 @@ func (n *clusterNode) MarkAsFailing() {
432443
}
433444

434445
func (n *clusterNode) Failing() bool {
435-
const timeout = 15 // 15 seconds
446+
timeout := int64(n.Client.opt.FailingTimeoutSeconds)
436447

437448
failing := atomic.LoadUint32(&n.failing)
438449
if failing == 0 {

osscluster_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,6 +1665,10 @@ var _ = Describe("ClusterClient ParseURL", func() {
16651665
test: "UseDefault",
16661666
url: "redis://localhost:123?conn_max_idle_time=",
16671667
o: &redis.ClusterOptions{Addrs: []string{"localhost:123"}, ConnMaxIdleTime: 0},
1668+
}, {
1669+
test: "FailingTimeoutSeconds",
1670+
url: "redis://localhost:123?failing_timeout_seconds=25",
1671+
o: &redis.ClusterOptions{Addrs: []string{"localhost:123"}, FailingTimeoutSeconds: 25},
16681672
}, {
16691673
test: "Protocol",
16701674
url: "redis://localhost:123?protocol=2",
@@ -1729,7 +1733,79 @@ var _ = Describe("ClusterClient ParseURL", func() {
17291733
Expect(tc.o.ConnMaxLifetime).To(Equal(actual.ConnMaxLifetime))
17301734
Expect(tc.o.ConnMaxIdleTime).To(Equal(actual.ConnMaxIdleTime))
17311735
Expect(tc.o.PoolTimeout).To(Equal(actual.PoolTimeout))
1736+
Expect(tc.o.FailingTimeoutSeconds).To(Equal(actual.FailingTimeoutSeconds))
17321737
}
17331738
}
17341739
})
17351740
})
1741+
1742+
var _ = Describe("ClusterClient FailingTimeoutSeconds", func() {
1743+
var client *redis.ClusterClient
1744+
1745+
AfterEach(func() {
1746+
if client != nil {
1747+
_ = client.Close()
1748+
}
1749+
})
1750+
1751+
It("should use default failing timeout of 15 seconds", func() {
1752+
opt := redisClusterOptions()
1753+
client = cluster.newClusterClient(ctx, opt)
1754+
1755+
// Default should be 15 seconds
1756+
Expect(opt.FailingTimeoutSeconds).To(Equal(15))
1757+
})
1758+
1759+
It("should use custom failing timeout", func() {
1760+
opt := redisClusterOptions()
1761+
opt.FailingTimeoutSeconds = 30
1762+
client = cluster.newClusterClient(ctx, opt)
1763+
1764+
// Should use custom value
1765+
Expect(opt.FailingTimeoutSeconds).To(Equal(30))
1766+
})
1767+
1768+
It("should parse failing_timeout_seconds from URL", func() {
1769+
url := "redis://localhost:16600?failing_timeout_seconds=25"
1770+
opt, err := redis.ParseClusterURL(url)
1771+
Expect(err).NotTo(HaveOccurred())
1772+
Expect(opt.FailingTimeoutSeconds).To(Equal(25))
1773+
})
1774+
1775+
It("should handle node failing timeout correctly", func() {
1776+
opt := redisClusterOptions()
1777+
opt.FailingTimeoutSeconds = 2 // Short timeout for testing
1778+
client = cluster.newClusterClient(ctx, opt)
1779+
1780+
// Get a node and mark it as failing
1781+
nodes, err := client.Nodes(ctx, "A")
1782+
Expect(err).NotTo(HaveOccurred())
1783+
Expect(len(nodes)).To(BeNumerically(">", 0))
1784+
1785+
node := nodes[0]
1786+
1787+
// Initially not failing
1788+
Expect(node.Failing()).To(BeFalse())
1789+
1790+
// Mark as failing
1791+
node.MarkAsFailing()
1792+
Expect(node.Failing()).To(BeTrue())
1793+
1794+
// Should still be failing after 1 second (less than timeout)
1795+
time.Sleep(1 * time.Second)
1796+
Expect(node.Failing()).To(BeTrue())
1797+
1798+
// Should not be failing after timeout expires
1799+
time.Sleep(2 * time.Second) // Total 3 seconds > 2 second timeout
1800+
Expect(node.Failing()).To(BeFalse())
1801+
})
1802+
1803+
It("should handle zero timeout by using default", func() {
1804+
opt := redisClusterOptions()
1805+
opt.FailingTimeoutSeconds = 0 // Should use default
1806+
client = cluster.newClusterClient(ctx, opt)
1807+
1808+
// After initialization, should be set to default
1809+
Expect(opt.FailingTimeoutSeconds).To(Equal(15))
1810+
})
1811+
})

sentinel.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,13 @@ type FailoverOptions struct {
129129
DisableIdentity bool
130130

131131
IdentitySuffix string
132-
UnstableResp3 bool
132+
133+
// FailingTimeoutSeconds is the timeout in seconds for marking a cluster node as failing.
134+
// When a node is marked as failing, it will be avoided for this duration.
135+
// Only applies to failover cluster clients. Default is 15 seconds.
136+
FailingTimeoutSeconds int
137+
138+
UnstableResp3 bool
133139
}
134140

135141
func (opt *FailoverOptions) clientOptions() *Options {
@@ -263,10 +269,10 @@ func (opt *FailoverOptions) clusterOptions() *ClusterOptions {
263269

264270
TLSConfig: opt.TLSConfig,
265271

266-
DisableIdentity: opt.DisableIdentity,
267-
DisableIndentity: opt.DisableIndentity,
268-
269-
IdentitySuffix: opt.IdentitySuffix,
272+
DisableIdentity: opt.DisableIdentity,
273+
DisableIndentity: opt.DisableIndentity,
274+
IdentitySuffix: opt.IdentitySuffix,
275+
FailingTimeoutSeconds: opt.FailingTimeoutSeconds,
270276
}
271277
}
272278

universal.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,13 @@ type UniversalOptions struct {
9898
DisableIdentity bool
9999

100100
IdentitySuffix string
101-
UnstableResp3 bool
101+
102+
// FailingTimeoutSeconds is the timeout in seconds for marking a cluster node as failing.
103+
// When a node is marked as failing, it will be avoided for this duration.
104+
// Only applies to cluster clients. Default is 15 seconds.
105+
FailingTimeoutSeconds int
106+
107+
UnstableResp3 bool
102108

103109
// IsClusterMode can be used when only one Addrs is provided (e.g. Elasticache supports setting up cluster mode with configuration endpoint).
104110
IsClusterMode bool
@@ -149,10 +155,11 @@ func (o *UniversalOptions) Cluster() *ClusterOptions {
149155

150156
TLSConfig: o.TLSConfig,
151157

152-
DisableIdentity: o.DisableIdentity,
153-
DisableIndentity: o.DisableIndentity,
154-
IdentitySuffix: o.IdentitySuffix,
155-
UnstableResp3: o.UnstableResp3,
158+
DisableIdentity: o.DisableIdentity,
159+
DisableIndentity: o.DisableIndentity,
160+
IdentitySuffix: o.IdentitySuffix,
161+
FailingTimeoutSeconds: o.FailingTimeoutSeconds,
162+
UnstableResp3: o.UnstableResp3,
156163
}
157164
}
158165

0 commit comments

Comments
 (0)