Skip to content

Commit 2eb1b6b

Browse files
fix(salud): use exponential backoff for wake up of salud check (#5109)
1 parent 97bcd68 commit 2eb1b6b

File tree

3 files changed

+17
-6
lines changed

3 files changed

+17
-6
lines changed

pkg/pingpong/pingpong.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
```diff
@@ -22,7 +22,7 @@ import (
 )

 // loggerName is the tree path name of the logger for this package.
-const loggerName = "pinpong"
+const loggerName = "pingpong"

 const (
 	protocolName = "pingpong"
```

pkg/salud/salud.go

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
```diff
@@ -25,8 +25,10 @@ import (
 const loggerName = "salud"

 const (
-	wakeup = time.Minute * 5
 	requestTimeout = time.Second * 10
+	initialBackoffDelay = 10 * time.Second
+	maxBackoffDelay = 5 * time.Minute
+	backoffFactor = 2
 	DefaultMinPeersPerBin = 4
 	DefaultDurPercentile = 0.4 // consider 40% as healthy, lower percentile = stricter duration check
 	DefaultConnsPercentile = 0.8 // consider 80% as healthy, lower percentile = stricter conns check
```
```diff
@@ -97,14 +99,20 @@ func (s *service) worker(startupStabilizer stabilization.Subscriber, mode string
 		s.logger.Debug("node warmup check completed")
 	}

-	for {
+	currentDelay := initialBackoffDelay

+	for {
 		s.salud(mode, minPeersPerbin, durPercentile, connsPercentile)

 		select {
 		case <-s.quit:
 			return
-		case <-time.After(wakeup):
+		case <-time.After(currentDelay):
+		}
+
+		currentDelay *= time.Duration(backoffFactor)
+		if currentDelay > maxBackoffDelay {
+			currentDelay = maxBackoffDelay
 		}
 	}
 }
```
```diff
@@ -135,7 +143,7 @@ func (s *service) salud(mode string, minPeersPerbin int, durPercentile float64,
 		bins [swarm.MaxBins]int
 	)

-	_ = s.topology.EachConnectedPeer(func(addr swarm.Address, bin uint8) (stop bool, jumpToNext bool, err error) {
+	err := s.topology.EachConnectedPeer(func(addr swarm.Address, bin uint8) (stop bool, jumpToNext bool, err error) {
 		wg.Add(1)
 		go func() {
 			defer wg.Done()
```
```diff
@@ -164,6 +172,9 @@ func (s *service) salud(mode string, minPeersPerbin int, durPercentile float64,
 		}()
 		return false, false, nil
 	}, topology.Select{})
+	if err != nil {
+		s.logger.Error(err, "error iterating over connected peers", "mode", mode)
+	}

 	wg.Wait()

```

pkg/storer/internal/stampindex/stampindex.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
```diff
@@ -213,7 +213,7 @@ func Delete(s storage.Writer, scope string, stamp swarm.Stamp) error {
 		StampIndex: stamp.Index(),
 	}
 	if err := s.Delete(item); err != nil {
-		return fmt.Errorf("failed to delete stampindex.Item %s: %w", item, err)
+		return fmt.Errorf("failed to delete stampindex.Item %s: %w", item.ID(), err)
 	}
 	return nil
 }
```

0 commit comments

Comments
 (0)