Commit fb6c2e8

feat: add committed depth field to status protocol (#4892)
1 parent (e161ee8) · commit fb6c2e8

18 files changed: +151 −92 lines

openapi/SwarmCommon.yaml — 2 additions, 0 deletions

@@ -928,6 +928,8 @@ components:
         type: boolean
       lastSyncedBlock:
         type: integer
+      committedDepth:
+        type: integer

   StatusResponse:
     type: object
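
Note: as a quick orientation for the new schema field, here is a minimal sketch of a client reading committedDepth from a node's status endpoint. The local API address and the exact route are assumptions (they are not part of this diff); only the JSON field names follow the schema and handler changes in this commit, and the struct below lists just the fields visible here.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// Subset of the status response touched by this change set.
type statusSnapshot struct {
	BatchCommitment uint64 `json:"batchCommitment"`
	IsReachable     bool   `json:"isReachable"`
	LastSyncedBlock uint64 `json:"lastSyncedBlock"`
	CommittedDepth  uint8  `json:"committedDepth"`
}

func main() {
	// Assumed local node address and route; adjust to your deployment.
	resp, err := http.Get("http://localhost:1633/status")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var ss statusSnapshot
	if err := json.NewDecoder(resp.Body).Decode(&ss); err != nil {
		panic(err)
	}
	fmt.Printf("committed depth: %d (last synced block: %d)\n", ss.CommittedDepth, ss.LastSyncedBlock)
}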

pkg/api/api_test.go — 0 additions, 1 deletion

@@ -712,7 +712,6 @@ func createRedistributionAgentService(
 		tranService,
 		&mockHealth{},
 		log.Noop,
-		0,
 	)
 }

pkg/api/status.go — 3 additions, 0 deletions

@@ -30,6 +30,7 @@ type statusSnapshotResponse struct {
 	BatchCommitment uint64 `json:"batchCommitment"`
 	IsReachable     bool   `json:"isReachable"`
 	LastSyncedBlock uint64 `json:"lastSyncedBlock"`
+	CommittedDepth  uint8  `json:"committedDepth"`
 }

 type statusResponse struct {
@@ -94,6 +95,7 @@ func (s *Service) statusGetHandler(w http.ResponseWriter, _ *http.Request) {
 		BatchCommitment: ss.BatchCommitment,
 		IsReachable:     ss.IsReachable,
 		LastSyncedBlock: ss.LastSyncedBlock,
+		CommittedDepth:  uint8(ss.CommittedDepth),
 	})
 }

@@ -141,6 +143,7 @@ func (s *Service) statusGetPeersHandler(w http.ResponseWriter, r *http.Request)
 		snapshot.BatchCommitment = ss.BatchCommitment
 		snapshot.IsReachable = ss.IsReachable
 		snapshot.LastSyncedBlock = ss.LastSyncedBlock
+		snapshot.CommittedDepth = uint8(ss.CommittedDepth)
 	}

 	mu.Lock()
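
Note on the uint8(ss.CommittedDepth) casts above: the status protocol snapshot carries CommittedDepth as a uint32 (see the pkg/salud changes below, which compare it against uint32(swarm.MaxBins)), while the HTTP response narrows it to uint8. Depths are bounded by swarm.MaxBins, so the plain cast is fine in practice; purely as a sketch (not part of this change, and assuming the swarm package import), a defensive variant could clamp out-of-range values instead of letting them wrap:

// Sketch only; swarm.MaxBins is the existing bound used when bucketing depths.
func committedDepthForAPI(wireDepth uint32) uint8 {
	if wireDepth >= uint32(swarm.MaxBins) {
		return uint8(swarm.MaxBins) // clamp a misreported value instead of wrapping
	}
	return uint8(wireDepth)
}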

pkg/api/status_test.go — 4 additions, 0 deletions

@@ -40,6 +40,7 @@ func TestGetStatus(t *testing.T) {
 		BatchCommitment: 1,
 		IsReachable:     true,
 		LastSyncedBlock: 6092500,
+		CommittedDepth:  1,
 	}

 	ssMock := &statusSnapshotMock{
@@ -49,6 +50,7 @@ func TestGetStatus(t *testing.T) {
 		storageRadius:  ssr.StorageRadius,
 		commitment:     ssr.BatchCommitment,
 		chainState:     &postage.ChainState{Block: ssr.LastSyncedBlock},
+		committedDepth: ssr.CommittedDepth,
 	}

 	statusSvc := status.NewService(
@@ -122,6 +124,7 @@ type statusSnapshotMock struct {
 	commitment     uint64
 	chainState     *postage.ChainState
 	neighborhoods  []*storer.NeighborhoodStat
+	committedDepth uint8
 }

 func (m *statusSnapshotMock) SyncRate() float64 { return m.syncRate }
@@ -135,3 +138,4 @@ func (m *statusSnapshotMock) ReserveSizeWithinRadius() uint64 {
 func (m *statusSnapshotMock) NeighborhoodsStat(ctx context.Context) ([]*storer.NeighborhoodStat, error) {
 	return m.neighborhoods, nil
 }
+func (m *statusSnapshotMock) CommittedDepth() uint8 { return m.committedDepth }
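
The new mock method implies that the snapshot provider consumed by the status service gained a CommittedDepth accessor. As a hedged sketch, the provider's shape roughly looks like the following; the interface name is a placeholder for illustration, and only the method signatures are taken from the mock above:

// Hypothetical shape of the provider satisfied by statusSnapshotMock.
type snapshotProvider interface {
	SyncRate() float64
	ReserveSizeWithinRadius() uint64
	NeighborhoodsStat(ctx context.Context) ([]*storer.NeighborhoodStat, error)
	CommittedDepth() uint8
	// ...other accessors (storage radius, commitment, chain state) omitted.
}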

pkg/node/node.go — 1 addition, 2 deletions

@@ -907,7 +907,7 @@ func NewBee(
 		return nil, fmt.Errorf("status service: %w", err)
 	}

-	saludService := salud.New(nodeStatus, kad, localStore, logger, warmupTime, api.FullMode.String(), salud.DefaultMinPeersPerBin, salud.DefaultDurPercentile, salud.DefaultConnsPercentile, uint8(o.ReserveCapacityDoubling))
+	saludService := salud.New(nodeStatus, kad, localStore, logger, warmupTime, api.FullMode.String(), salud.DefaultMinPeersPerBin, salud.DefaultDurPercentile, salud.DefaultConnsPercentile)
 	b.saludCloser = saludService

 	rC, unsub := saludService.SubscribeNetworkStorageRadius()
@@ -1086,7 +1086,6 @@ func NewBee(
 		transactionService,
 		saludService,
 		logger,
-		uint8(o.ReserveCapacityDoubling),
 	)
 	if err != nil {
 		return nil, fmt.Errorf("storage incentives agent: %w", err)

pkg/salud/salud.go — 21 additions, 27 deletions

@@ -52,8 +52,6 @@ type service struct {

 	radiusSubsMtx sync.Mutex
 	radiusC       []chan uint8
-
-	capacityDoubling uint8
 }

 func New(
@@ -66,20 +64,18 @@ func New(
 	minPeersPerbin int,
 	durPercentile float64,
 	connsPercentile float64,
-	capacityDoubling uint8,
 ) *service {

 	metrics := newMetrics()

 	s := &service{
-		quit:             make(chan struct{}),
-		logger:           logger.WithName(loggerName).Register(),
-		status:           status,
-		topology:         topology,
-		metrics:          metrics,
-		isSelfHealthy:    atomic.NewBool(true),
-		reserve:          reserve,
-		capacityDoubling: capacityDoubling,
+		quit:          make(chan struct{}),
+		logger:        logger.WithName(loggerName).Register(),
+		status:        status,
+		topology:      topology,
+		metrics:       metrics,
+		isSelfHealthy: atomic.NewBool(true),
+		reserve:       reserve,
 	}

 	s.wg.Add(1)
@@ -173,7 +169,7 @@ func (s *service) salud(mode string, minPeersPerbin int, durPercentile float64,
 		return
 	}

-	networkRadius, nHoodRadius := s.radius(peers)
+	networkRadius, nHoodRadius := s.committedDepth(peers)
 	avgDur := totaldur / float64(len(peers))
 	pDur := percentileDur(peers, durPercentile)
 	pConns := percentileConns(peers, connsPercentile)
@@ -199,8 +195,8 @@ func (s *service) salud(mode string, minPeersPerbin int, durPercentile float64,
 			continue
 		}

-		if networkRadius > 0 && peer.status.StorageRadius < uint32(networkRadius-2) {
-			s.logger.Debug("radius health failure", "radius", peer.status.StorageRadius, "peer_address", peer.addr)
+		if networkRadius > 0 && peer.status.CommittedDepth < uint32(networkRadius-2) {
+			s.logger.Debug("radius health failure", "radius", peer.status.CommittedDepth, "peer_address", peer.addr)
 		} else if peer.dur.Seconds() > pDur {
 			s.logger.Debug("response duration below threshold", "duration", peer.dur, "peer_address", peer.addr)
 		} else if peer.status.ConnectedPeers < pConns {
@@ -220,12 +216,10 @@ func (s *service) salud(mode string, minPeersPerbin int, durPercentile float64,
 		}
 	}

-	networkRadiusEstimation := s.reserve.StorageRadius() + s.capacityDoubling
-
 	selfHealth := true
-	if nHoodRadius == networkRadius && networkRadiusEstimation != networkRadius {
+	if nHoodRadius == networkRadius && s.reserve.CommittedDepth() != networkRadius {
 		selfHealth = false
-		s.logger.Warning("node is unhealthy due to storage radius discrepancy", "self_radius", networkRadiusEstimation, "network_radius", networkRadius)
+		s.logger.Warning("node is unhealthy due to storage radius discrepancy", "self_radius", s.reserve.CommittedDepth(), "network_radius", networkRadius)
 	}

 	s.isSelfHealthy.Store(selfHealth)
@@ -294,24 +288,24 @@ func percentileConns(peers []peer, p float64) uint64 {
 }

 // radius finds the most common radius.
-func (s *service) radius(peers []peer) (uint8, uint8) {
+func (s *service) committedDepth(peers []peer) (uint8, uint8) {

-	var networkRadius [swarm.MaxBins]int
-	var nHoodRadius [swarm.MaxBins]int
+	var networkDepth [swarm.MaxBins]int
+	var nHoodDepth [swarm.MaxBins]int

 	for _, peer := range peers {
-		if peer.status.StorageRadius < uint32(swarm.MaxBins) {
+		if peer.status.CommittedDepth < uint32(swarm.MaxBins) {
 			if peer.neighbor {
-				nHoodRadius[peer.status.StorageRadius]++
+				nHoodDepth[peer.status.CommittedDepth]++
 			}
-			networkRadius[peer.status.StorageRadius]++
+			networkDepth[peer.status.CommittedDepth]++
 		}
 	}

-	networkR := maxIndex(networkRadius[:])
-	hoodR := maxIndex(nHoodRadius[:])
+	networkD := maxIndex(networkDepth[:])
+	hoodD := maxIndex(nHoodDepth[:])

-	return uint8(networkR), uint8(hoodR)
+	return uint8(networkD), uint8(hoodD)
 }

 // commitment finds the most common batch commitment.
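
The renamed committedDepth helper keeps the same voting scheme as the old radius function: each peer's reported depth is one vote, and the most common bin becomes the network (and neighbourhood) estimate. A peer is then flagged unhealthy when its own committed depth falls more than two bins below that network estimate, mirroring the networkRadius-2 check above. A standalone sketch of the histogram logic, with maxIndex inlined for clarity (swarm.MaxBins is the existing bin bound; this is not the package's actual code):

// Sketch of the most-common-depth vote used above.
func mostCommonDepth(depths []uint32) uint8 {
	var hist [swarm.MaxBins]int
	for _, d := range depths {
		if d < uint32(swarm.MaxBins) {
			hist[d]++ // each peer votes for its reported committed depth
		}
	}
	best := 0
	for i, count := range hist {
		if count > hist[best] {
			best = i
		}
	}
	return uint8(best)
}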

pkg/salud/salud_test.go — 23 additions, 20 deletions

@@ -31,28 +31,28 @@ func TestSalud(t *testing.T) {
 	t.Parallel()
 	peers := []peer{
 		// fully healhy
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, true},

 		// healthy since radius >= most common radius - 2
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 7, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 7, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 7}, 1, true},

 		// radius too low
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 5, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, false},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 5, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 5}, 1, false},

 		// dur too long
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 2, false},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 2, false},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 2, false},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 2, false},

 		// connections not enough
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 90, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100}, 1, false},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 90, StorageRadius: 8, BeeMode: "full", BatchCommitment: 50, ReserveSize: 100, CommittedDepth: 8}, 1, false},

 		// commitment wrong
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 35, ReserveSize: 100}, 1, false},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", BatchCommitment: 35, ReserveSize: 100, CommittedDepth: 8}, 1, false},
 	}

 	statusM := &statusMock{make(map[string]peer)}
@@ -66,11 +66,12 @@ func TestSalud(t *testing.T) {
 	topM := topMock.NewTopologyDriver(topMock.WithPeers(addrs...))

 	reserve := mockstorer.NewReserve(
-		mockstorer.WithRadius(8),
+		mockstorer.WithRadius(6),
 		mockstorer.WithReserveSize(100),
+		mockstorer.WithCapacityDoubling(2),
 	)

-	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8, 0)
+	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8)

 	err := spinlock.Wait(time.Minute, func() bool {
 		return len(topM.PeersHealth()) == len(peers)
@@ -114,9 +115,10 @@ func TestSelfUnhealthyRadius(t *testing.T) {
 	reserve := mockstorer.NewReserve(
 		mockstorer.WithRadius(7),
 		mockstorer.WithReserveSize(100),
+		mockstorer.WithCapacityDoubling(0),
 	)

-	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8, 0)
+	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8)
 	testutil.CleanupCloser(t, service)

 	err := spinlock.Wait(time.Minute, func() bool {
@@ -135,8 +137,8 @@ func TestSelfHealthyCapacityDoubling(t *testing.T) {
 	t.Parallel()
 	peers := []peer{
 		// fully healhy
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full"}, 0, true},
-		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full"}, 0, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", CommittedDepth: 8}, 0, true},
+		{swarm.RandAddress(t), &status.Snapshot{ConnectedPeers: 100, StorageRadius: 8, BeeMode: "full", CommittedDepth: 8}, 0, true},
 	}

 	statusM := &statusMock{make(map[string]peer)}
@@ -151,9 +153,10 @@ func TestSelfHealthyCapacityDoubling(t *testing.T) {
 	reserve := mockstorer.NewReserve(
 		mockstorer.WithRadius(6),
 		mockstorer.WithReserveSize(100),
+		mockstorer.WithCapacityDoubling(2),
 	)

-	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8, 2)
+	service := salud.New(statusM, topM, reserve, log.Noop, -1, "full", 0, 0.8, 0.8)
 	testutil.CleanupCloser(t, service)

 	err := spinlock.Wait(time.Minute, func() bool {
@@ -183,7 +186,7 @@ func TestSubToRadius(t *testing.T) {

 	topM := topMock.NewTopologyDriver(topMock.WithPeers(addrs...))

-	service := salud.New(&statusMock{make(map[string]peer)}, topM, mockstorer.NewReserve(), log.Noop, -1, "full", 0, 0.8, 0.8, 0)
+	service := salud.New(&statusMock{make(map[string]peer)}, topM, mockstorer.NewReserve(), log.Noop, -1, "full", 0, 0.8, 0.8)

 	c, unsub := service.SubscribeNetworkStorageRadius()
 	t.Cleanup(unsub)
@@ -216,7 +219,7 @@ func TestUnsub(t *testing.T) {

 	topM := topMock.NewTopologyDriver(topMock.WithPeers(addrs...))

-	service := salud.New(&statusMock{make(map[string]peer)}, topM, mockstorer.NewReserve(), log.Noop, -1, "full", 0, 0.8, 0.8, 0)
+	service := salud.New(&statusMock{make(map[string]peer)}, topM, mockstorer.NewReserve(), log.Noop, -1, "full", 0, 0.8, 0.8)
 	testutil.CleanupCloser(t, service)

 	c, unsub := service.SubscribeNetworkStorageRadius()
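
The reworked tests lean on the mock reserve composing its committed depth from radius and capacity doubling. A minimal fragment under that assumption (option and method names are taken from the test and the salud service changes above; the expected value of 8 is an inference, not a quote from the mock's implementation):

	// Under the radius + doubling assumption, this mock should report a committed
	// depth of 8, matching the peers' CommittedDepth: 8 snapshots in TestSalud.
	reserve := mockstorer.NewReserve(
		mockstorer.WithRadius(6),
		mockstorer.WithReserveSize(100),
		mockstorer.WithCapacityDoubling(2),
	)
	_ = reserve.CommittedDepth() // expected: 8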
