Skip to content

Commit 508e798

Browse files
authored
transition to oos from oos for load balanced multinode rpc (#66)
* transition to oos from oos
* review
1 parent d49ca2e commit 508e798

File tree

4 files changed

+7 -4 lines changed

multinode/node_fsm.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToOutOfSync(fn func()) {
239239
return
240240
}
241241
switch n.state {
242-
case nodeStateAlive:
242+
case nodeStateAlive, nodeStateOutOfSync:
243243
n.rpc.Close()
244244
n.state = nodeStateOutOfSync
245245
default:

multinode/node_fsm_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ func TestUnit_Node_StateTransitions(t *testing.T) {
4949
})
5050
t.Run("transitionToOutOfSync", func(t *testing.T) {
5151
const destinationState = nodeStateOutOfSync
52-
allowedStates := []nodeState{nodeStateAlive}
52+
allowedStates := []nodeState{nodeStateAlive, nodeStateOutOfSync}
5353
rpc := newMockRPCClient[ID, Head](t)
5454
rpc.On("Close")
5555
testTransition(t, rpc, testNode.transitionToOutOfSync, destinationState, allowedStates...)

multinode/node_lifecycle.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -461,7 +461,8 @@ func (n *node[CHAIN_ID, HEAD, RPC]) outOfSyncLoop(syncIssues syncStatus) {
461461
if n.poolInfoProvider != nil {
462462
if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 1 {
463463
if n.isLoadBalancedRPC {
464-
n.declareUnreachable()
464+
// in case all rpcs behind a load balanced rpc are out of sync, we need to declare out of sync to prevent false transition to alive
465+
n.declareOutOfSync(syncIssues)
465466
return
466467
}
467468
lggr.Criticalw("RPC endpoint is still out of sync, but there are no other available nodes. This RPC node will be forcibly moved back into the live pool in a degraded state", "syncIssues", syncIssues)

multinode/node_lifecycle_test.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -227,7 +227,9 @@ func TestUnit_NodeLifecycle_aliveLoop(t *testing.T) {
227227
rpc.On("ClientVersion", mock.Anything).Return("", pollError)
228228
node.declareAlive()
229229
tests.AssertLogEventually(t, observedLogs, fmt.Sprintf("RPC endpoint failed to respond to %d consecutive polls", pollFailureThreshold))
230-
assert.Equal(t, nodeStateUnreachable, node.State())
230+
tests.AssertEventually(t, func() bool {
231+
return node.State() == nodeStateUnreachable
232+
})
231233
})
232234
t.Run("when behind more than SyncThreshold, transitions to out of sync", func(t *testing.T) {
233235
t.Parallel()

0 commit comments

Comments
 (0)