diff --git a/multinode/node_fsm.go b/multinode/node_fsm.go index 6ff532c..818363e 100644 --- a/multinode/node_fsm.go +++ b/multinode/node_fsm.go @@ -239,7 +239,7 @@ func (n *node[CHAIN_ID, HEAD, RPC]) transitionToOutOfSync(fn func()) { return } switch n.state { - case nodeStateAlive: + case nodeStateAlive, nodeStateOutOfSync: n.rpc.Close() n.state = nodeStateOutOfSync default: diff --git a/multinode/node_fsm_test.go b/multinode/node_fsm_test.go index 18f6210..17d312c 100644 --- a/multinode/node_fsm_test.go +++ b/multinode/node_fsm_test.go @@ -49,7 +49,7 @@ func TestUnit_Node_StateTransitions(t *testing.T) { }) t.Run("transitionToOutOfSync", func(t *testing.T) { const destinationState = nodeStateOutOfSync - allowedStates := []nodeState{nodeStateAlive} + allowedStates := []nodeState{nodeStateAlive, nodeStateOutOfSync} rpc := newMockRPCClient[ID, Head](t) rpc.On("Close") testTransition(t, rpc, testNode.transitionToOutOfSync, destinationState, allowedStates...) diff --git a/multinode/node_lifecycle.go b/multinode/node_lifecycle.go index 512a7db..e2974c0 100644 --- a/multinode/node_lifecycle.go +++ b/multinode/node_lifecycle.go @@ -461,7 +461,8 @@ func (n *node[CHAIN_ID, HEAD, RPC]) outOfSyncLoop(syncIssues syncStatus) { if n.poolInfoProvider != nil { if l, _ := n.poolInfoProvider.LatestChainInfo(); l < 1 { if n.isLoadBalancedRPC { - n.declareUnreachable() + // in case all rpcs behind a load balanced rpc are out of sync, we need to declare out of sync to prevent false transition to alive + n.declareOutOfSync(syncIssues) return } lggr.Criticalw("RPC endpoint is still out of sync, but there are no other available nodes. This RPC node will be forcibly moved back into the live pool in a degraded state", "syncIssues", syncIssues) diff --git a/multinode/node_lifecycle_test.go b/multinode/node_lifecycle_test.go index ba1ade7..684d0c7 100644 --- a/multinode/node_lifecycle_test.go +++ b/multinode/node_lifecycle_test.go @@ -227,7 +227,9 @@ func TestUnit_NodeLifecycle_aliveLoop(t *testing.T) { rpc.On("ClientVersion", mock.Anything).Return("", pollError) node.declareAlive() tests.AssertLogEventually(t, observedLogs, fmt.Sprintf("RPC endpoint failed to respond to %d consecutive polls", pollFailureThreshold)) - assert.Equal(t, nodeStateUnreachable, node.State()) + tests.AssertEventually(t, func() bool { + return node.State() == nodeStateUnreachable + }) }) t.Run("when behind more than SyncThreshold, transitions to out of sync", func(t *testing.T) { t.Parallel()