Skip to content

Commit f74bf66

Browse files
Merge pull request #5258 from IntersectMBO/mw/bootstrap-timeout
Respect bootstrap peers timeout
2 parents 440ffb8 + 187e08d commit f74bf66

File tree

7 files changed

+212
-63
lines changed

7 files changed

+212
-63
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Breaking
10+
11+
- A bullet item for the Breaking category.
12+
13+
-->
14+
15+
### Non-Breaking
16+
17+
- Ensure timeout to enter sensitive state when bootstrap peers
18+
are enabled is always respected.
19+
- Added test for timeout

cardano-diffusion/lib/Cardano/Network/PeerSelection/Governor/Monitor.hs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,6 +636,7 @@ waitForSystemToQuiesce st@PeerSelectionState{
636636
, extraState = cpst@Cardano.ExtraState {
637637
Cardano.ledgerStateJudgement
638638
, Cardano.bootstrapPeersFlag
639+
, Cardano.bootstrapPeersTimeout
639640
, Cardano.hasOnlyBootstrapPeers
640641
}
641642
}
@@ -672,7 +673,13 @@ waitForSystemToQuiesce st@PeerSelectionState{
672673
}
673674
}
674675
}
675-
| otherwise = GuardedSkip Nothing
676+
-- We install this timeout here for the corner case where following all hold:
677+
-- a) The timeout expires
678+
-- b) There is nothing else that the governor needs to wake up for to handle
679+
-- This is necessary since this timeout is a pure value which is handled in the
680+
-- governor loop, and otherwise it is inert and its expiration cannot cause
681+
-- an iteration of the loop by itself.
682+
| otherwise = GuardedSkip bootstrapPeersTimeout
676683

677684

678685
-- | Extra trace points for `TracePeerSelection`.

cardano-diffusion/tests/lib/Test/Cardano/Network/Diffusion/Testnet.hs

Lines changed: 140 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import Control.Monad.Class.MonadTime.SI
2626
import Control.Monad.IOSim
2727

2828
import Data.Bifoldable (bifoldMap)
29-
import Data.Bifunctor (bimap, first)
29+
import Data.Bifunctor (bimap, first, second)
3030
import Data.Char (ord)
3131
import Data.Dynamic (fromDynamic)
3232
import Data.Foldable (fold, foldr')
@@ -77,6 +77,7 @@ import Ouroboros.Network.InboundGovernor qualified as IG
7777
import Ouroboros.Network.Mock.ConcreteBlock (BlockHeader)
7878
import Ouroboros.Network.PeerSelection
7979
import Ouroboros.Network.PeerSelection.Governor qualified as Governor
80+
import Ouroboros.Network.PeerSelection.Governor.Types
8081
import Ouroboros.Network.PeerSelection.PublicRootPeers qualified as PublicRootPeers
8182
import Ouroboros.Network.PeerSelection.RootPeersDNS (DNSorIOError (DNSError))
8283
import Ouroboros.Network.PeerSelection.State.EstablishedPeers qualified as EstablishedPeers
@@ -234,6 +235,8 @@ tests =
234235
prop_accept_failure
235236
, testProperty "only bootstrap peers in fallback state"
236237
prop_only_bootstrap_peers_in_fallback_state_iosim
238+
, testProperty "bootstrap peers timeout"
239+
prop_bootstrap_timeout_iosim
237240
, testGroup "local root diffusion mode"
238241
[ testProperty "InitiatorOnly"
239242
(unit_local_root_diffusion_mode InitiatorOnlyDiffusionMode)
@@ -1223,9 +1226,9 @@ prop_only_bootstrap_peers_in_fallback_state ioSimTrace traceNumber =
12231226
trJoinKillSig =
12241227
Signal.fromChangeEvents Terminated -- Default to TrKillingNode
12251228
. Signal.selectEvents
1226-
(\case TrJoiningNetwork -> Just Joined
1227-
TrTerminated -> Just Terminated
1228-
_ -> Nothing
1229+
(\case TrJoiningNetwork{} -> Just Joined
1230+
TrTerminated -> Just Terminated
1231+
_ -> Nothing
12291232
)
12301233
. selectDiffusionSimulationTrace
12311234
$ events
@@ -1301,6 +1304,107 @@ prop_only_bootstrap_peers_in_fallback_state_iosim
13011304
prop_only_bootstrap_peers_in_fallback_state_iosim
13021305
= testWithIOSim prop_only_bootstrap_peers_in_fallback_state long_trace
13031306

1307+
prop_bootstrap_timeout_iosim
1308+
:: AbsBearerInfo -> DiffusionScript -> Property
1309+
prop_bootstrap_timeout_iosim = testWithIOSim prop long_trace
1310+
where
1311+
prop ioSimTrace traceNumber =
1312+
let events :: [Events DiffusionTestTrace]
1313+
events = Trace.toList
1314+
. fmap ( Signal.eventsFromList
1315+
. fmap (\(WithName _ (WithTime t b)) -> (t, b))
1316+
)
1317+
. splitWithNameTrace
1318+
. fmap (\(WithTime t (WithName name b)) -> WithName name (WithTime t b))
1319+
. withTimeNameTraceEvents
1320+
@DiffusionTestTrace
1321+
@NtNAddr
1322+
. Trace.take traceNumber
1323+
$ ioSimTrace
1324+
1325+
in conjoin
1326+
$ (\ev ->
1327+
let evsList = eventsToList ev
1328+
lastTime = fst
1329+
. last
1330+
$ evsList
1331+
in classifySimulatedTime lastTime
1332+
$ classifyNumberOfEvents (length evsList)
1333+
$ timeout_enforced ev
1334+
)
1335+
<$> events
1336+
1337+
timeout_enforced evs =
1338+
let govUseBootstrapPeers :: Signal UseBootstrapPeers
1339+
govUseBootstrapPeers =
1340+
selectDiffusionPeerSelectionState
1341+
(Cardano.ExtraState.bootstrapPeersFlag . Governor.extraState)
1342+
evs
1343+
1344+
govLedgerStateJudgement :: Signal LedgerStateJudgement
1345+
govLedgerStateJudgement =
1346+
selectDiffusionPeerSelectionState
1347+
(Cardano.ExtraState.ledgerStateJudgement . Governor.extraState)
1348+
evs
1349+
1350+
sigPeerSelection
1351+
:: Signal
1352+
(Maybe (Governor.TracePeerSelection Cardano.ExtraState PeerTrustable
1353+
(Cardano.ExtraPeers NtNAddr)
1354+
Cardano.ExtraTrace NtNAddr))
1355+
sigPeerSelection = Signal.fromEvents $ selectDiffusionPeerSelectionEvents evs
1356+
1357+
sigDiffusionSimulation :: Signal (Maybe DiffusionSimulationTrace)
1358+
sigDiffusionSimulation = Signal.fromEvents $ selectDiffusionSimulationTrace evs
1359+
1360+
govBootstrapTimeout :: Signal (Maybe Time)
1361+
govBootstrapTimeout =
1362+
selectDiffusionPeerSelectionState
1363+
(Cardano.ExtraState.bootstrapPeersTimeout . Governor.extraState)
1364+
evs
1365+
1366+
criticalTimeoutError :: [Governor.TracePeerSelection Cardano.ExtraState PeerTrustable
1367+
(Cardano.ExtraPeers NtNAddr)
1368+
Cardano.ExtraTrace NtNAddr]
1369+
criticalTimeoutError =
1370+
filter (\case
1371+
TraceOutboundGovernorCriticalFailure reason
1372+
| Just BootstrapPeersCriticalTimeoutError <- fromException reason
1373+
-> True
1374+
_otherwise -> False)
1375+
. mapMaybe snd
1376+
. Signal.eventsToList
1377+
. Signal.toChangeEvents
1378+
$ sigPeerSelection
1379+
1380+
timeoutExceeded :: Signal Bool
1381+
timeoutExceeded =
1382+
not . Set.null <$> keyedTimeout' (15*60+1)
1383+
(\case
1384+
(_, Just (TrErrored{}), _) -> Nothing
1385+
(True, _, e)
1386+
| Just (TraceOutboundGovernorCriticalFailure e') <- e
1387+
, Just BootstrapPeersCriticalTimeoutError <- fromException e'
1388+
-> Nothing
1389+
| otherwise -> Just (Set.singleton ())
1390+
(False, _, _) -> Nothing
1391+
)
1392+
((,,) . isJust <$> govBootstrapTimeout <*> sigDiffusionSimulation <*> sigPeerSelection)
1393+
1394+
-- Node exiting with a critical timeout error makes this test succeed because
1395+
-- the point here is that the node should always be able to exit one way or another
1396+
-- but that indicates a problem elsewhere.
1397+
in classify (not . null $ criticalTimeoutError) "Passed with BootstrapPeersCriticalTimeoutError"
1398+
. counterexample
1399+
"\nSignal key: (ledger state judgement, use bootstrap peers, timeout at, exceeded?)\n"
1400+
$ signalProperty 20 show
1401+
(\(_, _, _, timedOut) -> not timedOut)
1402+
((,,,) <$> govLedgerStateJudgement
1403+
<*> govUseBootstrapPeers
1404+
<*> govBootstrapTimeout
1405+
<*> timeoutExceeded
1406+
)
1407+
13041408

13051409
-- | Unit test which covers issue #4177
13061410
--
@@ -4008,7 +4112,7 @@ prop_unit_reconnect =
40084112
verify_consistency events =
40094113
let govEstablishedPeersSig :: Signal (Set NtNAddr)
40104114
govEstablishedPeersSig =
4011-
selectDiffusionPeerSelectionState'
4115+
selectDiffusionPeerSelectionState
40124116
(EstablishedPeers.toSet . Governor.establishedPeers)
40134117
(wnEvent <$> events)
40144118

@@ -5071,58 +5175,44 @@ selectDiffusionSimulationTrace :: Events DiffusionTestTrace
50715175
-> Events DiffusionSimulationTrace
50725176
selectDiffusionSimulationTrace = Signal.selectEvents
50735177
(\case DiffusionSimulationTrace e -> Just e
5074-
_ -> Nothing)
5075-
5076-
selectDiffusionPeerSelectionState :: Eq a
5077-
=> (forall peerconn.
5078-
Governor.PeerSelectionState Cardano.ExtraState
5079-
PeerTrustable
5080-
(Cardano.ExtraPeers NtNAddr)
5081-
NtNAddr peerconn
5082-
-> a)
5083-
-> Events DiffusionTestTrace
5084-
-> Signal a
5178+
_ -> Nothing)
5179+
5180+
selectDiffusionPeerSelectionState
5181+
:: Eq a
5182+
=> (forall peerconn.
5183+
Governor.PeerSelectionState Cardano.ExtraState PeerTrustable
5184+
(Cardano.ExtraPeers NtNAddr) NtNAddr peerconn
5185+
-> a)
5186+
-> Events DiffusionTestTrace
5187+
-> Signal a
50855188
selectDiffusionPeerSelectionState f =
50865189
Signal.nub
50875190
-- TODO: #3182 Rng seed should come from quickcheck.
50885191
. (\evs ->
50895192
let evsList = Signal.eventsToList evs
5090-
in
5091-
case evsList of
5092-
[] -> Signal.fromChangeEvents (initialState PraosMode) (snd <$> evs)
5093-
(_, (consensusMode, _)):_ ->
5094-
Signal.fromChangeEvents (initialState consensusMode) (snd <$> evs)
5193+
in case evsList of
5194+
[] -> Signal.fromChangeEvents (initial PraosMode) mempty
5195+
((_, Just govState0):rest) ->
5196+
Signal.fromChangeEvents
5197+
govState0
5198+
(Signal.eventsFromList $ second (fromMaybe govState0) <$> rest)
5199+
_otherwise -> error "impossible"
50955200
)
5096-
. Signal.selectEvents
5097-
(\case
5098-
DiffusionDebugPeerSelectionTrace (TraceGovernorState _ _ st) ->
5099-
Just (Cardano.ExtraState.consensusMode (Governor.extraState st), f st)
5100-
_ ->
5101-
Nothing)
5102-
where
5103-
initialState consensusMode =
5104-
f $ Governor.emptyPeerSelectionState
5105-
(mkStdGen 42)
5106-
(Cardano.ExtraState.empty consensusMode (NumberOfBigLedgerPeers 0)) -- ^ todo: fix
5107-
Cardano.ExtraPeers.empty
5108-
5109-
selectDiffusionPeerSelectionState' :: (forall peerconn. Governor.PeerSelectionState Cardano.ExtraState PeerTrustable (Cardano.ExtraPeers NtNAddr) NtNAddr peerconn -> a)
5110-
-> Events DiffusionTestTrace
5111-
-> Signal a
5112-
selectDiffusionPeerSelectionState' f =
5113-
-- TODO: #3182 Rng seed should come from quickcheck.
5114-
Signal.fromChangeEvents initial
5115-
. Signal.selectEvents
5116-
(\case
5117-
DiffusionDebugPeerSelectionTrace (TraceGovernorState _ _ st) -> Just (f st)
5118-
-- don't let old state linger around when a node is restarted
5119-
DiffusionSimulationTrace TrKillingNode -> Just initial
5120-
_ -> Nothing)
5201+
. Signal.selectEvents (\case
5202+
DiffusionDebugPeerSelectionTrace (TraceGovernorState _ _ st) ->
5203+
Just . Just $! f st
5204+
-- don't let old state linger around when a node is restarted
5205+
DiffusionSimulationTrace TrKillingNode ->
5206+
Just Nothing
5207+
DiffusionSimulationTrace (TrJoiningNetwork consensusMode) ->
5208+
Just . Just $! initial consensusMode
5209+
_ -> Nothing)
51215210
where
5122-
initial = f $ Governor.emptyPeerSelectionState
5123-
(mkStdGen 42)
5124-
(Cardano.ExtraState.empty PraosMode (NumberOfBigLedgerPeers 0))
5125-
Cardano.ExtraPeers.empty
5211+
initial consensusMode =
5212+
f $! Governor.emptyPeerSelectionState
5213+
(mkStdGen 42)
5214+
(Cardano.ExtraState.empty consensusMode (NumberOfBigLedgerPeers 0))
5215+
Cardano.ExtraPeers.empty
51265216

51275217
selectDiffusionConnectionManagerEvents
51285218
:: Trace () DiffusionTestTrace

cardano-diffusion/tests/lib/Test/Cardano/Network/Diffusion/Testnet/Simulation.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -932,7 +932,7 @@ prop_diffusionScript_commandScript_valid (DiffusionScript sa dnsMap ((_, cmds):
932932
-- running
933933
--
934934
data DiffusionSimulationTrace
935-
= TrJoiningNetwork
935+
= TrJoiningNetwork ConsensusMode
936936
| TrKillingNode
937937
| TrReconfiguringNode
938938
| TrUpdatingDNS
@@ -1080,7 +1080,7 @@ diffusionSimulationM
10801080
-- TVar.
10811081
-> [Command] -- ^ List of commands/actions to perform for a single node
10821082
-> m Void
1083-
runCommand ntnSocket ntcSocket dnsMapVar sArgs nArgs@NodeArgs { naAddr }
1083+
runCommand ntnSocket ntcSocket dnsMapVar sArgs nArgs@NodeArgs { naAddr, naConsensusMode }
10841084
connStateIdSupply nodeId hostAndLRP cmds = do
10851085
traceWith (diffSimTracer naAddr) . TrSay $ show nodeId ++ " @ " ++ ppNtNAddr naAddr
10861086
runCommand' hostAndLRP cmds
@@ -1099,7 +1099,7 @@ diffusionSimulationM
10991099
runCommand' Nothing
11001100
(JoinNetwork delay :cs) = do
11011101
threadDelay delay
1102-
traceWith (diffSimTracer naAddr) TrJoiningNetwork
1102+
traceWith (diffSimTracer naAddr) (TrJoiningNetwork naConsensusMode)
11031103
lrpVar <- newTVarIO $ naLocalRootPeers nArgs
11041104
withAsync (runNode sArgs nArgs ntnSocket ntcSocket connStateIdSupply lrpVar dnsMapVar nodeId) $ \nodeAsync ->
11051105
runCommand' (Just (nodeAsync, lrpVar)) cs
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Breaking
10+
11+
- A bullet item for the Breaking category.
12+
13+
-->
14+
15+
### Non-Breaking
16+
17+
- Added keyedTimeout', which allows to reset the timeouts
18+
when an arbitrary condition is met

ouroboros-network/lib/Ouroboros/Network/PeerSelection/Governor/Types.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ data ExtraGuardedDecisions extraState extraDebugState extraFlags
464464
:: MonitoringAction extraState extraDebugState extraFlags
465465
extraPeers extraAPI extraCounters extraTrace peeraddr peerconn m
466466

467-
-- | This guarded decision will come before all default non-blocking
467+
-- | This guarded decision follows all default non-blocking
468468
-- decisions. The order matters; first decisions have priority over
469469
-- the later ones.
470470
--

0 commit comments

Comments
 (0)