Skip to content

Commit e1e6f56

Browse files
Niolsneilmayhew
authored andcommitted
Track the last time the ChainDB thread was starved
1 parent 5253563 commit e1e6f56

File tree

8 files changed

+94
-6
lines changed

8 files changed

+94
-6
lines changed

ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ traceChainDBEventTestBlockWith tracer = \case
370370
AddedReprocessLoEBlocksToQueue ->
371371
trace $ "Requested ChainSel run"
372372
_ -> pure ()
373+
ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationStarted time) ->
374+
trace $ "ChainSel starvation started at " ++ prettyTime time
375+
ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationEnded time pt) ->
376+
trace $ "ChainSel starvation ended at " ++ prettyTime time ++ " thanks to " ++ terseRealPoint pt
373377
_ -> pure ()
374378
where
375379
trace = traceUnitWith tracer "ChainDB"

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
4242
import qualified Ouroboros.Network.AnchoredFragment as AF
4343
import Ouroboros.Network.Block (MaxSlotNo)
4444
import Ouroboros.Network.BlockFetch.ConsensusInterface
45-
(BlockFetchConsensusInterface (..), FetchMode (..),
45+
(BlockFetchConsensusInterface (..),
46+
ChainSelStarvation (..), FetchMode (..),
4647
FromConsensus (..))
4748
import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers,
4849
requiresBootstrapPeers)
@@ -56,6 +57,7 @@ data ChainDbView m blk = ChainDbView {
5657
, getIsFetched :: STM m (Point blk -> Bool)
5758
, getMaxSlotNo :: STM m MaxSlotNo
5859
, addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool
60+
, getChainSelStarvation :: STM m ChainSelStarvation
5961
}
6062

6163
defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk
@@ -64,6 +66,7 @@ defaultChainDbView chainDB = ChainDbView {
6466
, getIsFetched = ChainDB.getIsFetched chainDB
6567
, getMaxSlotNo = ChainDB.getMaxSlotNo chainDB
6668
, addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB
69+
, getChainSelStarvation = ChainDB.getChainSelStarvation chainDB
6770
}
6871

6972
-- | How to get the wall-clock time of a slot. Note that this is a very
@@ -338,5 +341,7 @@ mkBlockFetchConsensusInterface
338341
headerForgeUTCTime = slotForgeTime . headerRealPoint . unFromConsensus
339342
blockForgeUTCTime = slotForgeTime . blockRealPoint . unFromConsensus
340343

344+
readChainSelStarvation = getChainSelStarvation chainDB
345+
341346
demoteCSJDynamo :: peer -> m ()
342347
demoteCSJDynamo = void . atomically . Jumping.rotateDynamo csHandlesCol

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF
9191
import Ouroboros.Network.Block (ChainUpdate, MaxSlotNo,
9292
Serialised (..))
9393
import qualified Ouroboros.Network.Block as Network
94+
import Ouroboros.Network.BlockFetch.ConsensusInterface
95+
(ChainSelStarvation (..))
9496
import Ouroboros.Network.Mock.Chain (Chain (..))
9597
import qualified Ouroboros.Network.Mock.Chain as Chain
9698
import System.FS.API.Types (FsError)
@@ -347,6 +349,10 @@ data ChainDB m blk = ChainDB {
347349
-- invalid block is detected. These blocks are likely to be valid.
348350
, getIsInvalidBlock :: STM m (WithFingerprint (HeaderHash blk -> Maybe (ExtValidationError blk)))
349351

352+
-- | Whether ChainSel is currently starved, or when was last time it
353+
-- stopped being starved.
354+
, getChainSelStarvation :: STM m ChainSelStarvation
355+
350356
, closeDB :: m ()
351357

352358
-- | Return 'True' when the database is open.

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl (
1616
, LgrDB.TraceReplayEvent
1717
, SelectionChangedInfo (..)
1818
, TraceAddBlockEvent (..)
19+
, TraceChainSelStarvationEvent (..)
1920
, TraceCopyToImmutableDBEvent (..)
2021
, TraceEvent (..)
2122
, TraceFollowerEvent (..)
@@ -69,6 +70,8 @@ import Ouroboros.Consensus.Util.IOLike
6970
import Ouroboros.Consensus.Util.STM (Fingerprint (..),
7071
WithFingerprint (..))
7172
import qualified Ouroboros.Network.AnchoredFragment as AF
73+
import Ouroboros.Network.BlockFetch.ConsensusInterface
74+
(ChainSelStarvation (..))
7275

7376
{-------------------------------------------------------------------------------
7477
Initialization
@@ -174,6 +177,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
174177
copyFuse <- newFuse "copy to immutable db"
175178
chainSelFuse <- newFuse "chain selection"
176179
chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs)
180+
varChainSelStarvation <- newTVarIO ChainSelStarvationOngoing
177181

178182
let env = CDB { cdbImmutableDB = immutableDB
179183
, cdbVolatileDB = volatileDB
@@ -196,6 +200,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
196200
, cdbKillBgThreads = varKillBgThreads
197201
, cdbChainSelQueue = chainSelQueue
198202
, cdbLoE = Args.cdbsLoE cdbSpecificArgs
203+
, cdbChainSelStarvation = varChainSelStarvation
199204
}
200205
h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env
201206
let chainDB = API.ChainDB
@@ -214,6 +219,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
214219
, stream = Iterator.stream h
215220
, newFollower = Follower.newFollower h
216221
, getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock
222+
, getChainSelStarvation = getEnvSTM h Query.getChainSelStarvation
217223
, closeDB = closeDB h
218224
, isOpen = isOpen h
219225
}

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do
522522
-- exception (or it errored), notify the blocked thread
523523
withFuse fuse $
524524
bracketOnError
525-
(lift $ getChainSelMessage cdbChainSelQueue)
525+
(lift $ getChainSelMessage starvationTracer cdbChainSelStarvation cdbChainSelQueue)
526526
(\message -> lift $ atomically $ do
527527
case message of
528528
ChainSelReprocessLoEBlocks varProcessed ->
@@ -542,3 +542,5 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do
542542
trace $ PoppedBlockFromQueue $ FallingEdgeWith $
543543
blockRealPoint blockToAdd
544544
chainSelSync cdb message)
545+
where
546+
starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query (
2222
, getAnyBlockComponent
2323
, getAnyKnownBlock
2424
, getAnyKnownBlockComponent
25+
, getChainSelStarvation
2526
) where
2627

2728
import qualified Data.Map.Strict as Map
@@ -50,6 +51,8 @@ import Ouroboros.Consensus.Util.STM (WithFingerprint (..))
5051
import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
5152
import qualified Ouroboros.Network.AnchoredFragment as AF
5253
import Ouroboros.Network.Block (MaxSlotNo, maxSlotNoFromWithOrigin)
54+
import Ouroboros.Network.BlockFetch.ConsensusInterface
55+
(ChainSelStarvation (..))
5356

5457
-- | Return the last @k@ headers.
5558
--
@@ -181,6 +184,12 @@ getIsInvalidBlock ::
181184
getIsInvalidBlock CDB{..} =
182185
fmap (fmap (fmap invalidBlockReason) . flip Map.lookup) <$> readTVar cdbInvalid
183186

187+
getChainSelStarvation ::
188+
forall m blk. IOLike m
189+
=> ChainDbEnv m blk
190+
-> STM m ChainSelStarvation
191+
getChainSelStarvation CDB {..} = readTVar cdbChainSelStarvation
192+
184193
getIsValid ::
185194
forall m blk. (IOLike m, HasHeader blk)
186195
=> ChainDbEnv m blk

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types (
5151
-- * Trace types
5252
, SelectionChangedInfo (..)
5353
, TraceAddBlockEvent (..)
54+
, TraceChainSelStarvationEvent (..)
5455
, TraceCopyToImmutableDBEvent (..)
5556
, TraceEvent (..)
5657
, TraceFollowerEvent (..)
@@ -62,6 +63,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types (
6263
, TraceValidationEvent (..)
6364
) where
6465

66+
import Cardano.Prelude (whenM)
6567
import Control.ResourceRegistry
6668
import Control.Tracer
6769
import Data.Foldable (traverse_)
@@ -105,6 +107,8 @@ import Ouroboros.Consensus.Util.IOLike
105107
import Ouroboros.Consensus.Util.STM (WithFingerprint)
106108
import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
107109
import Ouroboros.Network.Block (MaxSlotNo)
110+
import Ouroboros.Network.BlockFetch.ConsensusInterface
111+
(ChainSelStarvation (..))
108112

109113
-- | All the serialisation related constraints needed by the ChainDB.
110114
class ( ImmutableDbSerialiseConstraints blk
@@ -254,6 +258,9 @@ data ChainDbEnv m blk = CDB
254258
-- switch back to a chain containing it. The fragment is usually anchored at
255259
-- a recent immutable tip; if it does not, it will conservatively be treated
256260
-- as the empty fragment anchored in the current immutable tip.
261+
, cdbChainSelStarvation :: !(StrictTVar m ChainSelStarvation)
262+
-- ^ Information on the last starvation of ChainSel, whether ongoing or
263+
-- ended recently.
257264
} deriving (Generic)
258265

259266
-- | We include @blk@ in 'showTypeOf' because it helps resolving type families
@@ -483,9 +490,42 @@ addReprocessLoEBlocks tracer (ChainSelQueue queue) = do
483490
return $ ChainSelectionPromise waitUntilRan
484491

485492
-- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the
486-
-- queue is empty.
487-
getChainSelMessage :: IOLike m => ChainSelQueue m blk -> m (ChainSelMessage m blk)
488-
getChainSelMessage (ChainSelQueue queue) = atomically $ readTBQueue queue
493+
-- queue is empty; in that case, reports the starvation (and its end) via the
494+
-- given tracer.
495+
getChainSelMessage
496+
:: forall m blk. (HasHeader blk, IOLike m)
497+
=> Tracer m (TraceChainSelStarvationEvent blk)
498+
-> StrictTVar m ChainSelStarvation
499+
-> ChainSelQueue m blk
500+
-> m (ChainSelMessage m blk)
501+
getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) =
502+
atomically (tryReadTBQueue' queue) >>= \case
503+
Just msg -> pure msg
504+
Nothing -> do
505+
startStarvationMeasure
506+
msg <- atomically $ readTBQueue queue
507+
terminateStarvationMeasure msg
508+
pure msg
509+
where
510+
startStarvationMeasure :: m ()
511+
startStarvationMeasure = do
512+
prevStarvation <- atomically $ swapTVar starvationVar ChainSelStarvationOngoing
513+
when (prevStarvation /= ChainSelStarvationOngoing) $
514+
traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime
515+
516+
terminateStarvationMeasure :: ChainSelMessage m blk -> m ()
517+
terminateStarvationMeasure = \case
518+
ChainSelAddBlock BlockToAdd{blockToAdd=block} -> do
519+
tf <- getMonotonicTime
520+
let pt = blockRealPoint block
521+
traceWith starvationTracer $ ChainSelStarvationEnded tf pt
522+
atomically $ writeTVar starvationVar (ChainSelStarvationEndedAt tf)
523+
ChainSelReprocessLoEBlocks{} -> pure ()
524+
525+
-- TODO Can't use tryReadTBQueue from io-classes because it is broken for IOSim
526+
-- (but not for IO). https://github.com/input-output-hk/io-sim/issues/195
527+
tryReadTBQueue' :: MonadSTM m => TBQueue m a -> STM m (Maybe a)
528+
tryReadTBQueue' q = (Just <$> readTBQueue q) `orElse` pure Nothing
489529

490530
-- | Flush the 'ChainSelQueue' queue and notify the waiting threads.
491531
--
@@ -519,6 +559,7 @@ data TraceEvent blk
519559
| TraceImmutableDBEvent (ImmutableDB.TraceEvent blk)
520560
| TraceVolatileDBEvent (VolatileDB.TraceEvent blk)
521561
| TraceLastShutdownUnclean
562+
| TraceChainSelStarvationEvent(TraceChainSelStarvationEvent blk)
522563
deriving (Generic)
523564

524565

@@ -827,3 +868,16 @@ data TraceIteratorEvent blk
827868
-- next block we're looking for.
828869
| SwitchBackToVolatileDB
829870
deriving (Generic, Eq, Show)
871+
872+
-- | Chain selection is /starved/ when the background thread runs out of work.
873+
-- This is the usual case and innocent while caught-up; but while syncing, it
874+
-- means that we are downloading blocks at a smaller rate than we can validate
875+
-- them, even though we generally expect to be CPU-bound.
876+
data TraceChainSelStarvationEvent blk
877+
-- | A ChainSel starvation started at the given time.
878+
= ChainSelStarvationStarted Time
879+
880+
-- | The last ChainSel starvation ended at the given time as a block wth the
881+
-- given point has been received.
882+
| ChainSelStarvationEnded Time (RealPoint blk)
883+
deriving (Generic, Eq, Show)

ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,8 @@ deriving instance SOP.Generic (ImmutableDB.TraceEvent blk)
12411241
deriving instance SOP.HasDatatypeInfo (ImmutableDB.TraceEvent blk)
12421242
deriving instance SOP.Generic (VolatileDB.TraceEvent blk)
12431243
deriving instance SOP.HasDatatypeInfo (VolatileDB.TraceEvent blk)
1244+
deriving instance SOP.Generic (TraceChainSelStarvationEvent blk)
1245+
deriving instance SOP.HasDatatypeInfo (TraceChainSelStarvationEvent blk)
12441246

12451247
data Tag =
12461248
TagGetIsValidJust
@@ -1635,7 +1637,7 @@ traceEventName = \case
16351637
TraceImmutableDBEvent ev -> "ImmutableDB." <> constrName ev
16361638
TraceVolatileDBEvent ev -> "VolatileDB." <> constrName ev
16371639
TraceLastShutdownUnclean -> "LastShutdownUnclean"
1638-
TraceChainSelStarvationEvent _ -> "TraceChainSelStarvationEvent"
1640+
TraceChainSelStarvationEvent ev -> "ChainSelStarvation." <> constrName ev
16391641

16401642
mkArgs :: IOLike m
16411643
=> TopLevelConfig Blk

0 commit comments

Comments
 (0)