Skip to content

Commit b24edf6

Browse files
authored
Fix leaking handles in uncommitted forkers (#1640)
2 parents e3ed1c7 + 1c31aeb commit b24edf6

File tree

6 files changed

+87
-29
lines changed

6 files changed

+87
-29
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
For top level release notes, leave all the headers commented out.
6+
-->
7+
8+
<!--
9+
### Patch
10+
11+
- A bullet item for the Patch category.
12+
13+
-->
14+
15+
### Non-Breaking
16+
17+
- Ensure uncommitted forkers do not leak Ledger tables handles.
18+
19+
<!--
20+
### Breaking
21+
22+
- A bullet item for the Breaking category.
23+
24+
-->

ouroboros-consensus/ouroboros-consensus.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ test-suite storage-test
751751
ouroboros-consensus,
752752
ouroboros-network-api,
753753
ouroboros-network-mock,
754+
ouroboros-network-protocols,
754755
pretty-show,
755756
quickcheck-dynamic,
756757
quickcheck-lockstep ^>=0.8,

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V2.hs

Lines changed: 32 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,12 @@ module Ouroboros.Consensus.Storage.LedgerDB.V2 (mkInitDb) where
1818

1919
import Cardano.Ledger.BaseTypes (unNonZero)
2020
import Control.Arrow ((>>>))
21-
import Control.Monad (join)
22-
import qualified Control.Monad as Monad (void, (>=>))
21+
import qualified Control.Monad as Monad (join, void)
2322
import Control.Monad.Except
2423
import Control.RAWLock
2524
import qualified Control.RAWLock as RAWLock
2625
import Control.ResourceRegistry
2726
import Control.Tracer
28-
import Data.Foldable (traverse_)
2927
import qualified Data.Foldable as Foldable
3028
import Data.Functor.Contravariant ((>$<))
3129
import Data.Kind (Type)
@@ -195,7 +193,7 @@ mkInternals bss h =
195193
let selectWhereTo = case whereTo of
196194
TakeAtImmutableTip -> anchorHandle
197195
TakeAtVolatileTip -> currentHandle
198-
withStateRef env (MkSolo . selectWhereTo) $ \(MkSolo st) ->
196+
withStateRef env (MkSolo . selectWhereTo) $ \(MkSolo (st, _)) ->
199197
Monad.void $
200198
takeSnapshot
201199
(configCodec . getExtLedgerCfg . ledgerDbCfg $ ldbCfg env)
@@ -249,7 +247,7 @@ mkInternals bss h =
249247

250248
pruneLedgerSeq :: LedgerDBEnv m (ExtLedgerState blk) blk -> m ()
251249
pruneLedgerSeq env =
252-
join $ atomically $ stateTVar (ldbSeq env) $ pruneToImmTipOnly
250+
Monad.join $ atomically $ stateTVar (ldbSeq env) $ pruneToImmTipOnly
253251

254252
-- | Testing only! Truncate all snapshots in the DB.
255253
implIntTruncateSnapshots :: MonadThrow m => SomeHasFS m -> m ()
@@ -360,7 +358,7 @@ implGarbageCollect env slotNo = do
360358
Set.dropWhileAntitone ((< slotNo) . realPointSlot)
361359
-- It is safe to close the handles outside of the locked region, which reduces
362360
-- contention. See the docs of 'ldbOpenHandlesLock'.
363-
join $ RAWLock.withWriteAccess (ldbOpenHandlesLock env) $ \() -> do
361+
Monad.join $ RAWLock.withWriteAccess (ldbOpenHandlesLock env) $ \() -> do
364362
close <- atomically $ stateTVar (ldbSeq env) $ prune (LedgerDbPruneBeforeSlot slotNo)
365363
pure (close, ())
366364

@@ -379,7 +377,7 @@ implTryTakeSnapshot ::
379377
implTryTakeSnapshot bss env mTime nrBlocks =
380378
if onDiskShouldTakeSnapshot (ldbSnapshotPolicy env) (uncurry (flip diffTime) <$> mTime) nrBlocks
381379
then do
382-
withStateRef env (MkSolo . anchorHandle) $ \(MkSolo st) ->
380+
withStateRef env (MkSolo . anchorHandle) $ \(MkSolo (st, _)) ->
383381
Monad.void $
384382
takeSnapshot
385383
(configCodec . getExtLedgerCfg . ledgerDbCfg $ ldbCfg env)
@@ -585,36 +583,37 @@ getVolatileLedgerSeq env =
585583
where
586584
k = unNonZero $ maxRollbacks $ ledgerDbCfgSecParam $ ldbCfg env
587585

588-
-- | Get a 'StateRef' from the 'LedgerSeq' (via 'getVolatileLedgerSeq') in the
589-
-- 'LedgerDBEnv', with the 'LedgerTablesHandle' having been duplicated (such
590-
-- that the original can be closed). The caller is responsible for closing the
591-
-- handle.
586+
-- | Get a 'StateRef' from the 'LedgerSeq' in the 'LedgerDBEnv', with the
587+
-- 'LedgerTablesHandle' having been duplicated (such that the original can be
588+
-- closed). The caller should close the handle using the returned @ResourceKey@,
589+
-- although closing the registry will also release the handle.
592590
--
593591
-- For more flexibility, an arbitrary 'Traversable' of the 'StateRef' can be
594592
-- returned; for the simple use case of getting a single 'StateRef', use @t ~
595593
-- 'Solo'@.
596594
getStateRef ::
597595
(IOLike m, Traversable t, GetTip l) =>
598596
LedgerDBEnv m l blk ->
597+
ResourceRegistry m ->
599598
(LedgerSeq m l -> t (StateRef m l)) ->
600-
m (t (StateRef m l))
601-
getStateRef ldbEnv project =
599+
m (t (StateRef m l, ResourceKey m))
600+
getStateRef ldbEnv reg project =
602601
RAWLock.withReadAccess (ldbOpenHandlesLock ldbEnv) $ \() -> do
603602
tst <- project <$> atomically (getVolatileLedgerSeq ldbEnv)
604603
for tst $ \st -> do
605-
tables' <- duplicate $ tables st
606-
pure st{tables = tables'}
604+
(resKey, tables') <- allocate reg (\_ -> duplicate $ tables st) close
605+
pure (st{tables = tables'}, resKey)
607606

608607
-- | Like 'StateRef', but takes care of closing the handle when the given action
609608
-- returns or errors.
610609
withStateRef ::
611610
(IOLike m, Traversable t, GetTip l) =>
612611
LedgerDBEnv m l blk ->
613612
(LedgerSeq m l -> t (StateRef m l)) ->
614-
(t (StateRef m l) -> m a) ->
613+
(t (StateRef m l, ResourceKey m) -> m a) ->
615614
m a
616-
withStateRef ldbEnv project =
617-
bracket (getStateRef ldbEnv project) (traverse_ (close . tables))
615+
withStateRef ldbEnv project f =
616+
withRegistry $ \reg -> getStateRef ldbEnv reg project >>= f
618617

619618
acquireAtTarget ::
620619
( HeaderHash l ~ HeaderHash blk
@@ -625,9 +624,10 @@ acquireAtTarget ::
625624
) =>
626625
LedgerDBEnv m l blk ->
627626
Either Word64 (Target (Point blk)) ->
628-
m (Either GetForkerError (StateRef m l))
629-
acquireAtTarget ldbEnv target =
630-
getStateRef ldbEnv $ \l -> case target of
627+
ResourceRegistry m ->
628+
m (Either GetForkerError (StateRef m l, ResourceKey m))
629+
acquireAtTarget ldbEnv target reg =
630+
getStateRef ldbEnv reg $ \l -> case target of
631631
Right VolatileTip -> pure $ currentHandle l
632632
Right ImmutableTip -> pure $ anchorHandle l
633633
Right (SpecificPoint pt) -> do
@@ -661,7 +661,7 @@ newForkerAtTarget ::
661661
Target (Point blk) ->
662662
m (Either GetForkerError (Forker m l blk))
663663
newForkerAtTarget h rr pt = getEnv h $ \ldbEnv ->
664-
acquireAtTarget ldbEnv (Right pt) >>= traverse (newForker h ldbEnv rr)
664+
acquireAtTarget ldbEnv (Right pt) rr >>= traverse (newForker h ldbEnv rr)
665665

666666
newForkerByRollback ::
667667
( HeaderHash l ~ HeaderHash blk
@@ -676,14 +676,14 @@ newForkerByRollback ::
676676
Word64 ->
677677
m (Either GetForkerError (Forker m l blk))
678678
newForkerByRollback h rr n = getEnv h $ \ldbEnv ->
679-
acquireAtTarget ldbEnv (Left n) >>= traverse (newForker h ldbEnv rr)
679+
acquireAtTarget ldbEnv (Left n) rr >>= traverse (newForker h ldbEnv rr)
680680

681681
closeForkerEnv ::
682682
IOLike m => ForkerEnv m l blk -> m ()
683683
closeForkerEnv ForkerEnv{foeResourcesToRelease = (lock, key, toRelease)} =
684684
RAWLock.withWriteAccess lock $
685685
const $ do
686-
id =<< atomically (swapTVar toRelease (pure ()))
686+
Monad.join $ atomically (swapTVar toRelease (pure ()))
687687
_ <- release key
688688
pure ((), ())
689689

@@ -773,14 +773,19 @@ newForker ::
773773
LedgerDBHandle m l blk ->
774774
LedgerDBEnv m l blk ->
775775
ResourceRegistry m ->
776-
StateRef m l ->
776+
(StateRef m l, ResourceKey m) ->
777777
m (Forker m l blk)
778-
newForker h ldbEnv rr st = do
778+
newForker h ldbEnv rr (st, rk) = do
779779
forkerKey <- atomically $ stateTVar (ldbNextForkerKey ldbEnv) $ \r -> (r, r + 1)
780780
let tr = LedgerDBForkerEvent . TraceForkerEventWithKey forkerKey >$< ldbTracer ldbEnv
781781
traceWith tr ForkerOpen
782782
lseqVar <- newTVarIO . LedgerSeq . AS.Empty $ st
783-
(k, toRelease) <- allocate rr (\_ -> newTVarIO (pure ())) (readTVarIO Monad.>=> id)
783+
-- The closing action that we allocate in the TVar from the start is not
784+
-- strictly necessary if the caller uses a short-lived registry like the ones
785+
-- in Chain selection or the forging loop. Just in case the user passes a
786+
-- long-lived registry, we store such closing action to make sure the handle
787+
-- is closed even under @forkerClose@ if the registry outlives the forker.
788+
(k, toRelease) <- allocate rr (\_ -> newTVarIO (Monad.void (release rk))) (Monad.join . readTVarIO)
784789
let forkerEnv =
785790
ForkerEnv
786791
{ foeLedgerSeq = lseqVar

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V2/Forker.hs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ implForkerCommit env = do
159159
_ AS.:< closeOld' -> closeLedgerSeq (LedgerSeq closeOld')
160160
-- Finally, close the anchor of @lseq@ (which is a duplicate of
161161
-- the head of @olddb'@).
162+
--
163+
-- Note if the resource registry used to create the Forker is
164+
-- ephemeral as the one created on each Chain selection or each
165+
-- Forging loop iteration, this first duplicated state will be
166+
-- closed by the resource registry closing down, so this will be
167+
-- a double release, which is fine. We prefer keeping this
168+
-- action just in case some client passes a registry that
169+
-- outlives the forker.
170+
--
171+
-- The rest of the states in the forker will be closed via
172+
-- @foeResourcesToRelease@ instead of via the registry.
162173
close $ tables $ AS.anchor lseq
163174
pure (closeDiscarded, LedgerSeq newdb)
164175
)

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V2/InMemory.hs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,10 @@ newInMemoryLedgerTablesHandle tracer someFS@(SomeHasFS hasFS) l = do
9696
pure
9797
LedgerTablesHandle
9898
{ close = do
99-
atomically $ writeTVar tv LedgerTablesHandleClosed
100-
traceWith tracer V2.TraceLedgerTablesHandleClose
99+
p <- atomically $ swapTVar tv LedgerTablesHandleClosed
100+
case p of
101+
LedgerTablesHandleOpen{} -> traceWith tracer V2.TraceLedgerTablesHandleClose
102+
_ -> pure ()
101103
, duplicate = do
102104
hs <- readTVarIO tv
103105
!x <- guardClosed hs $ newInMemoryLedgerTablesHandle tracer someFS

ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/LedgerDB/StateMachine.hs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ import Ouroboros.Consensus.Util hiding (Some)
7272
import Ouroboros.Consensus.Util.Args
7373
import Ouroboros.Consensus.Util.IOLike
7474
import qualified Ouroboros.Network.AnchoredSeq as AS
75+
import Ouroboros.Network.Protocol.LocalStateQuery.Type
7576
import qualified System.Directory as Dir
7677
import System.FS.API
7778
import qualified System.FS.IO as FSIO
@@ -280,6 +281,9 @@ instance StateModel Model where
280281
Action Model (ExtLedgerState TestBlock EmptyMK, ExtLedgerState TestBlock EmptyMK)
281282
Init :: SecurityParam -> Action Model ()
282283
ValidateAndCommit :: Word64 -> [TestBlock] -> Action Model ()
284+
-- \| This action is used only to observe the side effects of closing an
285+
-- uncommitted forker, to ensure all handles are properly deallocated.
286+
OpenAndCloseForker :: Action Model ()
283287

284288
actionName WipeLedgerDB{} = "WipeLedgerDB"
285289
actionName TruncateSnapshots{} = "TruncateSnapshots"
@@ -288,6 +292,7 @@ instance StateModel Model where
288292
actionName GetState{} = "GetState"
289293
actionName Init{} = "Init"
290294
actionName ValidateAndCommit{} = "ValidateAndCommit"
295+
actionName OpenAndCloseForker = "OpenAndCloseForker"
291296

292297
arbitraryAction _ UnInit = Some . Init <$> QC.arbitrary
293298
arbitraryAction _ model@(Model chain secParam) =
@@ -322,6 +327,7 @@ instance StateModel Model where
322327
)
323328
, (1, pure $ Some WipeLedgerDB)
324329
, (1, pure $ Some TruncateSnapshots)
330+
, (1, pure $ Some OpenAndCloseForker)
325331
]
326332

327333
initialState = UnInit
@@ -363,6 +369,7 @@ instance StateModel Model where
363369
nextState state WipeLedgerDB _var = state
364370
nextState state TruncateSnapshots _var = state
365371
nextState state (DropAndRestore n) _var = modelRollback n state
372+
nextState state OpenAndCloseForker _var = state
366373
nextState UnInit _ _ = error "Uninitialized model created a command different than Init"
367374

368375
precondition UnInit Init{} = True
@@ -583,6 +590,14 @@ instance RunModel Model (StateT Environment IO) where
583590
atomically $ modifyTVar (dbChain chainDb) (drop (fromIntegral n))
584591
closeLedgerDB testInternals
585592
perform state (Init secParam) lk
593+
perform _ OpenAndCloseForker _ = do
594+
Environment ldb _ _ _ _ _ _ <- get
595+
lift $ withRegistry $ \rr -> do
596+
eFrk <- LedgerDB.getForkerAtTarget ldb rr VolatileTip
597+
case eFrk of
598+
Left err -> error $ "Impossible: can't acquire forker at tip: " <> show err
599+
Right frk -> forkerClose frk
600+
pure $ pure ()
586601
perform _ TruncateSnapshots _ = do
587602
Environment _ testInternals _ _ _ _ _ <- get
588603
lift $ truncateSnapshots testInternals

0 commit comments

Comments
 (0)