Skip to content

Commit 6b00088

Browse files
committed
Properly cleanup forkers at the appropriate times
1 parent a8fd47b commit 6b00088

File tree

7 files changed

+167
-111
lines changed

7 files changed

+167
-111
lines changed

ouroboros-consensus-cardano/app/snapshot-converter.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,15 +203,18 @@ load config@Config{inpath = pathToDiskSnapshot -> Just (fs@(SomeHasFS hasFS), pa
203203
(V2.state h,) <$> Trans.lift (V2.readAll (V2.tables h))
204204
LMDB -> do
205205
checkSnapshotFileStructure LMDB path fs
206-
((dbch, bstore), _) <-
206+
((dbch, k, bstore), _) <-
207207
withExceptT SnapshotError $
208208
V1.loadSnapshot
209209
nullTracer
210210
(V1.LMDBBackingStoreArgs tempFP defaultLMDBLimits Dict.Dict)
211211
ccfg
212212
(V1.SnapshotsFS fs)
213+
rr
213214
ds
214-
(V1.current dbch,) <$> Trans.lift (V1.bsReadAll bstore (V1.changelogLastFlushedState dbch))
215+
values <- Trans.lift (V1.bsReadAll bstore (V1.changelogLastFlushedState dbch))
216+
_ <- Trans.lift $ RR.release k
217+
pure (V1.current dbch, values)
215218
load _ _ _ _ = error "Malformed input path!"
216219

217220
store ::
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<!--
2+
A new scriv changelog fragment.
3+
4+
Uncomment the section that is right (remove the HTML comment wrapper).
5+
-->
6+
7+
### Patch
8+
9+
- The backing store of the V1 LedgerDB was only tracked in the
10+
resource registry if we were starting from Genesis. Now the backing
11+
store will be properly tracked in the resource registry even when we
12+
start from a snapshot.
13+
14+
- Closing the LedgerDB will no longer release all the open forkers,
15+
but instead invalidate them by emptying the ldbForkers map, so that
16+
the only possible operation that could be performed is closing them
17+
in the LedgerDB clients, such as ChainSel or the forging loop.
18+
19+
- Closing the forker is idempotent, and it was performed both when
20+
calling `forkerClose` as well as when the resource registry of the
21+
LedgerDB client was going out of scope. Now, `forkerClose` will
22+
release the resource from the registry so this won't run twice.
23+
24+
<!--
25+
### Non-Breaking
26+
-->
27+
<!--
28+
### Breaking
29+
30+
- A bullet item for the Breaking category.
31+
32+
-->

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V1.hs

Lines changed: 57 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ import Control.Monad.Except
2525
import Control.Monad.Trans (lift)
2626
import Control.ResourceRegistry
2727
import Control.Tracer
28-
import Data.Bifunctor (first)
2928
import qualified Data.Foldable as Foldable
3029
import Data.Functor.Contravariant ((>$<))
3130
import Data.Kind (Type)
@@ -83,24 +82,29 @@ mkInitDb ::
8382
Complete LedgerDbArgs m blk ->
8483
Complete V1.LedgerDbFlavorArgs m ->
8584
ResolveBlock m blk ->
86-
InitDB (DbChangelog' blk, BackingStore' m blk) m blk
85+
InitDB (DbChangelog' blk, ResourceKey m, BackingStore' m blk) m blk
8786
mkInitDb args bss getBlock =
8887
InitDB
8988
{ initFromGenesis = do
9089
st <- lgrGenesis
9190
let genesis = forgetLedgerTables st
9291
chlog = DbCh.empty genesis
93-
(_, backingStore) <-
92+
(bsKey, backingStore) <-
9493
allocate
9594
lgrRegistry
9695
(\_ -> newBackingStore bsTracer baArgs lgrHasFS' genesis (projectLedgerTables st))
9796
bsClose
98-
pure (chlog, backingStore)
97+
pure (chlog, bsKey, backingStore)
9998
, initFromSnapshot =
10099
runExceptT
101-
. loadSnapshot bsTracer baArgs (configCodec . getExtLedgerCfg . ledgerDbCfg $ lgrConfig) lgrHasFS'
102-
, closeDb = bsClose . snd
103-
, initReapplyBlock = \cfg blk (chlog, bstore) -> do
100+
. loadSnapshot
101+
bsTracer
102+
baArgs
103+
(configCodec . getExtLedgerCfg . ledgerDbCfg $ lgrConfig)
104+
lgrHasFS'
105+
lgrRegistry
106+
, closeDb = \(_, r, _) -> void $ release r
107+
, initReapplyBlock = \cfg blk (chlog, r, bstore) -> do
104108
!chlog' <- reapplyThenPush cfg blk (readKeySets bstore) chlog
105109
-- It's OK to flush without a lock here, since the `LedgerDB` has not
106110
-- finished initializing, only this thread has access to the backing
@@ -113,10 +117,10 @@ mkInitDb args bss getBlock =
113117
mapM_ (flushIntoBackingStore bstore) toFlush
114118
pure toKeep
115119
else pure chlog'
116-
pure (chlog'', bstore)
117-
, currentTip = ledgerState . current . fst
118-
, pruneDb = pure . first pruneToImmTipOnly
119-
, mkLedgerDb = \(db, lgrBackingStore) -> do
120+
pure (chlog'', r, bstore)
121+
, currentTip = \(ch, _, _) -> ledgerState . current $ ch
122+
, pruneDb = \(ch, r, bs) -> pure (pruneToImmTipOnly ch, r, bs)
123+
, mkLedgerDb = \(db, ldbBackingStoreKey, ldbBackingStore) -> do
120124
(varDB, prevApplied) <-
121125
(,) <$> newTVarIO db <*> newTVarIO Set.empty
122126
flushLock <- mkLedgerDBLock
@@ -125,7 +129,8 @@ mkInitDb args bss getBlock =
125129
let env =
126130
LedgerDBEnv
127131
{ ldbChangelog = varDB
128-
, ldbBackingStore = lgrBackingStore
132+
, ldbBackingStore = ldbBackingStore
133+
, ldbBackingStoreKey = ldbBackingStoreKey
129134
, ldbLock = flushLock
130135
, ldbPrevApplied = prevApplied
131136
, ldbForkers = forkers
@@ -329,13 +334,14 @@ implCloseDB (LDBHandle varState) = do
329334
-- Idempotent
330335
LedgerDBClosed -> return Nothing
331336
LedgerDBOpen env -> do
337+
-- By writing this tvar, we already make sure that no
338+
-- forkers can perform operations other than closing, as
339+
-- they rely on accessing the LedgerDB, which is now closed.
332340
writeTVar varState LedgerDBClosed
333341
return $ Just env
334342

335343
-- Only when the LedgerDB was open
336-
whenJust mbOpenEnv $ \env -> do
337-
closeAllForkers env
338-
bsClose (ldbBackingStore env)
344+
whenJust mbOpenEnv $ void . release . ldbBackingStoreKey
339345

340346
mkInternals ::
341347
( IOLike m
@@ -351,7 +357,7 @@ mkInternals h =
351357
, push = getEnv1 h implIntPush
352358
, reapplyThenPushNOW = getEnv1 h implIntReapplyThenPush
353359
, wipeLedgerDB = getEnv h $ void . destroySnapshots . snapshotsFs . ldbHasFS
354-
, closeLedgerDB = getEnv h $ bsClose . ldbBackingStore
360+
, closeLedgerDB = getEnv h $ void . release . ldbBackingStoreKey
355361
, truncateSnapshots = getEnv h $ void . implIntTruncateSnapshots . ldbHasFS
356362
, getNumLedgerTablesHandles = pure 0
357363
}
@@ -482,6 +488,10 @@ data LedgerDBEnv m l blk = LedgerDBEnv
482488
, ldbBackingStore :: !(LedgerBackingStore m l)
483489
-- ^ Handle to the ledger's backing store, containing the parts that grow too
484490
-- big for in-memory residency
491+
, ldbBackingStoreKey :: !(ResourceKey m)
492+
-- ^ When deallocating the backing store upon closing the LedgerDB
493+
-- (via the ChainDB shutting down), we will release the backing
494+
-- store with this action.
485495
, ldbLock :: !(LedgerDBLock m)
486496
-- ^ The flush lock to the 'BackingStore'. This lock is crucial when it
487497
-- comes to keeping the data in memory consistent with the data on-disk.
@@ -512,6 +522,18 @@ data LedgerDBEnv m l blk = LedgerDBEnv
512522
-- ^ Open forkers.
513523
--
514524
-- INVARIANT: a forker is open iff its 'ForkerKey' is in this 'Map.
525+
--
526+
-- The resources that could possibly be held by these forkers will
527+
-- be released by each one of the client's registries. This means
528+
-- that for example ChainSelection will, upon closing its registry,
529+
-- release its forker and any resources associated.
530+
--
531+
-- Upon closing the LedgerDB we will overwrite this variable such
532+
-- that existing forkers can only be closed, as closing doesn't
533+
-- involve accessing this map (other than possibly removing the
534+
-- forker from it if the map still exists).
535+
--
536+
-- As the LedgerDB should outlive any clients, this is fine.
515537
, ldbNextForkerKey :: !(StrictTVar m ForkerKey)
516538
, ldbSnapshotPolicy :: !SnapshotPolicy
517539
, ldbTracer :: !(Tracer m (TraceEvent blk))
@@ -691,21 +713,6 @@ newForkerByRollback ::
691713
newForkerByRollback h rr n = getEnv h $ \ldbEnv -> do
692714
withReadLock (ldbLock ldbEnv) (acquireAtTarget ldbEnv (Left n) >>= traverse (newForker h ldbEnv rr))
693715

694-
-- | Close all open block and header 'Forker's.
695-
closeAllForkers ::
696-
IOLike m =>
697-
LedgerDBEnv m l blk ->
698-
m ()
699-
closeAllForkers ldbEnv =
700-
do
701-
forkerEnvs <- atomically $ do
702-
forkerEnvs <- Map.elems <$> readTVar forkersVar
703-
writeTVar forkersVar Map.empty
704-
return forkerEnvs
705-
mapM_ closeForkerEnv forkerEnvs
706-
where
707-
forkersVar = ldbForkers ldbEnv
708-
709716
-- | Acquire both a value handle and a db changelog at the tip. Holds a read lock
710717
-- while doing so.
711718
acquireAtTarget ::
@@ -784,7 +791,7 @@ newForker h ldbEnv rr dblog = readLocked $ do
784791
-- read access we acquired above.
785792
modifyTVar (ldbForkers ldbEnv) $ Map.insert forkerKey forkerEnv
786793
traceWith (foeTracer forkerEnv) ForkerOpen
787-
pure $ mkForker h (ldbQueryBatchSize ldbEnv) forkerKey
794+
pure $ mkForker h (ldbQueryBatchSize ldbEnv) forkerKey forkerEnv
788795

789796
mkForker ::
790797
( IOLike m
@@ -795,10 +802,11 @@ mkForker ::
795802
LedgerDBHandle m l blk ->
796803
QueryBatchSize ->
797804
ForkerKey ->
805+
ForkerEnv m l blk ->
798806
Forker m l blk
799-
mkForker h qbs forkerKey =
807+
mkForker h qbs forkerKey forkerEnv =
800808
Forker
801-
{ forkerClose = implForkerClose h forkerKey
809+
{ forkerClose = implForkerClose h forkerKey forkerEnv
802810
, forkerReadTables = getForkerEnv1 h forkerKey implForkerReadTables
803811
, forkerRangeReadTables = getForkerEnv1 h forkerKey (implForkerRangeReadTables qbs)
804812
, forkerGetLedgerState = getForkerEnvSTM h forkerKey implForkerGetLedgerState
@@ -807,18 +815,23 @@ mkForker h qbs forkerKey =
807815
, forkerCommit = getForkerEnvSTM h forkerKey implForkerCommit
808816
}
809817

818+
-- | This function receives an environment instead of reading it from
819+
-- the DB such that we can close the forker even if the LedgerDB is
820+
-- closed. In fact this should never happen as clients of the LedgerDB
821+
-- (which are the ones opening forkers) should never outlive the
822+
-- LedgerDB.
810823
implForkerClose ::
811824
IOLike m =>
812825
LedgerDBHandle m l blk ->
813826
ForkerKey ->
827+
ForkerEnv m l blk ->
814828
m ()
815-
implForkerClose (LDBHandle varState) forkerKey = do
816-
envMay <-
817-
atomically $
818-
readTVar varState >>= \case
819-
LedgerDBClosed -> pure Nothing
820-
LedgerDBOpen ldbEnv -> do
821-
stateTVar
822-
(ldbForkers ldbEnv)
823-
(Map.updateLookupWithKey (\_ _ -> Nothing) forkerKey)
824-
whenJust envMay closeForkerEnv
829+
implForkerClose (LDBHandle varState) forkerKey env = do
830+
atomically $
831+
readTVar varState >>= \case
832+
LedgerDBClosed -> pure ()
833+
LedgerDBOpen ldbEnv -> do
834+
modifyTVar
835+
(ldbForkers ldbEnv)
836+
(Map.delete forkerKey)
837+
closeForkerEnv env

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V1/Forker.hs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE DataKinds #-}
12
{-# LANGUAGE DeriveAnyClass #-}
23
{-# LANGUAGE DeriveGeneric #-}
34
{-# LANGUAGE FlexibleContexts #-}
@@ -19,6 +20,7 @@ module Ouroboros.Consensus.Storage.LedgerDB.V1.Forker
1920
, implForkerReadTables
2021
) where
2122

23+
import qualified Control.Monad as Monad
2224
import Control.ResourceRegistry
2325
import Control.Tracer
2426
import qualified Data.Map.Strict as Map
@@ -55,7 +57,7 @@ data ForkerEnv m l blk = ForkerEnv
5557
m
5658
( Either
5759
(LedgerDBLock m, LedgerBackingStore m l, ResourceRegistry m)
58-
(LedgerBackingStoreValueHandle m l)
60+
(ResourceKey m, LedgerBackingStoreValueHandle m l)
5961
)
6062
)
6163
-- ^ Either the ingredients to create a value handle or a value handle, i.e. a
@@ -91,7 +93,9 @@ deriving instance
9193

9294
closeForkerEnv :: IOLike m => ForkerEnv m l blk -> m ()
9395
closeForkerEnv ForkerEnv{foeBackingStoreValueHandle} = do
94-
either (\(l, _, _) -> atomically . unsafeReleaseReadAccess $ l) bsvhClose
96+
either
97+
(\(l, _, _) -> atomically . unsafeReleaseReadAccess $ l)
98+
(Monad.void . release . fst)
9599
=<< takeMVar foeBackingStoreValueHandle
96100

97101
{-------------------------------------------------------------------------------
@@ -103,18 +107,16 @@ closeForkerEnv ForkerEnv{foeBackingStoreValueHandle} = do
103107
getValueHandle :: (GetTip l, IOLike m) => ForkerEnv m l blk -> m (LedgerBackingStoreValueHandle m l)
104108
getValueHandle ForkerEnv{foeBackingStoreValueHandle, foeChangelog} =
105109
modifyMVar foeBackingStoreValueHandle $ \case
106-
r@(Right bsvh) -> pure (r, bsvh)
110+
r@(Right (_, bsvh)) -> pure (r, bsvh)
107111
Left (l, bs, rr) -> do
108-
-- bsvhClose is idempotent, so we let the resource call it even if the value
109-
-- handle might have been closed somewhere else
110-
(_, bsvh) <- allocate rr (\_ -> bsValueHandle bs) bsvhClose
112+
(k, bsvh) <- allocate rr (\_ -> bsValueHandle bs) bsvhClose
111113
dblogSlot <- getTipSlot . changelogLastFlushedState <$> readTVarIO foeChangelog
112114
if bsvhAtSlot bsvh == dblogSlot
113115
then do
114116
atomically $ unsafeReleaseReadAccess l
115-
pure (Right bsvh, bsvh)
117+
pure (Right (k, bsvh), bsvh)
116118
else
117-
bsvhClose bsvh
119+
release k
118120
>> error
119121
( "Critical error: Value handles are created at "
120122
<> show (bsvhAtSlot bsvh)

ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/LedgerDB/V1/Snapshots.hs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ import Codec.Serialise
142142
import qualified Control.Monad as Monad
143143
import Control.Monad.Except
144144
import qualified Control.Monad.Trans as Trans (lift)
145+
import Control.ResourceRegistry
145146
import Control.Tracer
146147
import Data.Functor.Contravariant ((>$<))
147148
import qualified Data.List as List
@@ -259,12 +260,13 @@ loadSnapshot ::
259260
Complete BackingStoreArgs m ->
260261
CodecConfig blk ->
261262
SnapshotsFS m ->
263+
ResourceRegistry m ->
262264
DiskSnapshot ->
263265
ExceptT
264266
(SnapshotFailure blk)
265267
m
266-
((DbChangelog' blk, LedgerBackingStore m (ExtLedgerState blk)), RealPoint blk)
267-
loadSnapshot tracer bss ccfg fs@(SnapshotsFS fs') s = do
268+
((DbChangelog' blk, ResourceKey m, LedgerBackingStore m (ExtLedgerState blk)), RealPoint blk)
269+
loadSnapshot tracer bss ccfg fs@(SnapshotsFS fs') reg s = do
268270
(extLedgerSt, checksumAsRead) <-
269271
withExceptT (InitFailureRead . ReadSnapshotFailed) $
270272
readExtLedgerState fs' (decodeDiskExtLedgerState ccfg) decode (snapshotToStatePath s)
@@ -283,6 +285,8 @@ loadSnapshot tracer bss ccfg fs@(SnapshotsFS fs') s = do
283285
case pointToWithOriginRealPoint (castPoint (getTip extLedgerSt)) of
284286
Origin -> throwError InitFailureGenesis
285287
NotOrigin pt -> do
286-
backingStore <- Trans.lift (restoreBackingStore tracer bss fs extLedgerSt (snapshotToTablesPath s))
288+
(bsKey, backingStore) <-
289+
Trans.lift
290+
(allocate reg (\_ -> restoreBackingStore tracer bss fs extLedgerSt (snapshotToTablesPath s)) bsClose)
287291
let chlog = empty extLedgerSt
288-
pure ((chlog, backingStore), pt)
292+
pure ((chlog, bsKey, backingStore), pt)

0 commit comments

Comments
 (0)