
Commit 01ca415

Store keys together with node data (#2849)
Currently, computed hash keys are stored in a separate column family with respect to the MPT data they're generated from - this has several disadvantages:

* a lot of space is wasted because the lookup key (`RootedVertexID`) is repeated in both tables - this is 30% of the `AriKey` content!
* rocksdb must maintain in-memory bloom filters and LRU caches for said keys, doubling its "minimal efficient cache size"
* an extra disk traversal must be made to check for the existence of a cached hash key
* the number of files on disk doubles, since each column family keeps its own set of files

Here, the two CFs are joined such that both key and data are stored in `AriVtx`. This means:

* we save ~30% disk space on repeated lookup keys
* we save ~2gb of memory overhead that can be used to cache data instead of indices
* we can skip storing hash keys for MPT leaf nodes - these are trivial to compute and waste a lot of space - previously they had to be present in the `AriKey` CF to avoid having to look in two tables on the happy path
* there is a small increase in write amplification because when a hash value is updated for a branch node, we must write both key and branch data - previously we would write only the key
* there's a small shift in CPU usage - instead of performing lookups in the database, hashes for leaf nodes are (re)computed on the fly
* we can return to slightly smaller on-disk SST files since there are fewer of them, which should reduce disk traffic a bit

Internally, there are also other advantages:

* when clearing keys, we no longer have to store a zero hash in memory - instead, we deduce staleness of the cached key from the presence of an updated VertexRef - this saves ~1gb of memory overhead during import
* the hash key cache becomes dedicated to branch keys since leaf keys are no longer stored in memory, reducing churn
* key computation is a lot faster thanks to the skipped second disk traversal - a key computation for mainnet can be completed in 11 hours instead of ~2 days (!) thanks to better cache usage and less read amplification - with additional improvements to the on-disk format, we can probably get rid of the initial full-traversal method of seeding the key cache on first start after import

All in all, this PR reduces the size of a mainnet database from 160gb to 110gb and the peak memory footprint during import by ~1-2gb.
1 parent d496793 commit 01ca415
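To make the layout change concrete, here is a minimal sketch - using hypothetical stand-in types and an in-memory table rather than the actual Aristo/rocksdb code - of what storing the cached key inside the vertex record means for lookups: one point read serves both vertex and key, and leaf keys are recomputed instead of being read from a second table.

```nim
# Hypothetical stand-in types; not the actual Aristo API.
import std/[tables, options]

type
  RootedVertexID = uint64            # stand-in for the real lookup key type
  VtxRecord = object
    vertexData: seq[byte]            # serialised vertex payload
    cachedKey: Option[seq[byte]]     # stored for branches, absent for leaves

var ariVtx: Table[RootedVertexID, VtxRecord]   # the single, joined table

proc computeLeafKey(vertexData: seq[byte]): seq[byte] =
  ## Stand-in for hashing a leaf on the fly - cheap relative to a disk read.
  @[byte(vertexData.len)]

proc lookupKey(rvid: RootedVertexID): Option[seq[byte]] =
  ## One point read yields both the vertex and, when stored, its hash key;
  ## previously a second read against the `AriKey` column family was needed.
  if rvid notin ariVtx:
    return none(seq[byte])
  let rec = ariVtx[rvid]
  if rec.cachedKey.isSome: rec.cachedKey
  else: some(computeLeafKey(rec.vertexData))

when isMainModule:
  ariVtx[1] = VtxRecord(vertexData: @[1'u8, 2, 3], cachedKey: some(@[0xaa'u8]))
  ariVtx[2] = VtxRecord(vertexData: @[4'u8, 5])   # leaf: no key stored
  doAssert lookupKey(1).get() == @[0xaa'u8]       # served from the record
  doAssert lookupKey(2).get() == @[2'u8]          # recomputed on the fly
```

The names `VtxRecord`, `lookupKey` and `computeLeafKey` are illustrative only; the actual on-disk encoding is shown in the `aristo_blobify.nim` diff below.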

26 files changed: +247 -289 lines

nimbus/db/aristo/aristo_api.nim

Lines changed: 0 additions & 9 deletions
@@ -510,7 +510,6 @@ type
     AristoApiProfBeGetTuvFn = "be/getTuv"
     AristoApiProfBeGetLstFn = "be/getLst"
     AristoApiProfBePutVtxFn = "be/putVtx"
-    AristoApiProfBePutKeyFn = "be/putKey"
     AristoApiProfBePutTuvFn = "be/putTuv"
     AristoApiProfBePutLstFn = "be/putLst"
     AristoApiProfBePutEndFn = "be/putEnd"
@@ -557,8 +556,6 @@ when AutoValidateApiHooks:

     doAssert not api.partAccountTwig.isNil
     doAssert not api.partStorageTwig.isNil
-    doAssert not api.partUntwigGeneric.isNil
-    doAssert not api.partUntwigGenericOk.isNil
     doAssert not api.partUntwigPath.isNil
     doAssert not api.partUntwigPathOk.isNil

@@ -910,12 +907,6 @@ func init*(
           be.putVtxFn(a, b, c)
     data.list[AristoApiProfBePutVtxFn.ord].masked = true

-    beDup.putKeyFn =
-      proc(a: PutHdlRef; b: RootedVertexID, c: HashKey) =
-        AristoApiProfBePutKeyFn.profileRunner:
-          be.putKeyFn(a, b, c)
-    data.list[AristoApiProfBePutKeyFn.ord].masked = true
-
     beDup.putTuvFn =
       proc(a: PutHdlRef; b: VertexID) =
         AristoApiProfBePutTuvFn.profileRunner:

nimbus/db/aristo/aristo_blobify.nim

Lines changed: 32 additions & 14 deletions
@@ -157,21 +157,22 @@ proc blobifyTo*(pyl: LeafPayload, data: var seq[byte]) =
     data &= pyl.stoData.blobify().data
     data &= [0x20.byte]

-proc blobifyTo*(vtx: VertexRef; data: var seq[byte]): Result[void,AristoError] =
+proc blobifyTo*(vtx: VertexRef; key: HashKey, data: var seq[byte]): Result[void,AristoError] =
   ## This function serialises the vertex argument to a database record.
   ## Contrary to RLP based serialisation, these records aim to align on
   ## fixed byte boundaries.
   ## ::
   ##   Branch:
+  ##     <HashKey>      -- optional hash key
   ##     [VertexID, ..] -- list of up to 16 child vertices lookup keys
   ##     seq[byte]      -- hex encoded partial path (non-empty for extension nodes)
   ##     uint64         -- lengths of each child vertex, each taking 4 bits
-  ##     0x80 + xx      -- marker(2) + pathSegmentLen(6)
+  ##     0x80 + xx      -- marker(0/2) + pathSegmentLen(6)
   ##
   ##   Leaf:
   ##     seq[byte]      -- opaque leaf data payload (might be zero length)
   ##     seq[byte]      -- hex encoded partial path (at least one byte)
-  ##     0xc0 + yy      -- marker(2) + partialPathLen(6)
+  ##     0xc0 + yy      -- marker(3) + partialPathLen(6)
   ##
   ## For a branch record, the bytes of the `access` array indicate the position
   ## of the Patricia Trie vertex reference. So the `vertexID` with index `n` has
@@ -182,6 +183,13 @@ proc blobifyTo*(vtx: VertexRef; data: var seq[byte]): Result[void,AristoError] =
     return err(BlobifyNilVertex)
   case vtx.vType:
   of Branch:
+    let code = if key.isValid:
+        data.add byte(key.len)
+        data.add key.data()
+        # TODO using 0 here for legacy reasons - a bit flag would be easier
+        0'u8 shl 6
+      else:
+        2'u8 shl 6
     var
       lens = 0u64
       pos = data.len
@@ -205,7 +213,7 @@

     data &= pSegm.data()
     data &= lens.toBytesBE
-    data &= [0x80u8 or psLen]
+    data &= [code or psLen]

   of Leaf:
     let
@@ -215,14 +223,14 @@
       return err(BlobifyLeafPathOverflow)
     vtx.lData.blobifyTo(data)
     data &= pSegm.data()
-    data &= [0xC0u8 or psLen]
+    data &= [(3'u8 shl 6) or psLen]

   ok()

-proc blobify*(vtx: VertexRef): seq[byte] =
+proc blobify*(vtx: VertexRef, key: HashKey): seq[byte] =
   ## Variant of `blobify()`
   result = newSeqOfCap[byte](128)
-  if vtx.blobifyTo(result).isErr:
+  if vtx.blobifyTo(key, result).isErr:
     result.setLen(0) # blobify only fails on invalid verticies

 proc blobifyTo*(lSst: SavedState; data: var seq[byte]): Result[void,AristoError] =
@@ -287,7 +295,7 @@ proc deblobifyType*(record: openArray[byte]; T: type VertexRef):
     return err(DeblobVtxTooShort)

   ok case record[^1] shr 6:
-  of 2: Branch
+  of 0, 2: Branch
   of 3: Leaf
   else:
     return err(DeblobUnknown)
@@ -300,16 +308,20 @@ proc deblobify*(
   ## argument `vtx` can be `nil`.
   if record.len < 3: # minimum `Leaf` record
     return err(DeblobVtxTooShort)
-
-  ok case record[^1] shr 6:
-  of 2: # `Branch` vertex
-    if record.len < 11: # at least two edges
+  let kind = record[^1] shr 6
+  let start = if kind == 0:
+      int(record[0] + 1)
+    else:
+      0
+  ok case kind:
+  of 0, 2: # `Branch` vertex
+    if record.len - start < 11: # at least two edges
       return err(DeblobBranchTooShort)
     let
       aInx = record.len - 9
       aIny = record.len - 2
     var
-      offs = 0
+      offs = start
       lens = uint64.fromBytesBE record.toOpenArray(aInx, aIny) # bitmap
       vtxList: array[16,VertexID]
       n = 0
@@ -346,12 +358,18 @@
       vType: Leaf,
       pfx: pathSegment)

-    ? record.toOpenArray(0, pLen - 1).deblobify(vtx.lData)
+    ? record.toOpenArray(start, pLen - 1).deblobify(vtx.lData)
     vtx

   else:
     return err(DeblobUnknown)

+proc deblobify*(record: openArray[byte], T: type HashKey): Opt[HashKey] =
+  if record.len > 1 and ((record[^1] shr 6) == 0) and (int(record[0]) + 1) < record.len:
+    HashKey.fromBytes(record.toOpenArray(1, int(record[0])))
+  else:
+    Opt.none(HashKey)
+
 proc deblobify*(
   data: openArray[byte];
   T: type SavedState;