Skip to content

Commit 49ffbd8

Browse files
committed
Path-based VertexID
VertexID has 64 bits of values, but currently the chain state database uses only a tiny fraction of that (~27 bits). Here, we split up the number space into a fixed portion statically allocated from the MPT path and a dynamic portion for leaves and storage slots. The static portion simply allocates a (breadth-first) number based on the first nibbles in the address/path, while any "deeper" paths instead get a dynamic VertexID like before. Since the VertexID is path-based, we can more or less guess the VertexID of any node whose path we know, based on the "average" depth of the state trie. When we're lucky, a single lookup is sufficient to find the node instead of a one-by-one traversal of each level. Even in the case that a single lookup is not enough and the actual node is "deeper" than the guess, the starting point helps skip a few levels at least. Tree depth is estimated by keeping track of hits and misses and occasionally making an adjustment in the direction of the most misses. On average, this shaves 25% off the import time for the first 15M blocks, where the lookup depth is guessed to be 7 levels - deepening the trie by one more level (when more accounts eventually are added) would see even better performance. Using 8 levels of statically assigned ids results in 2**32 bits left for dynamic ids / storage slots - this should by far be enough for any foreseeable lifetime of the application, especially because large parts of the "current" usage of the VertexID space remain used by actual nodes. The resulting lookup structure can be thought of as a hybrid between fully path-based lookups and the current "sparse" id mapping. made with coffee sponsored by 0x-r4bbit fix off-by-one cleanups
1 parent 6341f5f commit 49ffbd8

File tree

16 files changed

+286
-67
lines changed

16 files changed

+286
-67
lines changed

execution_chain/db/aristo/aristo_blobify.nim

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ template data*(v: SbeBuf): openArray[byte] =
5151
func blobify*(rvid: RootedVertexID): RVidBuf =
5252
# Length-prefixed root encoding creates a unique and common prefix for all
5353
# vertices sharing the same root
54-
# TODO evaluate an encoding that colocates short roots (like VertexID(1)) with
54+
# TODO evaluate an encoding that colocates short roots (like STATE_ROOT_VID) with
5555
# the length
5656
let root = rvid.root.blobify()
5757
result.buf[0] = root.len

execution_chain/db/aristo/aristo_check/check_top.nim

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,6 @@ proc checkTopCommon*(
113113
if rc.value[0].isValid:
114114
return err((rvid.vid,CheckAnyVtxEmptyKeyExpected))
115115

116-
if vTop.distinctBase < LEAST_FREE_VID:
117-
# Verify that all vids are below `LEAST_FREE_VID`
118-
if topVid.distinctBase < LEAST_FREE_VID:
119-
for (rvid,key) in db.layersWalkKey:
120-
if key.isValid and LEAST_FREE_VID <= rvid.vid.distinctBase:
121-
return err((topVid,CheckAnyVTopUnset))
122-
123116
# If present, there are at least as many deleted hashes as there are deleted
124117
# vertices.
125118
if kMapNilCount != 0 and kMapNilCount < nNilVtx:

execution_chain/db/aristo/aristo_check/check_twig.nim

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ proc checkTwig*(
2525
): Result[void,AristoError] =
2626
let
2727
proof = ? db.makeAccountProof(accPath)
28-
key = ? db.computeKey (VertexID(1),VertexID(1))
28+
key = ? db.computeKey (STATE_ROOT_VID,STATE_ROOT_VID)
2929
discard ? proof[0].verifyProof(key.to(Hash32), accPath)
3030

3131
ok()
@@ -38,7 +38,7 @@ proc checkTwig*(
3838
let
3939
proof = ? db.makeStorageProof(accPath, stoPath)
4040
vid = ? db.fetchStorageID accPath
41-
key = ? db.computeKey (VertexID(1),vid)
41+
key = ? db.computeKey (STATE_ROOT_VID,vid)
4242
discard ? proof[0].verifyProof(key.to(Hash32), stoPath)
4343

4444
ok()

execution_chain/db/aristo/aristo_constants.nim

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,25 @@ const
2727
VOID_HASH_KEY* = HashKey()
2828
## Void equivalent for Merkle hash value
2929

30-
LEAST_FREE_VID* = 100
31-
## Vids smaller are used as known state roots and cannot be recycled. Only
32-
## the `VertexID(1)` state root is used by the `Aristo` methods. The other
33-
## numbers smaller than `LEAST_FREE_VID` may be used by application
34-
## functions with fixed assignments of the type of a state root (e.g. for
35-
## a receipt or a transaction root.)
30+
STATE_ROOT_VID* = VertexID(1)
31+
## VertexID of state root entry in the MPT
32+
33+
STATIC_VID_LEVELS* = 8
34+
## Number of MPT levels in the account trie that get a fixed VertexID based
35+
## on the initial nibbles of the path. We'll consume a little bit more than
36+
## `STATIC_VID_LEVELS*4` bits for the static part of the vid space:
37+
##
38+
## STATE_ROOT_VID + 16^0 + 16^1 + ... + 16^STATIC_VID_LEVELS
39+
40+
FIRST_DYNAMIC_VID* = ## First VertexID of the sparse/dynamic part of the MPT
41+
block:
42+
var v = uint64(STATE_ROOT_VID)
43+
for i in 0..STATIC_VID_LEVELS:
44+
v += 1'u64 shl (i * 4)
45+
v
3646

3747
ACC_LRU_SIZE* = 1024 * 1024
3848
## LRU cache size for accounts that have storage, see `.accLeaves` and
3949
## `.stoLeaves` fields of the main descriptor.
4050

41-
static:
42-
# must stay away from `VertexID(1)` and `VertexID(2)`
43-
doAssert 2 < LEAST_FREE_VID
44-
4551
# End

execution_chain/db/aristo/aristo_desc.nim

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ type
125125
# Debugging data below, might go away in future
126126
xMap*: Table[HashKey,RootedVertexID] ## For pretty printing/debugging
127127

128+
staticLevel*: int
129+
## MPT level where "most" leaves can be found, for static vid lookups
130+
lookups*: tuple[lower, hits, higher: int]
131+
128132
Leg* = object
129133
## For constructing a `VertexPath`
130134
wp*: VidVtxPair ## Vertex ID and data ref
@@ -238,6 +242,23 @@ proc deltaAtLevel*(db: AristoTxRef, level: int): AristoTxRef =
238242
return frame
239243
nil
240244
245+
func getStaticLevel*(db: AristoDbRef): int =
246+
# Retrieve the level where we can expect to find a leaf, updating it based on
247+
# recent lookups
248+
249+
if db.lookups[0] + db.lookups[1] + db.lookups[2] >= 1024:
250+
if db.lookups.lower > db.lookups.hits + db.lookups.higher:
251+
db.staticLevel = max(1, db.staticLevel - 1)
252+
elif db.lookups.higher > db.lookups.hits + db.lookups.lower:
253+
db.staticLevel = min(STATIC_VID_LEVELS, db.staticLevel + 1)
254+
reset(db.lookups)
255+
256+
if db.staticLevel == 0:
257+
db.staticLevel = 1
258+
259+
db.staticLevel
260+
261+
241262
# ------------------------------------------------------------------------------
242263
# End
243264
# ------------------------------------------------------------------------------

execution_chain/db/aristo/aristo_fetch.nim

Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import
1717
std/typetraits,
1818
eth/common/[base, hashes],
1919
results,
20-
"."/[aristo_compute, aristo_desc, aristo_get, aristo_layers, aristo_hike]
20+
"."/[aristo_compute, aristo_desc, aristo_get, aristo_layers, aristo_hike, aristo_vid]
2121

2222
# ------------------------------------------------------------------------------
2323
# Private functions
@@ -26,9 +26,10 @@ import
2626
proc retrieveLeaf(
2727
db: AristoTxRef;
2828
root: VertexID;
29-
path: Hash32;
29+
path: NibblesBuf;
30+
next = VertexID(0),
3031
): Result[VertexRef,AristoError] =
31-
for step in stepUp(NibblesBuf.fromBytes(path.data), root, db):
32+
for step in stepUp(path, root, db, next):
3233
let vtx = step.valueOr:
3334
if error in HikeAcceptableStopsNotFound:
3435
return err(FetchPathNotFound)
@@ -53,6 +54,80 @@ proc cachedStoLeaf*(db: AristoTxRef; mixPath: Hash32): Opt[StoLeafRef] =
5354
db.db.stoLeaves.get(mixPath) or
5455
Opt.none(StoLeafRef)
5556

57+
proc retrieveAccStatic(
58+
db: AristoTxRef;
59+
accPath: Hash32;
60+
): Result[(AccLeafRef, NibblesBuf, VertexID),AristoError] =
61+
# A static VertexID essentially splits the path into a prefix encoded in the
62+
# vid and the rest of the path stored as normal - here, instead of traversing
63+
# the trie from the root and selecting a path nibble by nibble we traverse the
64+
# trie starting at `staticLevel` and search towards the root until either we
65+
# hit the node we're looking for or at least a branch from which we can
66+
# shorten the lookup.
67+
let staticLevel = db.db.getStaticLevel()
68+
69+
var path = NibblesBuf.fromBytes(accPath.data)
70+
var next: VertexID
71+
72+
for sl in countdown(staticLevel, 0):
73+
template countHitOrLower() =
74+
if sl == staticLevel:
75+
db.db.lookups.hits += 1
76+
else:
77+
db.db.lookups.lower += 1
78+
79+
let
80+
svid = path.staticVid(sl)
81+
vtx = db.getVtxRc((STATE_ROOT_VID, svid)).valueOr:
82+
# Either the node doesn't exist or our guess used too many nibbles and
83+
# the trie is not yet this deep at the given path - either way, we'll
84+
# try a less deep guess which will result either in a branch,
85+
# non-matching leaf or more missing vertices.
86+
continue
87+
88+
case vtx[0].vType
89+
of Leaves:
90+
let vtx = AccLeafRef(vtx[0])
91+
92+
countHitOrLower()
93+
return
94+
if vtx.pfx != path.slice(sl): # Same prefix, different path
95+
err FetchPathNotFound
96+
else:
97+
ok (vtx, path, next)
98+
of ExtBranch:
99+
let vtx = ExtBranchRef(vtx[0])
100+
101+
if vtx.pfx != path.slice(sl, sl + vtx.pfx.len): # Same prefix, different path
102+
countHitOrLower()
103+
return err FetchPathNotFound
104+
105+
let nibble = path[sl + vtx.pfx.len]
106+
next = vtx.bVid(nibble)
107+
108+
if not next.isValid():
109+
countHitOrLower()
110+
return err FetchPathNotFound
111+
112+
path = path.slice(sl + vtx.pfx.len + 1)
113+
114+
break # Continue the search down the branch children, starting at `next`
115+
of Branch: # Same as ExtBranch with vtx.pfx.len == 0!
116+
let vtx = BranchRef(vtx[0])
117+
118+
let nibble = path[sl]
119+
next = vtx.bVid(nibble)
120+
121+
if not next.isValid():
122+
countHitOrLower()
123+
return err FetchPathNotFound
124+
125+
path = path.slice(sl + 1)
126+
break # Continue the search down the branch children, starting at `next`
127+
128+
# We end up here when we have to continue the search down a branch
129+
ok (nil, path, next)
130+
56131
proc retrieveAccLeaf(
57132
db: AristoTxRef;
58133
accPath: Hash32;
@@ -62,14 +137,29 @@ proc retrieveAccLeaf(
62137
return err(FetchPathNotFound)
63138
return ok leafVtx[]
64139

140+
let (staticVtx, path, next) = db.retrieveAccStatic(accPath).valueOr:
141+
if error == FetchPathNotFound:
142+
db.db.accLeaves.put(accPath, nil)
143+
return err(error)
144+
145+
if staticVtx.isValid():
146+
db.db.accLeaves.put(accPath, staticVtx)
147+
return ok staticVtx
148+
65149
# Updated payloads are stored in the layers so if we didn't find them there,
66150
# it must have been in the database
67151
let
68-
leafVtx = db.retrieveLeaf(VertexID(1), accPath).valueOr:
152+
leafVtx = db.retrieveLeaf(STATE_ROOT_VID, path, next).valueOr:
69153
if error == FetchPathNotFound:
154+
# The branch was the deepest level where a vertex actually existed
155+
# meaning that it was a hit - else searches for non-existing paths would
156+
# skew the results towards more depth than exists in the MPT
157+
db.db.lookups.hits += 1
70158
db.db.accLeaves.put(accPath, nil)
71159
return err(error)
72160

161+
db.db.lookups.higher += 1
162+
73163
db.db.accLeaves.put(accPath, AccLeafRef(leafVtx))
74164

75165
ok AccLeafRef(leafVtx)
@@ -130,7 +220,7 @@ proc fetchAccountHike*(
130220
if leaf == Opt.some(AccLeafRef(nil)):
131221
return err(FetchAccInaccessible)
132222

133-
accPath.hikeUp(VertexID(1), db, leaf, accHike).isOkOr:
223+
accPath.hikeUp(STATE_ROOT_VID, db, leaf, accHike).isOkOr:
134224
return err(FetchAccInaccessible)
135225

136226
# Extract the account payload from the leaf
@@ -163,7 +253,8 @@ proc retrieveStoragePayload(
163253

164254
# Updated payloads are stored in the layers so if we didn't find them there,
165255
# it must have been in the database
166-
let leafVtx = db.retrieveLeaf(? db.fetchStorageIdImpl(accPath), stoPath).valueOr:
256+
let leafVtx = db.retrieveLeaf(
257+
? db.fetchStorageIdImpl(accPath), NibblesBuf.fromBytes(stoPath.data)).valueOr:
167258
if error == FetchPathNotFound:
168259
db.db.stoLeaves.put(mixPath, nil)
169260
return err(error)
@@ -214,7 +305,7 @@ proc fetchStateRoot*(
214305
db: AristoTxRef;
215306
): Result[Hash32,AristoError] =
216307
## Fetch the Merkle hash of the account root.
217-
db.retrieveMerkleHash(VertexID(1))
308+
db.retrieveMerkleHash(STATE_ROOT_VID)
218309

219310
proc hasPathAccount*(
220311
db: AristoTxRef;

execution_chain/db/aristo/aristo_hike.nim

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,14 @@ iterator stepUp*(
124124
path: NibblesBuf; # Partial path
125125
root: VertexID; # Start vertex
126126
db: AristoTxRef; # Database
127+
next = VertexID(0)
127128
): Result[VertexRef, AristoError] =
128129
## For the argument `path`, iterate over the longest possible path in the
129130
## argument database `db`.
130131
var
131132
path = path
132-
next = root
133-
vtx: VertexRef
133+
next = if next == VertexID(0): root else: next
134+
vtx = VertexRef(nil)
134135
block iter:
135136
while true:
136137
(vtx, path, next) = step(path, (root, next), db).valueOr:

execution_chain/db/aristo/aristo_init/rocks_db/rdb_desc.nim

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ template toOpenArray*(xid: AdminTabID): openArray[byte] =
9191
xid.uint64.toBytesBE.toOpenArray(0,7)
9292

9393
template to*(v: RootedVertexID, T: type RdbStateType): RdbStateType =
94-
if v.root == VertexID(1): RdbStateType.World else: RdbStateType.Account
94+
if v.root == STATE_ROOT_VID: RdbStateType.World else: RdbStateType.Account
9595

9696
template inc*(v: var RdbLruCounter, hit: bool) =
9797
discard v[hit].fetchAdd(1, moRelaxed)

0 commit comments

Comments
 (0)