Skip to content
This repository was archived by the owner on Sep 8, 2025. It is now read-only.

Commit 8f7cb5f

Browse files
committed
Expose API to track gaps in the chain of headers
1 parent 6fef9c2 commit 8f7cb5f

File tree

9 files changed

+333
-1
lines changed

9 files changed

+333
-1
lines changed

eth/abc.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
)
4141
from eth.exceptions import VMError
4242
from eth.typing import (
43+
BlockRange,
4344
BytesOrView,
4445
JournalDBCheckpoint,
4546
AccountState,
@@ -388,6 +389,14 @@ class SchemaAPI(ABC):
388389
"""
389390
A class representing a database schema that maps values to lookup keys.
390391
"""
392+
@staticmethod
@abstractmethod
def make_header_chain_gaps_lookup_key() -> bytes:
    """
    Return the lookup key under which the header chain integrity info (the tracked gaps
    in the chain of headers) is stored in the database.
    """
    ...
399+
391400
@staticmethod
392401
@abstractmethod
393402
def make_canonical_head_hash_lookup_key() -> bytes:
@@ -484,6 +493,18 @@ def __init__(self, db: AtomicDatabaseAPI) -> None:
484493
"""
485494
...
486495

496+
@abstractmethod
def get_header_chain_gaps(self) -> Tuple[BlockRange, ...]:
    """
    Return an ordered sequence of block ranges describing the integrity of the chain of
    headers. Each block range describes a missing segment in the chain. Ranges are defined
    with inclusive boundaries: the first value is the number of the first missing block of
    the segment and the second value is the number of the last missing block of the segment.

    The last block range in the sequence is expected to have a block number of ``-1`` as
    the right-hand value, which is to say the gap is open-ended.
    """
507+
487508
#
488509
# Canonical Chain API
489510
#

eth/db/chain_gaps.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import enum
2+
from typing import Tuple
3+
4+
from eth_typing import BlockNumber
5+
6+
from eth.exceptions import GapTrackingCorrupted
7+
from eth.typing import BlockRange
8+
9+
10+
class GapChange(enum.Enum):
    """
    Kind of change that persisting a single header causes in the tracked chain gaps.
    """

    # The persisted number is below the open-ended tail and inside no tracked gap
    # (an already existing header is overwritten); the gaps stay as they are.
    NoChange = enum.auto()
    # The persisted number lies beyond the start of the open-ended tail gap, which
    # leaves a new closed gap behind the written header.
    NewGap = enum.auto()
    # The persisted number lies strictly inside a closed gap, dividing it into two gaps.
    GapSplit = enum.auto()
    # The persisted number sits on a boundary of a closed gap, which shrinks it
    # (a gap of a single block disappears entirely).
    GapShrink = enum.auto()
    # The persisted number is exactly the start of the open-ended tail gap, i.e. a
    # consecutive header is appended at the very tail.
    TailWrite = enum.auto()
17+
18+
19+
GapInfo = Tuple[GapChange, Tuple[BlockRange, ...]]
20+
21+
22+
def calculate_gaps(newly_persisted: BlockNumber, base_gaps: Tuple[BlockRange, ...]) -> GapInfo:
    """
    Calculate how the tracked gaps change when the header with the block number
    ``newly_persisted`` is written.

    :param newly_persisted: block number of the header that was just persisted
    :param base_gaps: the gaps known before the write, as ordered
        ``(first_missing, last_missing)`` pairs with inclusive boundaries. The last pair is
        expected to be open-ended, i.e. to carry ``-1`` as its right-hand value. An empty
        tuple is treated as a fresh chain whose first missing number is 1.
    :return: a pair of the kind of change and the updated gaps
    :raise GapTrackingCorrupted: if ``newly_persisted`` falls into more than one tracked gap
    """
    # If we have a fresh chain, our highest missing number can only be 1
    highest_missing_number = 1 if not base_gaps else base_gaps[-1][0]

    if newly_persisted == highest_missing_number:
        # This is adding a consecutive header at the very tail: the open-ended
        # marker simply moves up by one.
        new_last_marker = (newly_persisted + 1, -1)
        return GapChange.TailWrite, base_gaps[:-1] + (new_last_marker,)

    if newly_persisted > highest_missing_number:
        # We are creating a gap in the chain: everything between the old tail start
        # and the new header is now a closed gap, followed by a new open-ended tail.
        new_tail = (
            (highest_missing_number, newly_persisted - 1),
            (newly_persisted + 1, -1),
        )
        return GapChange.NewGap, base_gaps[:-1] + new_tail

    # From here on the number is below the tail. We are either patching a closed gap
    # (shrinking or dividing it) or overwriting an existing header. The open-ended tail
    # can never match because its right-hand value is -1.
    matching_gaps = [
        (index, pair) for index, pair in enumerate(base_gaps)
        if pair[0] <= newly_persisted <= pair[1]
    ]

    if len(matching_gaps) > 1:
        (_, first_match), (_, second_match) = matching_gaps[:2]
        # Each sentence is passed as its own argument; the comma after the "1st gap"
        # line is required, otherwise the two literals get concatenated.
        raise GapTrackingCorrupted(
            "Corrupted chain gap tracking",
            f"No {newly_persisted} appears to be missing in multiple gaps",
            f"1st gap goes from {first_match[0]} to {first_match[1]}",
            f"2nd gap goes from {second_match[0]} to {second_match[1]}",
        )

    if not matching_gaps:
        # Looks like we are just overwriting an existing header.
        return GapChange.NoChange, base_gaps

    gap_index, (gap_start, gap_end) = matching_gaps[0]

    updated_center: Tuple[BlockRange, ...]
    if gap_start == newly_persisted == gap_end:
        # the gap consisted of this single block only and disappears
        updated_center = ()
        gap_change = GapChange.GapShrink
    elif newly_persisted == gap_start:
        # we are shrinking the gap at the start
        updated_center = ((gap_start + 1, gap_end),)
        gap_change = GapChange.GapShrink
    elif newly_persisted == gap_end:
        # we are shrinking the gap at the tail
        updated_center = ((gap_start, gap_end - 1),)
        gap_change = GapChange.GapShrink
    else:
        # we are dividing the gap
        updated_center = (
            (gap_start, newly_persisted - 1),
            (newly_persisted + 1, gap_end),
        )
        gap_change = GapChange.GapSplit

    # Splice the replacement for the patched gap between the untouched neighbours
    new_gaps = base_gaps[:gap_index] + updated_center + base_gaps[gap_index + 1:]
    return gap_change, new_gaps

eth/db/header.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,23 @@
2929
from eth.constants import (
3030
GENESIS_PARENT_HASH,
3131
)
32+
from eth.db.chain_gaps import (
33+
calculate_gaps,
34+
GapChange,
35+
GapInfo,
36+
)
3237
from eth.exceptions import (
3338
CanonicalHeadNotFound,
3439
HeaderNotFound,
3540
ParentNotFound,
3641
)
3742
from eth.db.schema import SchemaV1
3843
from eth.rlp.headers import BlockHeader
44+
from eth.rlp.chain_gaps import (
45+
decode_chain_gaps,
46+
encode_chain_gaps,
47+
)
48+
from eth.typing import BlockRange
3949
from eth.validation import (
4050
validate_block_number,
4151
validate_word,
@@ -46,6 +56,40 @@ class HeaderDB(HeaderDatabaseAPI):
4656
def __init__(self, db: AtomicDatabaseAPI) -> None:
    """
    Store ``db`` as the backing database of this instance.
    """
    self.db = db
4858

59+
def get_header_chain_gaps(self) -> Tuple[BlockRange, ...]:
    """
    Return the header chain gaps tracked in this instance's database.

    Delegates to :meth:`_get_header_chain_gaps` with ``self.db``.
    """
    return self._get_header_chain_gaps(self.db)
61+
62+
@classmethod
def _get_header_chain_gaps(cls, db: DatabaseAPI) -> Tuple[BlockRange, ...]:
    """
    Load the tracked header chain gaps from ``db``.

    Returns an empty tuple if nothing is stored under the gaps lookup key, otherwise
    the decoded gaps.
    """
    lookup_key = SchemaV1.make_header_chain_gaps_lookup_key()
    try:
        raw_gaps = db[lookup_key]
    except KeyError:
        # No gap info has been written to this database yet
        return ()
    return decode_chain_gaps(raw_gaps)
70+
71+
@classmethod
def _update_header_chain_gaps(
        cls,
        db: DatabaseAPI,
        persisted_header: BlockHeaderAPI,
        base_gaps: Tuple[BlockRange, ...] = None) -> GapInfo:
    """
    Recalculate the header chain gaps after ``persisted_header`` was written and store
    the result in ``db`` unless nothing changed.

    ``base_gaps`` lets a caller that makes many updates in a row pass in the gaps
    returned by the previous call, which avoids reloading them from the database each
    time. When it is ``None`` the gaps are read from ``db``.

    Returns the kind of change together with the resulting gaps.

    NOTE(review): ``base_gaps`` defaults to ``None`` although its annotation is not
    ``Optional``.
    """
    known_gaps = cls._get_header_chain_gaps(db) if base_gaps is None else base_gaps

    change, updated_gaps = calculate_gaps(persisted_header.block_number, known_gaps)

    if change is GapChange.NoChange:
        # Nothing to write back
        return change, updated_gaps

    lookup_key = SchemaV1.make_header_chain_gaps_lookup_key()
    db.set(lookup_key, encode_chain_gaps(updated_gaps))
    return change, updated_gaps
92+
4993
#
5094
# Canonical Chain API
5195
#
@@ -178,6 +222,7 @@ def _persist_checkpoint_header(
178222
previous_score = score - header.difficulty
179223
cls._set_hash_scores_to_db(db, header, previous_score)
180224
cls._set_as_canonical_chain_head(db, header, header.parent_hash)
225+
cls._update_header_chain_gaps(db, header)
181226

182227
@classmethod
183228
def _persist_header_chain(
@@ -211,6 +256,9 @@ def _persist_header_chain(
211256
rlp.encode(curr_chain_head),
212257
)
213258
score = cls._set_hash_scores_to_db(db, curr_chain_head, score)
259+
gap_change, gaps = cls._update_header_chain_gaps(db, curr_chain_head)
260+
if gap_change is GapChange.GapShrink or gap_change is GapChange.GapSplit:
261+
cls._add_block_number_to_hash_lookup(db, curr_chain_head)
214262

215263
orig_headers_seq = concat([(first_header,), headers_iterator])
216264
for parent, child in sliding_window(2, orig_headers_seq):
@@ -228,7 +276,9 @@ def _persist_header_chain(
228276
)
229277

230278
score = cls._set_hash_scores_to_db(db, curr_chain_head, score)
231-
279+
gap_change, gaps = cls._update_header_chain_gaps(db, curr_chain_head, gaps)
280+
if gap_change is GapChange.GapShrink or gap_change is GapChange.GapSplit:
281+
cls._add_block_number_to_hash_lookup(db, curr_chain_head)
232282
try:
233283
previous_canonical_head = cls._get_canonical_head_hash(db)
234284
head_score = cls._get_score(db, previous_canonical_head)

eth/db/schema.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ def make_block_number_to_hash_lookup_key(block_number: BlockNumber) -> bytes:
2020
def make_block_hash_to_score_lookup_key(block_hash: Hash32) -> bytes:
2121
return b'block-hash-to-score:%s' % block_hash
2222

23+
@staticmethod
def make_header_chain_gaps_lookup_key() -> bytes:
    """
    Return the fixed database key under which the header chain gaps are stored.
    """
    return b'v1:header_chain_gaps'
26+
2327
@staticmethod
def make_transaction_hash_to_block_lookup_key(transaction_hash: Hash32) -> bytes:
    """
    Return the database lookup key for ``transaction_hash``: the prefix
    ``transaction-hash-to-block:`` followed by the hash bytes.
    """
    return b'transaction-hash-to-block:%s' % transaction_hash

eth/exceptions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ class CanonicalHeadNotFound(PyEVMError):
6565
pass
6666

6767

68+
class GapTrackingCorrupted(PyEVMError):
    """
    Raised when the tracking of chain gaps appears to be corrupted, e.g. when a block
    number appears to be missing in more than one tracked gap.
    """
    pass
73+
74+
6875
class Halt(PyEVMError):
6976
"""
7077
Raised when an opcode function halts vm execution.

eth/rlp/chain_gaps.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from typing import Tuple, Iterable
2+
3+
import rlp.sedes
4+
from eth_utils import to_tuple
5+
6+
from eth._utils.numeric import signed_to_unsigned, unsigned_to_signed
7+
from eth.rlp.sedes import uint32
8+
from eth.typing import BlockRange
9+
10+
chain_gaps = rlp.sedes.CountableList(rlp.sedes.List((uint32, uint32)))
11+
12+
# Chain gaps are defined as a sequence of markers that describe gaps in a chain of connected
# entities. The right-hand side of the very last marker is expected to be -1, meaning the gap
# is open-ended. E.g. (500, -1) means: every header from number 500 upwards is missing.
# [[first_missing, last_missing], ..., [first_missing, -1]]
# Since RLP doesn't define signed integers, we convert the right-hand side with
# signed_to_unsigned before entries are written and with unsigned_to_signed after entries
# are read from disk.
18+
19+
20+
@to_tuple
def _convert_signed_to_unsigned(gaps: Tuple[BlockRange, ...]) -> Iterable[BlockRange]:
    """
    Yield every gap with its right-hand value passed through ``signed_to_unsigned``
    while the left-hand value is kept as is.
    """
    for gap in gaps:
        first_missing = gap[0]
        last_missing = signed_to_unsigned(gap[1])
        yield (first_missing, last_missing)
24+
25+
26+
@to_tuple
def _convert_unsigned_to_signed(gaps: Tuple[BlockRange, ...]) -> Iterable[BlockRange]:
    """
    Yield every gap with its right-hand value passed through ``unsigned_to_signed``
    while the left-hand value is kept as is.
    """
    for gap in gaps:
        first_missing = gap[0]
        last_missing = unsigned_to_signed(gap[1])
        yield (first_missing, last_missing)
30+
31+
32+
def encode_chain_gaps(gaps: Tuple[BlockRange, ...]) -> bytes:
    """
    RLP-encode ``gaps`` with the ``chain_gaps`` sedes after converting the right-hand
    values from signed to unsigned.
    """
    unsigned_gaps = _convert_signed_to_unsigned(gaps)
    return rlp.encode(unsigned_gaps, sedes=chain_gaps)
36+
37+
38+
def decode_chain_gaps(gaps: bytes) -> Tuple[BlockRange, ...]:
    """
    Decode RLP-encoded ``gaps`` with the ``chain_gaps`` sedes and convert the right-hand
    values from unsigned back to signed.
    """
    return _convert_unsigned_to_signed(rlp.decode(gaps, sedes=chain_gaps))

eth/typing.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141

4242
AccountDiff = Iterable[Tuple[Address, str, Union[int, bytes], Union[int, bytes]]]
4343

44+
BlockRange = Tuple[int, int]
45+
4446
GeneralState = Union[
4547
AccountState,
4648
List[Tuple[Address, Dict[str, Union[int, bytes, Dict[int, int]]]]]

newsfragments/1924.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Expose ``get_header_chain_gaps()`` API on HeaderDB to track chain gaps

0 commit comments

Comments
 (0)