Skip to content

Commit 8450fb5

Browse files
replay: patch for fec/leader race
1 parent c9e6e6c commit 8450fb5

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

src/discof/replay/fd_replay_tile.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,6 +1576,24 @@ can_process_fec( fd_replay_tile_t * ctx ) {
15761576
if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) return 0;
15771577
if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) return 0;
15781578

1579+
if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) {
1580+
/* There's a race that's exceedingly rare, where we receive the
1581+
FEC set for the slot right after our leader rotation before we
1582+
freeze the bank for the last slot in our leader rotation.
1583+
Leader slot freezing happens only after we've received the
1584+
final PoH hash from the poh tile as well as the final FEC set
1585+
for the leader slot. So the race happens when FEC sets are
1586+
delivered and processed sooner than the PoH hash, aka when the
1587+
poh=>shred=>replay path for the block id somehow beats the
1588+
poh=>replay path for the poh hash. To mitigate this race,
1589+
we must block on ingesting the FEC set for the ensuing slot
1590+
before the leader bank freezes, because that would violate
1591+
ordering invariants in banks and sched. */
1592+
FD_TEST( ctx->recv_block_id );
1593+
FD_TEST( !ctx->recv_poh );
1594+
return 0;
1595+
}
1596+
15791597
/* If fec_set_idx is 0, we need a new bank for a new slot. Banks must
15801598
not be full in this case. */
15811599
if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) return 0;
@@ -1807,24 +1825,6 @@ after_credit( fd_replay_tile_t * ctx,
18071825
if( FD_LIKELY( can_process_fec( ctx ) ) ) {
18081826
fd_reasm_fec_t * fec = fd_reasm_peek( ctx->reasm );
18091827

1810-
if( FD_UNLIKELY( ctx->is_leader && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) {
1811-
/* There's a race that's exceedingly rare, where we receive the
1812-
FEC set for the slot right after our leader rotation before we
1813-
freeze the bank for the last slot in our leader rotation.
1814-
Leader slot freezing happens only after we've received the
1815-
final PoH hash from the poh tile as well as the final FEC set
1816-
for the leader slot. So the race happens when FEC sets are
1817-
delivered and processed sooner than the PoH hash, aka when the
1818-
poh=>shred=>replay path for the block id somehow beats the
1819-
poh=>replay path for the poh hash. We should not process any
1820-
FEC set for the ensuing slot before the leader bank freezes,
1821-
because that would violate ordering invariants in banks and
1822-
sched. */
1823-
FD_TEST( ctx->recv_block_id );
1824-
FD_TEST( !ctx->recv_poh );
1825-
return;
1826-
}
1827-
18281828
/* If fec->eqvoc is set that means that equivocation mid-block was
18291829
detected in fd_reasm_t. We need to replay up to and including
18301830
the equivocating FEC on a new bank. */

0 commit comments

Comments
 (0)