@@ -1576,6 +1576,24 @@ can_process_fec( fd_replay_tile_t * ctx ) {
15761576 if ( FD_UNLIKELY ( !fd_sched_can_ingest ( ctx -> sched , 1UL ) ) ) return 0 ;
15771577 if ( FD_UNLIKELY ( (fec = fd_reasm_peek ( ctx -> reasm ))== NULL ) ) return 0 ;
15781578
1579+ if ( FD_UNLIKELY ( ctx -> is_leader && fec -> fec_set_idx == 0U && fd_reasm_parent ( ctx -> reasm , fec )-> bank_idx == ctx -> leader_bank -> idx ) ) {
1580+ /* There's a race that's exceedingly rare, where we receive the
1581+ FEC set for the slot right after our leader rotation before we
1582+ freeze the bank for the last slot in our leader rotation.
1583+ Leader slot freezing happens only after we've received the
1584+ final PoH hash from the poh tile as well as the final FEC set
1585+ for the leader slot. So the race happens when FEC sets are
1586+ delivered and processed sooner than the PoH hash, aka when the
1587+ poh=>shred=>replay path for the block id somehow beats the
1588+ poh=>replay path for the poh hash. To mitigate this race,
1589+ we must block on ingesting the FEC set for the ensuing slot
1590+ before the leader bank freezes, because that would violate
1591+ ordering invariants in banks and sched. */
1592+ FD_TEST ( ctx -> recv_block_id );
1593+ FD_TEST ( !ctx -> recv_poh );
1594+ return 0 ;
1595+ }
1596+
15791597 /* If fec_set_idx is 0, we need a new bank for a new slot. Banks must
15801598 not be full in this case. */
15811599 if ( FD_UNLIKELY ( fd_banks_is_full ( ctx -> banks ) && fec -> fec_set_idx == 0 ) ) return 0 ;
@@ -1807,24 +1825,6 @@ after_credit( fd_replay_tile_t * ctx,
18071825 if ( FD_LIKELY ( can_process_fec ( ctx ) ) ) {
18081826 fd_reasm_fec_t * fec = fd_reasm_peek ( ctx -> reasm );
18091827
1810- if ( FD_UNLIKELY ( ctx -> is_leader && fd_reasm_parent ( ctx -> reasm , fec )-> bank_idx == ctx -> leader_bank -> idx ) ) {
1811- /* There's a race that's exceedingly rare, where we receive the
1812- FEC set for the slot right after our leader rotation before we
1813- freeze the bank for the last slot in our leader rotation.
1814- Leader slot freezing happens only after if we've received the
1815- final PoH hash from the poh tile as well as the final FEC set
1816- for the leader slot. So the race happens when FEC sets are
1817- delivered and processed sooner than the PoH hash, aka when the
1818- poh=>shred=>replay path for the block id somehow beats the
1819- poh=>replay path for the poh hash. We should not process any
1820- FEC set for the ensuing slot before the leader bank freezes,
1821- because that would violate ordering invariants in banks and
1822- sched. */
1823- FD_TEST ( ctx -> recv_block_id );
1824- FD_TEST ( !ctx -> recv_poh );
1825- return ;
1826- }
1827-
18281828 /* If fec->eqvoc is set that means that equivocation mid-block was
18291829 detected in fd_reasm_t. We need to replay up to and including
18301830 the equivocating FEC on a new bank. */
0 commit comments