Skip to content

Commit 5bffa5f

Browse files
committed
replay: fix stem burst violations
1 parent 3ce1d90 commit 5bffa5f

File tree

1 file changed

+39
-31
lines changed

1 file changed

+39
-31
lines changed

src/discof/replay/fd_replay_tile.c

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ struct fd_replay_tile {
160160
fd_txncache_t * txncache;
161161
fd_store_t * store;
162162
fd_banks_t banks[1];
163+
ulong victim_bank_indices[ FD_BANKS_MAX_BANKS ];
164+
ulong victim_bank_cnt;
163165

164166
/* This flag is 1 if we have seen a vote signature that our node has
165167
sent out get rooted at least one time. The value is 0 otherwise.
@@ -1916,7 +1918,8 @@ can_process_fec( fd_replay_tile_t * ctx,
19161918
return 1;
19171919
}
19181920

1919-
static void
1921+
/* Returns 0 on successful FEC ingestion or when the FEC set is dropped or skipped, 1 if the block got marked dead. */
1922+
static int
19201923
insert_fec_set( fd_replay_tile_t * ctx,
19211924
fd_stem_context_t * stem,
19221925
fd_reasm_fec_t * reasm_fec ) {
@@ -1967,7 +1970,7 @@ insert_fec_set( fd_replay_tile_t * ctx,
19671970
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
19681971
if( FD_UNLIKELY( block_id_ele->latest_fec_idx>=reasm_fec->fec_set_idx ) ) {
19691972
FD_LOG_WARNING(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because it is at least as old as the latest FEC set (slot=%lu, fec_set_idx=%u)", reasm_fec->slot, reasm_fec->fec_set_idx, block_id_ele->slot, block_id_ele->latest_fec_idx ));
1970-
return;
1973+
return 0;
19711974
}
19721975
block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
19731976
block_id_ele->latest_mr = reasm_fec->key;
@@ -1982,7 +1985,7 @@ insert_fec_set( fd_replay_tile_t * ctx,
19821985
}
19831986

19841987
/* If we are the leader, we don't need to process the FEC set. */
1985-
if( FD_UNLIKELY( reasm_fec->is_leader ) ) return;
1988+
if( FD_UNLIKELY( reasm_fec->is_leader ) ) return 0;
19861989

19871990
/* Forks form a partial ordering over FEC sets. The Repair tile
19881991
delivers FEC sets in-order per fork, but FEC set ordering across
@@ -2044,11 +2047,12 @@ insert_fec_set( fd_replay_tile_t * ctx,
20442047
ctx->metrics.store_query_missing_mr = reasm_fec->key.ul[0];
20452048
FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
20462049
FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, key_b58 ));
2047-
return;
2050+
return 0;
20482051
}
20492052
sched_fec->fec = store_fec;
20502053
if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) { /* FIXME this critical section is unnecessarily complex. should refactor to just be held for the memcpy and block_offs. */
20512054
mark_bank_dead( ctx, stem, sched_fec->bank_idx );
2055+
return 1;
20522056
}
20532057
} FD_STORE_SLOCK_END;
20542058

@@ -2107,7 +2111,8 @@ process_fec_set( fd_replay_tile_t * ctx,
21072111
path[ path_cnt++ ] = curr;
21082112
}
21092113

2110-
for( ulong i=path_cnt; i>0UL; i-- ) {
2114+
int dead = 0;
2115+
for( ulong i=path_cnt; i>0UL && !dead; i-- ) {
21112116
fd_reasm_fec_t * leaf = path[ i-1 ];
21122117

21132118
/* If there's not capacity in the sched or banks, return early and
@@ -2127,7 +2132,7 @@ process_fec_set( fd_replay_tile_t * ctx,
21272132
FD_LOG_NOTICE(( "backfilling FEC sets for slot %lu from fec_set_idx %u to fec_set_idx %u", leaf->slot, leaf->fec_set_idx, curr->fec_set_idx ));
21282133

21292134
for( ulong j=0UL; j<=leaf->fec_set_idx/FD_FEC_SHRED_CNT; j++ ) {
2130-
insert_fec_set( ctx, stem, slot_fecs[ j ] );
2135+
if( FD_UNLIKELY( dead=insert_fec_set( ctx, stem, slot_fecs[ j ] ) ) ) break;
21312136
}
21322137
}
21332138
}
@@ -2283,6 +2288,31 @@ after_credit( fd_replay_tile_t * ctx,
22832288
return;
22842289
}
22852290

2291+
/* Mark a frontier eviction victim bank as dead. As refcnts on said
2292+
banks are drained, they will be pruned away. */
2293+
if( FD_UNLIKELY( ctx->victim_bank_cnt ) ) {
2294+
*charge_busy = 1;
2295+
*opt_poll_in = 0;
2296+
bank_idx = ctx->victim_bank_cnt-1UL;
2297+
fd_bank_t bank[1];
2298+
FD_TEST( fd_banks_bank_query( bank, ctx->banks, ctx->victim_bank_indices[ bank_idx ] ) );
2299+
if( FD_UNLIKELY( ctx->is_leader && ctx->victim_bank_indices[ bank_idx ]==ctx->leader_bank->data->idx ) ) return;
2300+
mark_bank_dead( ctx, stem, bank->data->idx );
2301+
fd_sched_block_abandon( ctx->sched, bank->data->idx );
2302+
2303+
/* evict it from reasm */
2304+
2305+
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->data->idx ];
2306+
fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
2307+
FD_TEST( fec );
2308+
fd_reasm_fec_t * evicted_head = fd_reasm_remove( ctx->reasm, fec, ctx->store );
2309+
if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2310+
/* We already have a chain that we are evicting. Prepend the new chain to the existing chain. */
2311+
fec->child = fd_reasm_pool_idx( ctx->reasm, ctx->reasm_evicted );
2312+
}
2313+
ctx->reasm_evicted = evicted_head;
2314+
}
2315+
22862316
/* If the reassembler has a fec that is ready, we should process it
22872317
and pass it to the scheduler. */
22882318
int evict_banks = 0;
@@ -2296,31 +2326,9 @@ after_credit( fd_replay_tile_t * ctx,
22962326

22972327
if( FD_UNLIKELY( evict_banks ) ) {
22982328
FD_LOG_WARNING(( "banks are full and partially executed frontier banks are being evicted" ));
2299-
ulong frontier_cnt = 0UL;
2300-
ulong frontier_indices[ FD_BANKS_MAX_BANKS ];
2301-
fd_banks_get_frontier( ctx->banks, frontier_indices, &frontier_cnt );
2302-
2303-
/* Mark all frontier banks as dead. As refcnts on said banks are
2304-
drained, they will be pruned away. */
2305-
for( ulong i=0UL; i<frontier_cnt; i++ ) {
2306-
fd_bank_t bank[1];
2307-
FD_TEST( fd_banks_bank_query( bank, ctx->banks, frontier_indices[i] ) );
2308-
if( FD_UNLIKELY( ctx->is_leader && frontier_indices[i]==ctx->leader_bank->data->idx ) ) continue;
2309-
mark_bank_dead( ctx, stem, bank->data->idx );
2310-
fd_sched_block_abandon( ctx->sched, bank->data->idx );
2311-
2312-
/* evict it from reasm */
2313-
2314-
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->data->idx ];
2315-
fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
2316-
FD_TEST( fec );
2317-
fd_reasm_fec_t * evicted_head = fd_reasm_remove( ctx->reasm, fec, ctx->store );
2318-
if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2319-
/* already have a chain we are evicting. Prepend the new chain to the existing chain */
2320-
fec->child = fd_reasm_pool_idx( ctx->reasm, ctx->reasm_evicted );
2321-
}
2322-
ctx->reasm_evicted = evicted_head;
2323-
}
2329+
fd_banks_get_frontier( ctx->banks, ctx->victim_bank_indices, &ctx->victim_bank_cnt );
2330+
*charge_busy = 1;
2331+
*opt_poll_in = 0;
23242332
}
23252333

23262334
*charge_busy = replay( ctx, stem );

0 commit comments

Comments
 (0)