Skip to content

Commit 803489f

Browse files
committed
replay: fix stem burst violations
1 parent 3ce1d90 commit 803489f

File tree

1 file changed

+44
-31
lines changed

1 file changed

+44
-31
lines changed

src/discof/replay/fd_replay_tile.c

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ struct fd_replay_tile {
160160
fd_txncache_t * txncache;
161161
fd_store_t * store;
162162
fd_banks_t banks[1];
163+
ulong frontier_indices[ FD_BANKS_MAX_BANKS ];
164+
ulong frontier_cnt;
163165

164166
/* This flag is 1 if we have seen a vote signature that our node has
165167
sent out get rooted at least one time. The value is 0 otherwise.
@@ -1916,7 +1918,8 @@ can_process_fec( fd_replay_tile_t * ctx,
19161918
return 1;
19171919
}
19181920

1919-
static void
1921+
/* Returns 1 if the block got marked dead, 0 otherwise (FEC set ingested, dropped, or skipped). */
1922+
static int
19201923
insert_fec_set( fd_replay_tile_t * ctx,
19211924
fd_stem_context_t * stem,
19221925
fd_reasm_fec_t * reasm_fec ) {
@@ -1967,7 +1970,7 @@ insert_fec_set( fd_replay_tile_t * ctx,
19671970
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
19681971
if( FD_UNLIKELY( block_id_ele->latest_fec_idx>=reasm_fec->fec_set_idx ) ) {
19691972
FD_LOG_WARNING(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because it is at least as old as the latest FEC set (slot=%lu, fec_set_idx=%u)", reasm_fec->slot, reasm_fec->fec_set_idx, block_id_ele->slot, block_id_ele->latest_fec_idx ));
1970-
return;
1973+
return 0;
19711974
}
19721975
block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
19731976
block_id_ele->latest_mr = reasm_fec->key;
@@ -1982,7 +1985,7 @@ insert_fec_set( fd_replay_tile_t * ctx,
19821985
}
19831986

19841987
/* If we are the leader, we don't need to process the FEC set. */
1985-
if( FD_UNLIKELY( reasm_fec->is_leader ) ) return;
1988+
if( FD_UNLIKELY( reasm_fec->is_leader ) ) return 0;
19861989

19871990
/* Forks form a partial ordering over FEC sets. The Repair tile
19881991
delivers FEC sets in-order per fork, but FEC set ordering across
@@ -2044,16 +2047,18 @@ insert_fec_set( fd_replay_tile_t * ctx,
20442047
ctx->metrics.store_query_missing_mr = reasm_fec->key.ul[0];
20452048
FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
20462049
FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, key_b58 ));
2047-
return;
2050+
return 0;
20482051
}
20492052
sched_fec->fec = store_fec;
20502053
if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) { /* FIXME this critical section is unnecessarily complex. should refactor to just be held for the memcpy and block_offs. */
20512054
mark_bank_dead( ctx, stem, sched_fec->bank_idx );
2055+
return 1;
20522056
}
20532057
} FD_STORE_SLOCK_END;
20542058

20552059
ctx->metrics.store_query_release++;
20562060
fd_histf_sample( ctx->metrics.store_query_work, (ulong)fd_log_wallclock() - work );
2061+
return 0;
20572062
}
20582063

20592064
static void
@@ -2107,7 +2112,8 @@ process_fec_set( fd_replay_tile_t * ctx,
21072112
path[ path_cnt++ ] = curr;
21082113
}
21092114

2110-
for( ulong i=path_cnt; i>0UL; i-- ) {
2115+
int dead = 0;
2116+
for( ulong i=path_cnt; i>0UL && !dead; i-- ) {
21112117
fd_reasm_fec_t * leaf = path[ i-1 ];
21122118

21132119
/* If there's not capacity in the sched or banks, return early and
@@ -2127,7 +2133,7 @@ process_fec_set( fd_replay_tile_t * ctx,
21272133
FD_LOG_NOTICE(( "backfilling FEC sets for slot %lu from fec_set_idx %u to fec_set_idx %u", leaf->slot, leaf->fec_set_idx, curr->fec_set_idx ));
21282134

21292135
for( ulong j=0UL; j<=leaf->fec_set_idx/FD_FEC_SHRED_CNT; j++ ) {
2130-
insert_fec_set( ctx, stem, slot_fecs[ j ] );
2136+
if( FD_UNLIKELY( dead=insert_fec_set( ctx, stem, slot_fecs[ j ] ) ) ) break;
21312137
}
21322138
}
21332139
}
@@ -2283,6 +2289,32 @@ after_credit( fd_replay_tile_t * ctx,
22832289
return;
22842290
}
22852291

2292+
/* Mark a frontier eviction victim bank as dead. As refcnts on said
2293+
banks are drained, they will be pruned away. */
2294+
if( FD_UNLIKELY( ctx->frontier_cnt ) ) {
2295+
*charge_busy = 1;
2296+
*opt_poll_in = 0;
2297+
bank_idx = ctx->frontier_indices[ --ctx->frontier_cnt ];
2298+
fd_bank_t bank[1];
2299+
FD_TEST( fd_banks_bank_query( bank, ctx->banks, bank_idx ) );
2300+
if( FD_UNLIKELY( ctx->is_leader && bank_idx==ctx->leader_bank->data->idx ) ) return;
2301+
mark_bank_dead( ctx, stem, bank->data->idx );
2302+
fd_sched_block_abandon( ctx->sched, bank->data->idx );
2303+
2304+
/* evict it from reasm */
2305+
2306+
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->data->idx ];
2307+
fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
2308+
FD_TEST( fec );
2309+
fd_reasm_fec_t * evicted_head = fd_reasm_remove( ctx->reasm, fec, ctx->store );
2310+
if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2311+
/* already have a chain we are evicting. Prepend the new chain to the existing chain */
2312+
fec->child = fd_reasm_pool_idx( ctx->reasm, ctx->reasm_evicted );
2313+
}
2314+
ctx->reasm_evicted = evicted_head;
2315+
return;
2316+
}
2317+
22862318
/* If the reassembler has a fec that is ready, we should process it
22872319
and pass it to the scheduler. */
22882320
int evict_banks = 0;
@@ -2296,31 +2328,10 @@ after_credit( fd_replay_tile_t * ctx,
22962328

22972329
if( FD_UNLIKELY( evict_banks ) ) {
22982330
FD_LOG_WARNING(( "banks are full and partially executed frontier banks are being evicted" ));
2299-
ulong frontier_cnt = 0UL;
2300-
ulong frontier_indices[ FD_BANKS_MAX_BANKS ];
2301-
fd_banks_get_frontier( ctx->banks, frontier_indices, &frontier_cnt );
2302-
2303-
/* Mark all frontier banks as dead. As refcnts on said banks are
2304-
drained, they will be pruned away. */
2305-
for( ulong i=0UL; i<frontier_cnt; i++ ) {
2306-
fd_bank_t bank[1];
2307-
FD_TEST( fd_banks_bank_query( bank, ctx->banks, frontier_indices[i] ) );
2308-
if( FD_UNLIKELY( ctx->is_leader && frontier_indices[i]==ctx->leader_bank->data->idx ) ) continue;
2309-
mark_bank_dead( ctx, stem, bank->data->idx );
2310-
fd_sched_block_abandon( ctx->sched, bank->data->idx );
2311-
2312-
/* evict it from reasm */
2313-
2314-
fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->data->idx ];
2315-
fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
2316-
FD_TEST( fec );
2317-
fd_reasm_fec_t * evicted_head = fd_reasm_remove( ctx->reasm, fec, ctx->store );
2318-
if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2319-
/* already have a chain we are evicting. Prepend the new chain to the existing chain */
2320-
fec->child = fd_reasm_pool_idx( ctx->reasm, ctx->reasm_evicted );
2321-
}
2322-
ctx->reasm_evicted = evicted_head;
2323-
}
2331+
fd_banks_get_frontier( ctx->banks, ctx->frontier_indices, &ctx->frontier_cnt );
2332+
*charge_busy = 1;
2333+
*opt_poll_in = 0;
2334+
return;
23242335
}
23252336

23262337
*charge_busy = replay( ctx, stem );
@@ -2899,6 +2910,8 @@ unprivileged_init( fd_topo_t * topo,
28992910
fd_bank_data_t * bank_pool = fd_banks_get_bank_pool( ctx->banks->data );
29002911
FD_MGAUGE_SET( REPLAY, MAX_LIVE_BANKS, fd_banks_pool_max( bank_pool ) );
29012912

2913+
ctx->frontier_cnt = 0UL;
2914+
29022915
fd_bank_t bank[1];
29032916
FD_TEST( fd_banks_init_bank( bank, ctx->banks ) );
29042917
fd_bank_slot_set( bank, 0UL );

0 commit comments

Comments
 (0)