Skip to content

Commit 068a281

Browse files
committed
replay: use bitset for exec tile state tracking
Saves about 7 cache lines. Also prepares for an upcoming commit where we need to easily check whether the execution pipeline is fully drained for safe publishing of some shared data structures.
1 parent 35b4c5a commit 068a281

File tree

1 file changed

+16
-39
lines changed

1 file changed

+16
-39
lines changed

src/discof/replay/fd_replay_tile.c

Lines changed: 16 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,6 @@
8484
#define IN_KIND_SNAP (3)
8585
#define IN_KIND_WRITER (4)
8686

87-
#define EXEC_TXN_BUSY (0xA)
88-
#define EXEC_TXN_READY (0xB)
89-
9087
#define BANK_HASH_CMP_LG_MAX (16UL)
9188

9289
struct fd_replay_in_link {
@@ -165,6 +162,8 @@ typedef struct fd_exec_slice fd_exec_slice_t;
165162
#define DEQUE_MAX USHORT_MAX
166163
#include "../../util/tmpl/fd_deque_dynamic.c"
167164

165+
FD_STATIC_ASSERT( FD_PACK_MAX_BANK_TILES<=64UL, exec_bitset );
166+
168167
struct fd_replay_tile_ctx {
169168
fd_wksp_t * wksp;
170169
fd_wksp_t * status_cache_wksp;
@@ -221,8 +220,8 @@ struct fd_replay_tile_ctx {
221220

222221
/* TODO: Some of these arrays should be bitvecs that get masked into. */
223222
ulong exec_cnt;
223+
ulong exec_ready_bitset; /* Is tile ready */
224224
fd_replay_out_link_t exec_out [ FD_PACK_MAX_BANK_TILES ]; /* Sending to exec unexecuted txns */
225-
uchar exec_ready[ FD_PACK_MAX_BANK_TILES ]; /* Is tile ready */
226225
uint prev_ids [ FD_PACK_MAX_BANK_TILES ]; /* Previous txn id if any */
227226
ulong * exec_fseq [ FD_PACK_MAX_BANK_TILES ]; /* fseq of the last executed txn */
228227

@@ -677,9 +676,7 @@ init_after_snapshot( fd_replay_tile_ctx_t * ctx ) {
677676
snapshot_slot = 1UL;
678677

679678
/* Now setup exec tiles for execution */
680-
for( ulong i=0UL; i<ctx->exec_cnt; i++ ) {
681-
ctx->exec_ready[ i ] = EXEC_TXN_READY;
682-
}
679+
ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );
683680
}
684681

685682
ctx->snapshot_slot = snapshot_slot;
@@ -736,12 +733,7 @@ init_after_snapshot( fd_replay_tile_ctx_t * ctx ) {
736733

737734
/* Now that the snapshot(s) are done loading, we can mark all of the
738735
exec tiles as ready. */
739-
for( ulong i=0UL; i<ctx->exec_cnt; i++ ) {
740-
if( ctx->exec_ready[ i ] == EXEC_TXN_BUSY ) {
741-
ctx->exec_ready[ i ] = EXEC_TXN_READY;
742-
}
743-
}
744-
736+
ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );
745737

746738
FD_LOG_NOTICE(( "snapshot slot %lu", snapshot_slot ));
747739
}
@@ -1171,11 +1163,11 @@ handle_writer_state_updates( fd_replay_tile_ctx_t * ctx ) {
11711163
break;
11721164
case FD_WRITER_STATE_TXN_DONE: {
11731165
uint txn_id = fd_writer_fseq_get_txn_id( res );
1174-
ulong exec_tile_id = fd_writer_fseq_get_exec_tile_id( res );
1175-
if( ctx->exec_ready[ exec_tile_id ]==EXEC_TXN_BUSY && ctx->prev_ids[ exec_tile_id ]!=txn_id ) {
1166+
int exec_tile_id = fd_writer_fseq_get_exec_tile_id( res );
1167+
if( fd_ulong_extract_bit( ctx->exec_ready_bitset, exec_tile_id )==0 && ctx->prev_ids[ exec_tile_id ]!=txn_id ) {
11761168
//FD_LOG_DEBUG(( "Ack that exec tile idx=%lu txn id=%u has been finalized by writer tile %lu", exec_tile_id, txn_id, i ));
1177-
ctx->exec_ready[ exec_tile_id ] = EXEC_TXN_READY;
1178-
ctx->prev_ids[ exec_tile_id ] = txn_id;
1169+
ctx->exec_ready_bitset = fd_ulong_set_bit( ctx->exec_ready_bitset, exec_tile_id );
1170+
ctx->prev_ids[ exec_tile_id ] = txn_id;
11791171
fd_fseq_update( ctx->writer_fseq[ i ], FD_WRITER_STATE_READY );
11801172
}
11811173
break;
@@ -1468,18 +1460,6 @@ handle_new_slice( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
14681460
fd_bank_shred_cnt_set( ctx->slot_ctx->bank, fd_bank_shred_cnt_get( ctx->slot_ctx->bank ) + data_cnt );
14691461
}
14701462

1471-
static ulong
1472-
get_free_exec_tiles( fd_replay_tile_ctx_t * ctx, uchar * exec_free_idx ) {
1473-
ulong cnt=0UL;
1474-
for( uchar i=0; i<ctx->exec_cnt; i++ ) {
1475-
if( ctx->exec_ready[ i ]==EXEC_TXN_READY) {
1476-
exec_free_idx[ cnt ] = i;
1477-
cnt++;
1478-
}
1479-
}
1480-
return cnt;
1481-
}
1482-
14831463
static void
14841464
exec_slice_fini_slot( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
14851465

@@ -1603,8 +1583,6 @@ exec_slice_fini_slot( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
16031583

16041584
static void
16051585
exec_and_handle_slice( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
1606-
uchar exec_free_idx[ FD_PACK_MAX_BANK_TILES ];
1607-
ulong free_exec_cnt = get_free_exec_tiles( ctx, exec_free_idx );
16081586

16091587
/* If there are no txns left to execute in the microblock and the
16101588
exec tiles are not busy, then we are ready to either start
@@ -1613,7 +1591,7 @@ exec_and_handle_slice( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
16131591
We have to synchronize on the microblock boundary because we
16141592
only have the guarantee that all transactions within the same
16151593
microblock can be executed in parallel. */
1616-
if( !fd_slice_exec_txn_ready( &ctx->slice_exec_ctx ) && free_exec_cnt==ctx->exec_cnt ) {
1594+
if( !fd_slice_exec_txn_ready( &ctx->slice_exec_ctx ) && ctx->exec_ready_bitset==fd_ulong_mask_lsb( (int)ctx->exec_cnt ) ) {
16171595
if( fd_slice_exec_microblock_ready( &ctx->slice_exec_ctx ) ) {
16181596
fd_slice_exec_microblock_parse( &ctx->slice_exec_ctx );
16191597
} else if( fd_slice_exec_slice_ready( &ctx->slice_exec_ctx ) ) {
@@ -1630,13 +1608,15 @@ exec_and_handle_slice( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
16301608

16311609
/* At this point, we know that we have some quantity of transactions
16321610
in a microblock that we are ready to execute. */
1633-
for( ulong i=0UL; i<free_exec_cnt; i++ ) {
1611+
for( int i=0; i<fd_ulong_popcnt( ctx->exec_ready_bitset ); i++ ) {
16341612

16351613
if( !fd_slice_exec_txn_ready( &ctx->slice_exec_ctx ) ) {
16361614
return;
16371615
}
16381616

1639-
ulong exec_idx = exec_free_idx[ i ];
1617+
int exec_idx = fd_ulong_find_lsb( ctx->exec_ready_bitset );
1618+
/* Mark the exec tile as busy */
1619+
ctx->exec_ready_bitset = fd_ulong_pop_lsb( ctx->exec_ready_bitset );
16401620

16411621
ulong tsorig = fd_frag_meta_ts_comp( fd_tickcount() );
16421622

@@ -1649,9 +1629,6 @@ exec_and_handle_slice( fd_replay_tile_ctx_t * ctx, fd_stem_context_t * stem ) {
16491629
over all accounts a second time. */
16501630
fd_runtime_update_program_cache( ctx->slot_ctx, &txn_p, ctx->runtime_spad );
16511631

1652-
/* Mark the exec tile as busy */
1653-
ctx->exec_ready[ exec_idx ] = EXEC_TXN_BUSY;
1654-
16551632
/* Dispatch dcache to exec tile */
16561633
fd_replay_out_link_t * exec_out = &ctx->exec_out[ exec_idx ];
16571634
fd_runtime_public_txn_msg_t * exec_msg = (fd_runtime_public_txn_msg_t *)fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
@@ -2017,9 +1994,9 @@ unprivileged_init( fd_topo_t * topo,
20171994
FD_LOG_CRIT(( "too many exec tiles %lu", ctx->exec_cnt ));
20181995
}
20191996

1997+
/* Initially mark every exec tile as not ready. */
1998+
ctx->exec_ready_bitset = 0UL;
20201999
for( ulong i = 0UL; i < ctx->exec_cnt; i++ ) {
2021-
/* Mark all initial state as not being ready. */
2022-
ctx->exec_ready[ i ] = EXEC_TXN_BUSY;
20232000
ctx->prev_ids[ i ] = FD_EXEC_ID_SENTINEL;
20242001

20252002
ulong exec_fseq_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "exec_fseq.%lu", i );

0 commit comments

Comments
 (0)