Skip to content

Commit 8bc6685

Browse files
npatel-jump and lidatong
authored and committed
repair: worker pooled async signing and ping-pong optimization
Enable asynchronous signing requests in the repair flow. Clean up and document the async repair signing logic. Update for flow control and mapping improvements.
1 parent ea39afe commit 8bc6685

File tree

12 files changed

+1089
-145
lines changed

12 files changed

+1089
-145
lines changed

src/app/firedancer-dev/commands/repair.c

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ repair_topo( config_t * config ) {
6161
ulong net_tile_cnt = config->layout.net_tile_count;
6262
ulong shred_tile_cnt = config->layout.shred_tile_count;
6363
ulong quic_tile_cnt = config->layout.quic_tile_count;
64+
ulong sign_tile_cnt = config->firedancer.layout.sign_tile_count;
6465

6566
fd_topo_t * topo = { fd_topob_new( &config->topo, config->name ) };
6667
topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
@@ -132,12 +133,17 @@ repair_topo( config_t * config ) {
132133
/**/ fd_topob_link( topo, "gossip_net", "net_gossip", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
133134

134135
/**/ fd_topob_link( topo, "repair_net", "net_repair", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
135-
/**/ fd_topob_link( topo, "repair_sign", "repair_sign", 128UL, 2048UL, 1UL );
136-
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_repair", "shred_repair", pending_fec_shreds_depth, FD_SHRED_REPAIR_MTU, 2UL );
136+
137+
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_repair", "shred_repair", pending_fec_shreds_depth, FD_SHRED_REPAIR_MTU, 2UL /* at most 2 msgs per after_frag */ );
137138

138139
FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_repair", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL );
139-
/**/ fd_topob_link( topo, "sign_repair", "sign_repair", 128UL, 64UL, 1UL );
140-
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
140+
141+
/**/ fd_topob_link( topo, "ping_sign", "repair_sign", 128UL, 2048UL, 1UL );
142+
/**/ fd_topob_link( topo, "sign_ping", "sign_repair", 128UL, sizeof(fd_ed25519_sig_t), 1UL );
143+
FOR(sign_tile_cnt-1) fd_topob_link( topo, "repair_sign", "repair_sign", 128UL, 2048UL, 1UL );
144+
FOR(sign_tile_cnt-1) fd_topob_link( topo, "sign_repair", "sign_repair", 1024UL, sizeof(fd_ed25519_sig_t), 1UL );
145+
146+
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
141147
/**/ fd_topob_link( topo, "poh_shred", "poh_shred", 16384UL, USHORT_MAX, 1UL );
142148

143149
/**/ fd_topob_link( topo, "send_txns", "send_txns", 128UL, FD_TXN_MTU, 1UL );
@@ -181,7 +187,7 @@ repair_topo( config_t * config ) {
181187

182188
/* topo, tile_name, tile_wksp, metrics_wksp, cpu_idx, is_agave, uses_keyswitch */
183189
FOR(shred_tile_cnt) fd_topob_tile( topo, "shred", "shred", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 1 );
184-
/**/ fd_topob_tile( topo, "sign", "sign", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 1 );
190+
FOR(sign_tile_cnt) fd_topob_tile( topo, "sign", "sign", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 1 );
185191
/**/ fd_topob_tile( topo, "metric", "metric", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
186192
/**/ fd_topob_tile( topo, "gossip", "gossip", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
187193
fd_topo_tile_t * repair_tile = fd_topob_tile( topo, "repair", "repair", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
@@ -303,12 +309,17 @@ repair_topo( config_t * config ) {
303309
fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "snap_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
304310
FOR(shred_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "shred_repair", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
305311
306-
/**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "repair_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
307-
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_sign", 0UL );
308-
/**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "sign_repair", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
312+
/**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "ping_sign", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
313+
/**/ fd_topob_tile_out( topo, "repair", 0UL, "ping_sign", 0UL );
309314
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_repla", 0UL );
310315
FOR(shred_tile_cnt) fd_topob_tile_out( topo, "repair", 0UL, "repair_shred", i );
311-
/**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_repair", 0UL );
316+
/**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_ping", 0UL );
317+
318+
FOR(sign_tile_cnt-1) fd_topob_tile_out( topo, "repair", 0UL, "repair_sign", i );
319+
FOR(sign_tile_cnt-1) fd_topob_tile_in ( topo, "sign", i+1, "metric_in", "repair_sign", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
320+
FOR(sign_tile_cnt-1) fd_topob_tile_out( topo, "sign", i+1, "sign_repair", i );
321+
FOR(sign_tile_cnt-1) fd_topob_tile_in ( topo, "repair", 0UL, "metric_in", "sign_repair", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
322+
/**/ fd_topob_tile_in ( topo, "repair", 0UL, "metric_in", "sign_ping", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
312323

313324
if( 1 ) {
314325
fd_topob_wksp( topo, "scap" );

src/app/firedancer/config/default.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,13 @@ user = ""
691691
# very high TPS rates because the cluster size will be very small.
692692
shred_tile_count = 1
693693

694+
# How many sign tiles to run. Should be set to >= 2. Increasing
695+
# this count horizontally scales repair request signing.
696+
# One tile is reserved for synchronous signing across all tiles.
697+
# The remaining tiles share the workload of signing repair
698+
# requests.
699+
sign_tile_count = 2
700+
694701
# All memory that will be used in Firedancer is pre-allocated in two
695702
# kinds of pages: huge and gigantic. Huge pages are 2 MiB and gigantic
696703
# pages are 1 GiB. This is done to prevent TLB misses which can have a

0 commit comments

Comments
 (0)