Skip to content

Commit 47d38b2

Browse files
snapshots: lthash tile
1 parent 99a8d3e commit 47d38b2

28 files changed

+1020
-238
lines changed

book/api/metrics-generated.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,17 @@
883883

884884
</div>
885885

886+
## Snaplt Tile
887+
888+
<div class="metrics">
889+
890+
| Metric | Type | Description |
891+
|--------|------|-------------|
892+
| <span class="metrics-name">snaplt_&#8203;state</span> | gauge | State of the tile. 0=hashing, 1=done, 2=shutdown |
893+
| <span class="metrics-name">snaplt_&#8203;accounts_&#8203;hashed</span> | gauge | Number of accounts hashed so far during snapshot loading. May decrease if the snapshot load is aborted and restarted |
894+
895+
</div>
896+
886897
## Ipecho Tile
887898

888899
<div class="metrics">

src/app/firedancer-dev/commands/backtest.c

Lines changed: 127 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "../../../discof/restore/utils/fd_ssmsg.h"
2626
#include "../../../discof/replay/fd_replay_notif.h"
2727
#include "../../../discof/reasm/fd_reasm.h"
28+
#include "../../../discof/restore/utils/fd_ssctrl.h"
2829
#include "../../../flamenco/runtime/fd_runtime_public.h" /* FD_RUNTIME_PUBLIC_ACCOUNT_UPDATE_MSG_MTU */
2930
#include "../main.h"
3031

@@ -37,6 +38,8 @@ static void
3738
backtest_topo( config_t * config ) {
3839
ulong exec_tile_cnt = config->firedancer.layout.exec_tile_count;
3940
ulong writer_tile_cnt = config->firedancer.layout.writer_tile_count;
41+
ulong snaplt_tile_cnt = config->firedancer.layout.snaplt_tile_count;
42+
int snaplt_enabled = !config->development.snapshots.disable_lthash_verification;
4043

4144
int disable_snap_loader = config->tiles.replay.genesis[0] != '\0';
4245

@@ -105,6 +108,14 @@ backtest_topo( config_t * config ) {
105108
snapin_tile->allow_shutdown = 1;
106109
}
107110

111+
if( FD_LIKELY( snaplt_enabled ) ) {
112+
fd_topob_wksp( topo, "snaplt" );
113+
for( ulong i=0UL; i<snaplt_tile_cnt; i++ ) {
114+
fd_topo_tile_t * snaplt_tile = fd_topob_tile( topo, "snaplt", "snaplt", "metric_in", cpu_idx++, 0, 0 );
115+
snaplt_tile->allow_shutdown = 1;
116+
}
117+
}
118+
108119
/**********************************************************************/
109120
/* Setup backtest->replay link (repair_repla) in topo */
110121
/**********************************************************************/
@@ -128,7 +139,11 @@ backtest_topo( config_t * config ) {
128139
fd_topob_wksp( topo, "snapdc_rd" );
129140
fd_topob_wksp( topo, "snapin_rd" );
130141
fd_topob_wksp( topo, "snap_out" );
131-
fd_topob_wksp( topo, "replay_manif" );
142+
143+
if( FD_LIKELY( snaplt_enabled ) ) {
144+
fd_topob_wksp( topo, "snapin_lt" );
145+
fd_topob_wksp( topo, "snaplt_out" );
146+
}
132147
/* TODO: Should be depth of 1 or 2, not 4, but it causes backpressure
133148
from the replay tile parsing the manifest; remove when this is
134149
fixed. */
@@ -139,6 +154,11 @@ backtest_topo( config_t * config ) {
139154
fd_topob_link( topo, "snapdc_rd", "snapdc_rd", 128UL, 0UL, 1UL );
140155
fd_topob_link( topo, "snapin_rd", "snapin_rd", 128UL, 0UL, 1UL );
141156

157+
if( FD_LIKELY( snaplt_enabled ) ) {
158+
fd_topob_link( topo, "snapin_lt", "snapin_lt", 128UL, sizeof(fd_snapshot_existing_account_t), 1UL );
159+
FOR(snaplt_tile_cnt) fd_topob_link( topo, "snaplt_out", "snaplt_out", 128UL, 2048UL, 1UL );
160+
}
161+
142162
fd_topob_tile_out( topo, "snaprd", 0UL, "snap_zstd", 0UL );
143163
fd_topob_tile_in ( topo, "snapdc", 0UL, "metric_in", "snap_zstd", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
144164
fd_topob_tile_out( topo, "snapdc", 0UL, "snap_stream", 0UL );
@@ -152,6 +172,13 @@ backtest_topo( config_t * config ) {
152172
fd_topob_tile_out( topo, "snapin", 0UL, "snapin_rd", 0UL );
153173
}
154174

175+
if( FD_LIKELY( snaplt_enabled ) ) {
176+
fd_topob_tile_out( topo, "snapin", 0UL, "snapin_lt", 0UL );
177+
FOR(snaplt_tile_cnt) fd_topob_tile_in( topo, "snapin", 0UL, "metric_in", "snaplt_out", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
178+
FOR(snaplt_tile_cnt) fd_topob_tile_in( topo, "snaplt", i, "metric_in", "snapin_lt", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
179+
FOR(snaplt_tile_cnt) fd_topob_tile_out( topo, "snaplt", i, "snaplt_out", i );
180+
}
181+
155182
/**********************************************************************/
156183
/* More backtest->replay links in topo */
157184
/**********************************************************************/
@@ -305,14 +332,7 @@ backtest_topo( config_t * config ) {
305332
}
306333

307334
if( FD_LIKELY( !disable_snap_loader ) ) {
308-
/* Replay decoded manifest dcache topo obj */
309-
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
310-
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
311-
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
312-
313335
fd_topob_tile_uses( topo, snapin_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
314-
fd_topob_tile_uses( topo, snapin_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_WRITE );
315-
fd_topob_tile_uses( topo, replay_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_ONLY );
316336
}
317337

318338
for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
@@ -397,68 +417,110 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
397417
ulong volatile * const snapdc_metrics = fd_metrics_tile( snapdc_tile->metrics );
398418
ulong volatile * const snapin_metrics = fd_metrics_tile( snapin_tile->metrics );
399419

400-
ulong total_off_old = 0UL;
401-
ulong snaprd_backp_old = 0UL;
402-
ulong snaprd_wait_old = 0UL;
403-
ulong snapdc_backp_old = 0UL;
404-
ulong snapdc_wait_old = 0UL;
405-
ulong snapin_backp_old = 0UL;
406-
ulong snapin_wait_old = 0UL;
407-
ulong acc_cnt_old = 0UL;
408-
sleep( 1 );
409-
puts( "-------------backp=(snaprd,snapdc,snapin) busy=(snaprd,snapdc,snapin)---------------" );
410-
long next = start+1000L*1000L*1000L;
411-
for(;;) {
412-
ulong snaprd_status = FD_VOLATILE_CONST( snaprd_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
413-
ulong snapdc_status = FD_VOLATILE_CONST( snapdc_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
414-
ulong snapin_status = FD_VOLATILE_CONST( snapin_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
415-
416-
if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL ) ) break;
417-
418-
long cur = fd_log_wallclock();
419-
if( FD_UNLIKELY( cur<next ) ) {
420-
long sleep_nanos = fd_long_min( 1000L*1000L, next-cur );
421-
FD_TEST( !fd_sys_util_nanosleep( (uint)(sleep_nanos/(1000L*1000L*1000L)), (uint)(sleep_nanos%(1000L*1000L*1000L)) ) );
422-
continue;
423-
}
420+
ulong volatile * snaplt_metrics[ FD_MAX_SNAPLT_TILES ];
421+
ulong snaplt_tile_cnt = fd_topo_tile_name_cnt( topo, "snaplt" );
424422

425-
ulong total_off = snaprd_metrics[ MIDX( GAUGE, SNAPRD, FULL_BYTES_READ ) ] +
426-
snaprd_metrics[ MIDX( GAUGE, SNAPRD, INCREMENTAL_BYTES_READ ) ];
427-
ulong snaprd_backp = snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
428-
ulong snaprd_wait = snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
429-
snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snaprd_backp;
430-
ulong snapdc_backp = snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
431-
ulong snapdc_wait = snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
432-
snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapdc_backp;
433-
ulong snapin_backp = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
434-
ulong snapin_wait = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
435-
snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapin_backp;
436-
437-
ulong acc_cnt = snapin_metrics[ MIDX( GAUGE, SNAPIN, ACCOUNTS_INSERTED ) ];
438-
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
439-
(double)( total_off-total_off_old )/1e6,
440-
( (double)( snaprd_backp-snaprd_backp_old )*ns_per_tick )/1e7,
441-
( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7,
442-
( (double)( snapin_backp-snapin_backp_old )*ns_per_tick )/1e7,
443-
100-( ( (double)( snaprd_wait-snaprd_wait_old )*ns_per_tick )/1e7 ),
444-
100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ),
445-
100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ),
446-
(double)( acc_cnt-acc_cnt_old )/1e6 );
447-
fflush( stdout );
448-
total_off_old = total_off;
449-
snaprd_backp_old = snaprd_backp;
450-
snaprd_wait_old = snaprd_wait;
451-
snapdc_backp_old = snapdc_backp;
452-
snapdc_wait_old = snapdc_wait;
453-
snapin_backp_old = snapin_backp;
454-
snapin_wait_old = snapin_wait;
455-
acc_cnt_old = acc_cnt;
456-
457-
next+=1000L*1000L*1000L;
423+
for( ulong i=0UL; i<snaplt_tile_cnt; i++ ) {
424+
ulong snaplt_tile_idx = fd_topo_find_tile( topo, "snaplt", i );
425+
FD_TEST( snaplt_tile_idx!=ULONG_MAX );
426+
fd_topo_tile_t * snaplt_tile = &topo->tiles[ snaplt_tile_idx ];
427+
snaplt_metrics[ i ] = fd_metrics_tile( snaplt_tile->metrics );
428+
}
429+
430+
ulong total_off_old = 0UL;
431+
ulong snaprd_backp_old = 0UL;
432+
ulong snaprd_wait_old = 0UL;
433+
ulong snapdc_backp_old = 0UL;
434+
ulong snapdc_wait_old = 0UL;
435+
ulong snapin_backp_old = 0UL;
436+
ulong snapin_wait_old = 0UL;
437+
ulong snaplt_backp_old = 0UL;
438+
ulong snaplt_wait_old = 0UL;
439+
ulong acc_cnt_old = 0UL;
440+
sleep( 1 );
441+
puts( "" );
442+
puts( "Columns:" );
443+
puts( "- bw: Uncompressed bandwidth" );
444+
puts( "- backp: Backpressured by downstream tile" );
445+
puts( "- stall: Waiting on upstream tile" );
446+
puts( "- acc: Number of accounts" );
447+
puts( "" );
448+
puts( "-------------backp=(snaprd,snapdc,snapin,snaplt) busy=(snaprd,snapdc,snapin,snaplt)---------------" );
449+
long next = start+1000L*1000L*1000L;
450+
for(;;) {
451+
ulong snaprd_status = FD_VOLATILE_CONST( snaprd_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
452+
ulong snapdc_status = FD_VOLATILE_CONST( snapdc_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
453+
ulong snapin_status = FD_VOLATILE_CONST( snapin_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
454+
ulong snaplt_status = ULONG_MAX;
455+
456+
ulong snaplt_status_sum = 0UL;
457+
for( ulong i=0UL; i<snaplt_tile_cnt; i++ ) {
458+
ulong snaplt_status = FD_VOLATILE_CONST( snaplt_metrics[ i ][ MIDX( GAUGE, TILE, STATUS ) ] );
459+
snaplt_status_sum += snaplt_status;
460+
}
461+
if( FD_UNLIKELY( snaplt_status_sum==2UL*snaplt_tile_cnt ) ) snaplt_status = 2UL;
462+
else snaplt_status = snaplt_tile_cnt>0UL ? 1UL : 2UL;
463+
464+
if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL && snaplt_status==2UL ) ) break;
465+
466+
long cur = fd_log_wallclock();
467+
if( FD_UNLIKELY( cur<next ) ) {
468+
long sleep_nanos = fd_long_min( 1000L*1000L, next-cur );
469+
FD_TEST( !fd_sys_util_nanosleep( (uint)(sleep_nanos/(1000L*1000L*1000L)), (uint)(sleep_nanos%(1000L*1000L*1000L)) ) );
470+
continue;
458471
}
472+
473+
ulong total_off = snaprd_metrics[ MIDX( GAUGE, SNAPRD, FULL_BYTES_READ ) ] +
474+
snaprd_metrics[ MIDX( GAUGE, SNAPRD, INCREMENTAL_BYTES_READ ) ];
475+
ulong snaprd_backp = snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
476+
ulong snaprd_wait = snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
477+
snaprd_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snaprd_backp;
478+
ulong snapdc_backp = snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
479+
ulong snapdc_wait = snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
480+
snapdc_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapdc_backp;
481+
ulong snapin_backp = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
482+
ulong snapin_wait = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
483+
snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapin_backp;
484+
ulong snaplt_backp = 0UL;
485+
ulong snaplt_wait = 0UL;
486+
487+
for( ulong i=0UL; i<snaplt_tile_cnt; i++ ) {
488+
snaplt_backp += snaplt_metrics[ i ][ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
489+
}
490+
for( ulong i=0UL; i<snaplt_tile_cnt; i++ ) {
491+
snaplt_wait += snaplt_metrics[ i ][ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
492+
snaplt_metrics[ i ][ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snaplt_backp;
493+
}
494+
495+
ulong acc_cnt = snapin_metrics[ MIDX( GAUGE, SNAPIN, ACCOUNTS_INSERTED ) ];
496+
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
497+
(double)( total_off-total_off_old )/1e6,
498+
( (double)( snaprd_backp-snaprd_backp_old )*ns_per_tick )/1e7,
499+
( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7,
500+
( (double)( snapin_backp-snapin_backp_old )*ns_per_tick )/1e7,
501+
( (double)( snaplt_backp-snaplt_backp_old )*ns_per_tick )/1e7/(double)snaplt_tile_cnt,
502+
100-( ( (double)( snaprd_wait-snaprd_wait_old )*ns_per_tick )/1e7 ),
503+
100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ),
504+
100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ),
505+
100-( ( (double)( snaplt_wait-snaplt_wait_old )*ns_per_tick )/1e7/(double)snaplt_tile_cnt ),
506+
(double)( acc_cnt-acc_cnt_old )/1e6 );
507+
fflush( stdout );
508+
total_off_old = total_off;
509+
snaprd_backp_old = snaprd_backp;
510+
snaprd_wait_old = snaprd_wait;
511+
snapdc_backp_old = snapdc_backp;
512+
snapdc_wait_old = snapdc_wait;
513+
snapin_backp_old = snapin_backp;
514+
snapin_wait_old = snapin_wait;
515+
snaplt_backp_old = snaplt_backp;
516+
snaplt_wait_old = snaplt_wait;
517+
acc_cnt_old = acc_cnt;
518+
519+
next+=1000L*1000L*1000L;
459520
}
460521

461522
for(;;) pause();
523+
}
462524
}
463525

464526
action_t fd_action_backtest = {

0 commit comments

Comments
 (0)