Skip to content

snapshots: lthash #5954

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions book/api/metrics-generated.md
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,17 @@

</div>

## Snaphs Tile

<div class="metrics">

| Metric | Type | Description |
|--------|------|-------------|
| <span class="metrics-name">snaphs_&#8203;state</span> | gauge | State of the tile. 0=hashing, 1=done, 2=shutdown |
| <span class="metrics-name">snaphs_&#8203;accounts_&#8203;hashed</span> | gauge | Number of accounts hashed so far during snapshot loading. Might decrease if snapshot load is aborted and restarted |

</div>

## Ipecho Tile

<div class="metrics">
Expand Down
56 changes: 43 additions & 13 deletions src/app/firedancer-dev/commands/backtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "../../../util/pod/fd_pod_format.h"
#include "../../../discof/replay/fd_replay_notif.h"
#include "../../../discof/reasm/fd_reasm.h"
#include "../../../discof/restore/utils/fd_ssctrl.h"
#include "../../../flamenco/runtime/fd_runtime_public.h" /* FD_RUNTIME_PUBLIC_ACCOUNT_UPDATE_MSG_MTU */
#include "../main.h"

Expand All @@ -36,6 +37,7 @@ static void
backtest_topo( config_t * config ) {
ulong exec_tile_cnt = config->firedancer.layout.exec_tile_count;
ulong writer_tile_cnt = config->firedancer.layout.writer_tile_count;
ulong hash_tile_cnt = config->firedancer.layout.hash_tile_count;

fd_topo_t * topo = { fd_topob_new( &config->topo, config->name ) };
topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
Expand Down Expand Up @@ -99,6 +101,15 @@ backtest_topo( config_t * config ) {
snapdc_tile->allow_shutdown = 1;
snapin_tile->allow_shutdown = 1;

if( FD_LIKELY( hash_tile_cnt ) ) {
fd_topob_wksp( topo, "snaphs" );
}

for( ulong i=0UL; i<hash_tile_cnt; i++ ) {
fd_topo_tile_t * snaphsh_tile = fd_topob_tile( topo, "snaphs", "snaphs", "metric_in", cpu_idx++, 0, 0 );
snaphsh_tile->allow_shutdown = 1;
}

/**********************************************************************/
/* Setup backtest->replay link (repair_repla) in topo */
/**********************************************************************/
Expand All @@ -121,16 +132,23 @@ backtest_topo( config_t * config ) {
fd_topob_wksp( topo, "snapdc_rd" );
fd_topob_wksp( topo, "snapin_rd" );
fd_topob_wksp( topo, "snap_out" );
fd_topob_wksp( topo, "replay_manif" );

if( FD_LIKELY( hash_tile_cnt ) ) {
fd_topob_wksp( topo, "snapin_hsh" );
fd_topob_wksp( topo, "snaphsh_out" );
}

/* TODO: Should be depth of 1 or 2, not 4, but it causes backpressure
from the replay tile parsing the manifest, remove when this is
fixed. */
fd_topob_link( topo, "snap_out", "snap_out", 4UL, 5UL*(1UL<<30UL), 1UL );

fd_topob_link( topo, "snap_zstd", "snap_zstd", 8192UL, 16384UL, 1UL );
fd_topob_link( topo, "snap_stream", "snap_stream", 2048UL, USHORT_MAX, 1UL );
fd_topob_link( topo, "snapdc_rd", "snapdc_rd", 128UL, 0UL, 1UL );
fd_topob_link( topo, "snapin_rd", "snapin_rd", 128UL, 0UL, 1UL );
fd_topob_link( topo, "snapdc_rd", "snapdc_rd", 128UL, 0UL, 1UL );
fd_topob_link( topo, "snapin_rd", "snapin_rd", 128UL, 0UL, 1UL );
FOR(hash_tile_cnt) fd_topob_link( topo, "snapin_hsh", "snapin_hsh", 128UL, sizeof(fd_snapshot_existing_account_t), 1UL );
FOR(hash_tile_cnt) fd_topob_link( topo, "snaphsh_out", "snaphsh_out", 128UL, 2048UL, 1UL );

fd_topob_tile_out( topo, "snaprd", 0UL, "snap_zstd", 0UL );
fd_topob_tile_in ( topo, "snapdc", 0UL, "metric_in", "snap_zstd", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
Expand All @@ -144,6 +162,12 @@ backtest_topo( config_t * config ) {
fd_topob_tile_in( topo, "snaprd", 0UL, "metric_in", "snapin_rd", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
fd_topob_tile_out( topo, "snapin", 0UL, "snapin_rd", 0UL );

FOR(hash_tile_cnt) fd_topob_tile_out( topo, "snapin", 0UL, "snapin_hsh", i );
FOR(hash_tile_cnt) fd_topob_tile_in( topo, "snapin", 0UL, "metric_in", "snaphsh_out", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

FOR(hash_tile_cnt) fd_topob_tile_in( topo, "snaphs", i, "metric_in", "snapin_hsh", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
FOR(hash_tile_cnt) fd_topob_tile_out( topo, "snaphs", i, "snaphsh_out", i );

/**********************************************************************/
/* More backtest->replay links in topo */
/**********************************************************************/
Expand Down Expand Up @@ -312,15 +336,7 @@ backtest_topo( config_t * config ) {
FD_TEST( fd_pod_insertf_ulong( topo->props, busy_obj->id, "bank_busy.%lu", i ) );
}

/* Replay decoded manifest dcache topo obj */
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );

fd_topob_tile_uses( topo, snapin_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
fd_topob_tile_uses( topo, snapin_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_WRITE );
fd_topob_tile_uses( topo, replay_tile, replay_manifest_dcache, FD_SHMEM_JOIN_MODE_READ_ONLY );

for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
if( !fd_topo_configure_tile( tile, config ) ) {
Expand Down Expand Up @@ -398,9 +414,13 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
fd_topo_tile_t * snapdc_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapdc", 0UL ) ];
fd_topo_tile_t * snapin_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapin", 0UL ) ];

ulong snaphs_tile_idx = fd_topo_find_tile( topo, "snaphs", 0UL );
fd_topo_tile_t * snaphs_tile = snaphs_tile_idx!=ULONG_MAX ? &topo->tiles[ snaphs_tile_idx ] : NULL;

ulong volatile * const snaprd_metrics = fd_metrics_tile( snaprd_tile->metrics );
ulong volatile * const snapdc_metrics = fd_metrics_tile( snapdc_tile->metrics );
ulong volatile * const snapin_metrics = fd_metrics_tile( snapin_tile->metrics );
ulong volatile * const snaphs_metrics = snaphs_tile ? fd_metrics_tile( snaphs_tile->metrics ) : NULL;

ulong total_off_old = 0UL;
ulong snaprd_backp_old = 0UL;
Expand All @@ -409,6 +429,8 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
ulong snapdc_wait_old = 0UL;
ulong snapin_backp_old = 0UL;
ulong snapin_wait_old = 0UL;
ulong snaphs_backp_old = 0UL;
ulong snaphs_wait_old = 0UL;
ulong acc_cnt_old = 0UL;
sleep( 1 );
puts( "-------------backp=(snaprd,snapdc,snapin) busy=(snaprd,snapdc,snapin)---------------" );
Expand All @@ -417,8 +439,9 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
ulong snaprd_status = FD_VOLATILE_CONST( snaprd_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snapdc_status = FD_VOLATILE_CONST( snapdc_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snapin_status = FD_VOLATILE_CONST( snapin_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snaphs_status = snaphs_metrics ? FD_VOLATILE_CONST( snaphs_metrics[ MIDX( GAUGE, TILE, STATUS ) ] ) : 2UL;

if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL ) ) break;
if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL && snaphs_status==2UL ) ) break;

long cur = fd_log_wallclock();
if( FD_UNLIKELY( cur<next ) ) {
Expand All @@ -438,16 +461,21 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
ulong snapin_backp = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
ulong snapin_wait = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapin_backp;
ulong snaphs_backp = snaphs_metrics ? snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ] : 0UL;
ulong snaphs_wait = snaphs_metrics ? snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snaphs_backp : 0UL;

ulong acc_cnt = snapin_metrics[ MIDX( GAUGE, SNAPIN, ACCOUNTS_INSERTED ) ];
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
(double)( total_off-total_off_old )/1e6,
( (double)( snaprd_backp-snaprd_backp_old )*ns_per_tick )/1e7,
( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7,
( (double)( snapin_backp-snapin_backp_old )*ns_per_tick )/1e7,
( (double)( snaphs_backp-snaphs_backp_old )*ns_per_tick )/1e7,
100-( ( (double)( snaprd_wait-snaprd_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snaphs_wait-snaphs_wait_old )*ns_per_tick )/1e7 ),
(double)( acc_cnt-acc_cnt_old )/1e6 );
fflush( stdout );
total_off_old = total_off;
Expand All @@ -457,6 +485,8 @@ backtest_cmd_fn( args_t * args FD_PARAM_UNUSED,
snapdc_wait_old = snapdc_wait;
snapin_backp_old = snapin_backp;
snapin_wait_old = snapin_wait;
snaphs_backp_old = snaphs_backp;
snaphs_wait_old = snaphs_wait;
acc_cnt_old = acc_cnt;

next+=1000L*1000L*1000L;
Expand Down
43 changes: 39 additions & 4 deletions src/app/firedancer-dev/commands/snapshot_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "../../../disco/metrics/fd_metrics.h"
#include "../../../disco/topo/fd_topob.h"
#include "../../../util/tile/fd_tile_private.h"
#include "../../../discof/restore/utils/fd_ssctrl.h"
#include "../../../discof/restore/utils/fd_ssmsg.h"

#include <sys/resource.h>
Expand All @@ -26,6 +27,7 @@ snapshot_load_topo( config_t * config,
fd_topo_t * topo = &config->topo;
fd_topob_new( &config->topo, config->name );
topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
ulong hash_tile_cnt = config->firedancer.layout.hash_tile_count;

fd_topob_wksp( topo, "funk" );
fd_topo_obj_t * funk_obj = setup_topo_funk( topo, "funk",
Expand All @@ -37,7 +39,7 @@ snapshot_load_topo( config_t * config,
static ushort tile_to_cpu[ FD_TILE_MAX ] = {0};
if( args->snapshot_load.tile_cpus[0] ) {
ulong cpu_cnt = fd_tile_private_cpus_parse( args->snapshot_load.tile_cpus, tile_to_cpu );
if( FD_UNLIKELY( cpu_cnt<4UL ) ) FD_LOG_ERR(( "--tile-cpus specifies %lu CPUs, but need at least 4", cpu_cnt ));
if( FD_UNLIKELY( cpu_cnt<4UL + hash_tile_cnt ) ) FD_LOG_ERR(( "--tile-cpus specifies %lu CPUs, but need at least %lu", cpu_cnt, 4UL + hash_tile_cnt ));
}

/* metrics tile *****************************************************/
Expand Down Expand Up @@ -77,6 +79,20 @@ snapshot_load_topo( config_t * config,
fd_topo_tile_t * snapin_tile = fd_topob_tile( topo, "snapin", "snapin", "snapin", tile_to_cpu[3], 0, 0 );
snapin_tile->allow_shutdown = 1;

if( FD_LIKELY( hash_tile_cnt ) ) {
fd_topob_wksp( topo, "snaphs" );
fd_topob_wksp( topo, "snapin_hsh" );
fd_topob_wksp( topo, "snaphsh_out" );
}
#define FOR(cnt) for( ulong i=0UL; i<cnt; i++ )
for( ulong i=0UL; i<hash_tile_cnt; i++ ) {
fd_topo_tile_t * snaphsh_tile = fd_topob_tile( topo, "snaphs", "snaphs", "metric_in", tile_to_cpu[4 + i], 0, 0 );
snaphsh_tile->allow_shutdown = 1;
}

FOR(hash_tile_cnt) fd_topob_link( topo, "snapin_hsh", "snapin_hsh", 128UL, sizeof(fd_snapshot_existing_account_t), 1UL );
FOR(hash_tile_cnt) fd_topob_link( topo, "snaphsh_out", "snaphsh_out", 128UL, 2048UL, 1UL );

/* uncompressed stream -> snapin tile */
fd_topob_tile_in ( topo, "snapin", 0UL, "metric_in", "snap_stream", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

Expand All @@ -100,6 +116,11 @@ snapshot_load_topo( config_t * config,
fd_topob_tile_in( topo, "snaprd", 0UL, "metric_in", "snapin_rd", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
fd_topob_tile_out( topo, "snapin", 0UL, "snapin_rd", 0UL );

FOR(hash_tile_cnt) fd_topob_tile_out( topo, "snapin", 0UL, "snapin_hsh", i );
FOR(hash_tile_cnt) fd_topob_tile_in( topo, "snapin", 0UL, "metric_in", "snaphsh_out", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
FOR(hash_tile_cnt) fd_topob_tile_in( topo, "snaphs", i, "metric_in", "snapin_hsh", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
FOR(hash_tile_cnt) fd_topob_tile_out( topo, "snaphs", i, "snaphsh_out", i );

for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
if( !fd_topo_configure_tile( tile, config ) ) {
Expand Down Expand Up @@ -158,9 +179,13 @@ snapshot_load_cmd_fn( args_t * args,
fd_topo_tile_t * snapdc_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapdc", 0UL ) ];
fd_topo_tile_t * snapin_tile = &topo->tiles[ fd_topo_find_tile( topo, "snapin", 0UL ) ];

ulong snaphs_tile_idx = fd_topo_find_tile( topo, "snaphs", 0UL );
fd_topo_tile_t * snaphs_tile = snaphs_tile_idx!=ULONG_MAX ? &topo->tiles[ snaphs_tile_idx ] : NULL;

ulong volatile * const snaprd_metrics = fd_metrics_tile( snaprd_tile->metrics );
ulong volatile * const snapdc_metrics = fd_metrics_tile( snapdc_tile->metrics );
ulong volatile * const snapin_metrics = fd_metrics_tile( snapin_tile->metrics );
ulong volatile * const snaphs_metrics = snaphs_tile ? fd_metrics_tile( snaphs_tile->metrics ) : NULL;

ulong total_off_old = 0UL;
ulong snaprd_backp_old = 0UL;
Expand All @@ -169,6 +194,8 @@ snapshot_load_cmd_fn( args_t * args,
ulong snapdc_wait_old = 0UL;
ulong snapin_backp_old = 0UL;
ulong snapin_wait_old = 0UL;
ulong snaphs_backp_old = 0UL;
ulong snaphs_wait_old = 0UL;
ulong acc_cnt_old = 0UL;
sleep( 1 );
puts( "" );
Expand All @@ -178,14 +205,15 @@ snapshot_load_cmd_fn( args_t * args,
puts( "- stall: Waiting on upstream tile" );
puts( "- acc: Number of accounts" );
puts( "" );
puts( "-------------backp=(snaprd,snapdc,snapin) busy=(snaprd,snapdc,snapin)---------------" );
puts( "-------------backp=(snaprd,snapdc,snapin,snaphs) busy=(snaprd,snapdc,snapin,snaphs)---------------" );
long next = start+1000L*1000L*1000L;
for(;;) {
ulong snaprd_status = FD_VOLATILE_CONST( snaprd_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snapdc_status = FD_VOLATILE_CONST( snapdc_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snapin_status = FD_VOLATILE_CONST( snapin_metrics[ MIDX( GAUGE, TILE, STATUS ) ] );
ulong snaphs_status = snaphs_metrics ? FD_VOLATILE_CONST( snaphs_metrics[ MIDX( GAUGE, TILE, STATUS ) ] ) : 2UL;

if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL ) ) break;
if( FD_UNLIKELY( snaprd_status==2UL && snapdc_status==2UL && snapin_status == 2UL && snaphs_status==2UL ) ) break;

long cur = fd_log_wallclock();
if( FD_UNLIKELY( cur<next ) ) {
Expand All @@ -205,16 +233,21 @@ snapshot_load_cmd_fn( args_t * args,
ulong snapin_backp = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ];
ulong snapin_wait = snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
snapin_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snapin_backp;
ulong snaphs_backp = snaphs_metrics ? snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_BACKPRESSURE_PREFRAG ) ] : 0UL;
ulong snaphs_wait = snaphs_metrics ? snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_PREFRAG ) ] +
snaphs_metrics[ MIDX( COUNTER, TILE, REGIME_DURATION_NANOS_CAUGHT_UP_POSTFRAG ) ] + snaphs_backp : 0UL;

ulong acc_cnt = snapin_metrics[ MIDX( GAUGE, SNAPIN, ACCOUNTS_INSERTED ) ];
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
printf( "bw=%4.0f MB/s backp=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) busy=(%3.0f%%,%3.0f%%,%3.0f%%,%3.0f%%) acc=%3.1f M/s\n",
(double)( total_off-total_off_old )/1e6,
( (double)( snaprd_backp-snaprd_backp_old )*ns_per_tick )/1e7,
( (double)( snapdc_backp-snapdc_backp_old )*ns_per_tick )/1e7,
( (double)( snapin_backp-snapin_backp_old )*ns_per_tick )/1e7,
( (double)( snaphs_backp-snaphs_backp_old )*ns_per_tick )/1e7,
100-( ( (double)( snaprd_wait-snaprd_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snapdc_wait-snapdc_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snapin_wait-snapin_wait_old )*ns_per_tick )/1e7 ),
100-( ( (double)( snaphs_wait-snaphs_wait_old )*ns_per_tick )/1e7 ),
(double)( acc_cnt-acc_cnt_old )/1e6 );
fflush( stdout );
total_off_old = total_off;
Expand All @@ -224,6 +257,8 @@ snapshot_load_cmd_fn( args_t * args,
snapdc_wait_old = snapdc_wait;
snapin_backp_old = snapin_backp;
snapin_wait_old = snapin_wait;
snaphs_backp_old = snaphs_backp;
snaphs_wait_old = snaphs_wait;
acc_cnt_old = acc_cnt;

next+=1000L*1000L*1000L;
Expand Down
2 changes: 2 additions & 0 deletions src/app/firedancer-dev/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ extern fd_topo_run_tile_t fd_tile_shredcap;
extern fd_topo_run_tile_t fd_tile_snaprd;
extern fd_topo_run_tile_t fd_tile_snapdc;
extern fd_topo_run_tile_t fd_tile_snapin;
extern fd_topo_run_tile_t fd_tile_snaphs;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also add to main.c in firedancer


fd_topo_run_tile_t * TILES[] = {
&fd_tile_net,
Expand Down Expand Up @@ -150,6 +151,7 @@ fd_topo_run_tile_t * TILES[] = {
&fd_tile_snaprd,
&fd_tile_snapdc,
&fd_tile_snapin,
&fd_tile_snaphs,
&fd_tile_ipecho,
NULL,
};
Expand Down
8 changes: 8 additions & 0 deletions src/app/firedancer/config/default.toml
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,14 @@ user = ""
# requests.
sign_tile_count = 2

# How many snapshot hash tiles to run. The snapshot hash tiles are
Copy link
Contributor

@mmcgee-jump mmcgee-jump Aug 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few things,

  • hash_tile_count is very vague and could refer to anything. Be more specific; in this case the convention is <tilename>_tile_count, so snaplt_tile_count?
  • Allowing someone to turn this off is probably not desirable, and we should just make this config production ready now. I'd suggest something like,
Suggested change
# How many snapshot hash tiles to run. The snapshot hash tiles are
[layout]
# How many snapshot lthash tiles to run. Snapshot hash tiles verify
# the contents of accounts in the loaded snapshot, in parallel. Too
# few and snapshot loading might be delayed. Once the hash of the
# accounts in a snapshot is calculated, if it does not match the
# validator will abort with an error.
snaplt_tile_count = 1
[development]
[snapshots]
# Set to true to disable verification of the lthash in the
# loaded snapshot. This is not safe or supported in production
# and should only be used for development and testing purposes.
#
# If lthash verification is disabled, the validator will not
# start any lthash tiles and the value of `snaplt_tile_count`
# in the layout will be ignored.
disable_lthash_verification = false

Once you have this, you can:

  • Check that snaplt_tile_count > 0
  • Check that the development option is not enabled when running against a production cluster (same as some other options, e.g. we check this for --no-sandbox).

# responsible for verifying the hash of all accounts in the loaded
# snapshot via lthash (lattice hashing). Currently set to 0 by
# default, meaning no hash tiles are run and no hashing is done,
# because it is too slow to run in the full client. TODO: enable
# snapshot hash tiles in the full client and update this comment.
hash_tile_count = 0
Comment on lines +701 to +707
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make it clear that if the hash tile count is 0, no hashing will be done? Wondering if we should have a snapshots.verify flag to enable/disable the hashing, instead of hash_tile_count = 0 => no hashing.


# All memory that will be used in Firedancer is pre-allocated in two
# kinds of pages: huge and gigantic. Huge pages are 2 MiB and gigantic
# pages are 1 GiB. This is done to prevent TLB misses which can have a
Expand Down
Loading
Loading