From 48715e4595ff7828d9f581b5b208091bd2a9ede4 Mon Sep 17 00:00:00 2001 From: jherrera-jump Date: Mon, 16 Mar 2026 20:33:15 +0000 Subject: [PATCH] gui: add wait for supermajority --- book/api/websocket.md | 141 ++++++++++++++++++++++- src/app/firedancer/topology.c | 3 + src/disco/gui/fd_gui.c | 61 +++++++++- src/disco/gui/fd_gui.h | 22 +++- src/disco/gui/fd_gui_peers.c | 209 +++++++++++++++++++++++++++++++++- src/disco/gui/fd_gui_peers.h | 47 ++++++++ src/disco/gui/fd_gui_printf.c | 123 ++++++++++++++++++-- src/disco/gui/fd_gui_printf.h | 13 +++ src/disco/gui/fd_gui_tile.c | 20 +++- src/disco/topo/fd_topo.h | 3 + 10 files changed, 614 insertions(+), 28 deletions(-) diff --git a/book/api/websocket.md b/book/api/websocket.md index 7f36d012cbf..ce2d8a91332 100644 --- a/book/api/websocket.md +++ b/book/api/websocket.md @@ -523,7 +523,7 @@ Some interesting transitions are, "topic": "summary", "key": "boot_progress", "value": { - "phase": "loading_full_snapshot", + "phase": "waiting_for_supermajority", "joining_gossip_elapsed_seconds": 5, "loading_full_snapshot_elapsed_seconds": 7.8, "loading_full_snapshot_reset_count": 0, @@ -545,7 +545,14 @@ Some interesting transitions are, "loading_incremental_snapshot_decompress_bytes_compressed": null, "loading_incremental_snapshot_insert_bytes_decompressed": null, "loading_incremental_snapshot_insert_accounts": null, - "catching_up_elapsed": null, + "wait_for_supermajority_bank_hash": "2CeCyRoYmcctDmbXWrSUfTT4aQkGVCnArAmbdmQ5dGFi", + "wait_for_supermajority_shred_version": "37500", + "wait_for_supermajority_attempt": 1, + "wait_for_supermajority_total_stake": "1", + "wait_for_supermajority_connected_stake": "1", + "wait_for_supermajority_total_peers": 1, + "wait_for_supermajority_connected_peers": 1, + "catching_up_elapsed_seconds": null, "catching_up_first_replay_slot": null, } } @@ -556,7 +563,7 @@ Some interesting transitions are, **`BootProgress`** | Field | Type | Description | 
|-----------------------------------------------------------------------|-----------------|-------------| -| phase | `string` | One of `joining_gossip`, `loading_full_snapshot`, `loading_incremental_snapshot`, `catching_up`, or `running`. This indicates the current phase of the boot process | +| phase | `string` | One of `joining_gossip`, `loading_full_snapshot`, `loading_incremental_snapshot`, `catching_up`, `waiting_for_supermajority`, or `running`. This indicates the current phase of the boot process | | joining_gossip_elapsed_seconds | `number` | If the phase is `joining_gossip`, this is the duration, in seconds, spent joining the gossip network | | loading_{full\|incremental}_snapshot_elapsed_seconds | `number` | If the phase is at least `loading_{full\|incremental}_snapshot`, this is the elapsed time, in seconds, spent reading (either downloading or reading from disk) the snapshot since the last reset | | loading_{full\|incremental}_snapshot_reset_count | `number\|null` | If the phase is at least `loading_{full\|incremental}_snapshot` or later, this is the number of times the load for the snapshot failed and the phase was restarted from scratch. A snapshot load may fail due to an unreliable or underperforming network connection. Otherwise, `null` | @@ -568,10 +575,31 @@ Some interesting transitions are, | loading_{full\|incremental}_snapshot_decompress_bytes_compressed | `number\|null` | If the phase is at least `loading_{full\|incremental}_snapshot`, this is the (compressed) number of bytes processed by decompress from the snapshot so far. Otherwise, `null` | | loading_{full\|incremental}_snapshot_insert_bytes_decompressed | `number\|null` | If the phase is at least `loading_{full\|incremental}_snapshot`, this is the (decompressed) number of bytes processed from the snapshot by the snapshot insert time so far. 
Otherwise, `null` | | loading_{full\|incremental}_snapshot_insert_accounts | `number\|null` | If the phase is at least `loading_{full\|incremental}_snapshot`, this is the current number of inserted accounts from the snapshot into the validator's accounts database. Otherwise, `null` | +| wait_for_supermajority_bank_hash | `string\|null` | If the client was configured to include the `waiting_for_supermajority` phase at startup, this is the expected bank hash of the snapshot bank. This ensures all validators join the cluster with the same starting state. `null` if wait for supermajority is not configured | +| wait_for_supermajority_shred_version | `string\|null` | If the client was configured to include the `waiting_for_supermajority` phase at startup, this is the expected shred version it was configured with. The shred version is functionally a hash of (genesis_hash, cluster_restart_history), which ensures that only nodes that explicitly agree on the restart slot and restart attempt count can communicate with each other. `null` if wait for supermajority is not configured | +| wait_for_supermajority_attempt | `number\|null` | If the client was configured to include the `waiting_for_supermajority` phase at startup, this is the number of times this cluster has been restarted onto the snapshot slot, including the current attempt. 
`null` if wait for supermajority is not configured | +| wait_for_supermajority_total_stake | `string\|null` | If the phase is at least `waiting_for_supermajority`, this is the total network stake in lamports used to determine the 80% restart threshold | +| wait_for_supermajority_connected_stake | `string\|null` | If the phase is at least `waiting_for_supermajority`, this is the network stake in lamports that is currently active on gossip and waiting for the restart threshold | +| wait_for_supermajority_total_peers | `number\|null` | If the phase is at least `waiting_for_supermajority`, this is the total number of peers with an active stake | +| wait_for_supermajority_connected_peers | `number\|null` | If the phase is at least `waiting_for_supermajority`, this is the number of peers with an active stake currently active on gossip and waiting for the restart threshold | | catching_up_elapsed_seconds | `number` | If the phase is `catching_up`, this is the duration, in seconds, the validator has spent catching up to the current slot | | catching_up_first_replay_slot | `number` | If the phase is `catching_up`, this is the first slot that exited the replay pipeline after booting | +The `wait_for_supermajority_*` fields will be `null` if the +client is not configured to wait for a cluster restart, which is the +case for typical client usage. + +The `wait_for_supermajority_*_stake` fields are derived +differently from the `gossip.network_stats.health` activated stake +(which is from the start of the epoch). These fields account for any +stake that is activating/deactivating in the current epoch and any stake +that was explicitly undelegated prior to restart (e.g. inactive testnet +participants or bad actors). + +During the `waiting_for_supermajority` phase, per-peer online/offline status +is available via the `wait_for_supermajority.peer_{add|remove}` messages. 
+ #### `summary.schedule_strategy` | frequency | type | example | |------------|----------|---------| @@ -1748,7 +1776,8 @@ identity is no longer in these three data sources, it will be removed. }, "vote": [ { - "vote_pubkey": "8ri9HeWZv4Dcf4BD46pVPjmefzJLpbtfdAtyxyeG4enL", + "vote_account": "8ri9HeWZv4Dcf4BD46pVPjmefzJLpbtfdAtyxyeG4enL", + "prev_stake": "0", "activated_stake": "5812", "last_vote": 281795801, "root_slot": 281795770, @@ -1789,8 +1818,9 @@ identity is no longer in these three data sources, it will be removed. **`PeerUpdateVoteAccount`** | Field | Type | Description | |-----------------|----------------|-------------| -| vote_pubkey | `string` | The public key of vote account, encoded in base58 | +| vote_account | `string` | The public key of vote account, encoded in base58 | | activated_stake | `string` | The amount of stake in lamports that is activated on this vote account for the current epoch. Warming up or cooling down stake that was delegating during this epoch is not included | +| prev_stake | `string\|null` | The amount of stake in lamports that is activated on this vote account at the start of the previous epoch. Will be `null` on Frankendancer (unsupported) and when no previous epoch exists (e.g. genesis or first epoch) | | last_vote | `number\|null` | The last vote by the vote account that was landed on chain, as seen by this validator. If the vote account has not yet landed any votes on the chain this will be `null` | | root_slot | `number\|null` | The last slot that was rooted by the vote account, based on the vote history. If the vote account has not yet rooted any slots this will be `null` | | epoch_credits | `number` | The number of credits earned by the vote account during the current epoch | @@ -1831,6 +1861,107 @@ full and includes this node itself, nodes with a different `shred_version`, nodes publishing corrupt or bad information, and so on. +### wait_for_supermajority +Messages published during the wait-for-supermajority phase. 
These +messages are only published if the client is configured to boot with +the `waiting_for_supermajority` phase enabled. + +#### `wait_for_supermajority.stakes` +| frequency | type | example | +|-----------|-------------------------------|---------| +| *Once* | `WaitForSupermajorityEpoch` | below | + +Sent once per connection, after the snapshot is fully loaded and +validator info has been parsed from the ConfigProgram accounts in +the snapshot. + +::: details Example + +```json +{ + "topic": "wait_for_supermajority", + "key": "stakes", + "value": { + "staked_pubkeys": [ + "Fe4StcZSQ228dKK2hni7aCP7ZprNhj8QKWzFe5usGFYF", + "2CeCyRoYQcctDmbXWrSUfTT4aQkGVCnArAmbdmQ5QGFi", + "6JPDr4URdEDP5MqPgmDT6jk2nToyMUzNU27qsGxrRgKz" + ], + "staked_lamports": [ + "360", + "240", + "180" + ], + "infos": [ + null, + null, + { + "name": "Validator", + "details": "", + "website": "", + "icon_url": "", + "keybase_username": "" + } + ] + } +} +``` + +::: + +**`WaitForSupermajorityEpoch`** +| Field | Type | Description | +|-----------------|----------------------------|-------------| +| staked_pubkeys | `string[]` | Identity pubkeys of all staked validators in the snapshot epoch, base58 encoded | +| staked_lamports | `string[]` | A list with the same length as `staked_pubkeys`. `staked_lamports[ i ]` is the number of lamports staked on `staked_pubkeys[ i ]` | +| infos | `(PeerUpdateInfo\|null)[]` | A list with the same length as `staked_pubkeys`. 
Each element is a `PeerUpdateInfo` object if the validator has published self-reported info via ConfigProgram in the snapshot, or `null` otherwise | + +#### `wait_for_supermajority.peer_add` +| frequency | type | example | +|-----------------|------------|---------| +| *Once* + *Live* | `string[]` | below | + +::: details Example + +```json +{ + "topic": "wait_for_supermajority", + "key": "peer_add", + "value": [ + "Fe4StcZSQ228dKK2hni7aCP7ZprNhj8QKWzFe5usGFYF", + "2CeCyRoYQcctDmbXWrSUfTT4aQkGVCnArAmbdmQ5QGFi" + ] +} +``` + +::: + +Value is a flat array of base58-encoded identity pubkeys that have come +online since the last message (or all currently-online peers on initial +connect). + +#### `wait_for_supermajority.peer_remove` +| frequency | type | example | +|-----------|------------|---------| +| *Live* | `string[]` | below | + +::: details Example + +```json +{ + "topic": "wait_for_supermajority", + "key": "peer_remove", + "value": [ + "9aE6Bp1hbDpMFKqnWGUMbfxfMPXswPbkNwNrSjhpFiSN" + ] +} +``` + +::: + +Value is a flat array of base58-encoded identity pubkeys that have gone +offline (activity timeout expired) since the last message. + ### slot Slots are opportunities for a leader to produce a block. 
A slot can be in one of five levels, and in typical operation a slot moves through diff --git a/src/app/firedancer/topology.c b/src/app/firedancer/topology.c index 7e49fc80526..3ccd1004357 100644 --- a/src/app/firedancer/topology.c +++ b/src/app/firedancer/topology.c @@ -1312,6 +1312,7 @@ fd_topo_initialize( config_t * config ) { if( FD_LIKELY( snapshots_enabled ) ) { /**/ fd_topob_tile_in( topo, "gui", 0UL, "metric_in", "snapct_gui", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); /**/ fd_topob_tile_in( topo, "gui", 0UL, "metric_in", "snapin_gui", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); + /**/ fd_topob_tile_in( topo, "gui", 0UL, "metric_in", "snapin_manif", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); } if( FD_UNLIKELY( config->tiles.bundle.enabled ) ) { /**/ fd_topob_tile_in( topo, "gui", 0UL, "metric_in", "bundle_status", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED ); @@ -1820,6 +1821,8 @@ fd_topo_configure_tile( fd_topo_tile_t * tile, tile->gui.schedule_strategy = config->tiles.pack.schedule_strategy_enum; tile->gui.websocket_compression = 1; tile->gui.frontend_release_channel = config->development.gui.frontend_release_channel_enum; + fd_cstr_ncpy( tile->gui.wfs_bank_hash, config->firedancer.consensus.wait_for_supermajority_with_bank_hash, sizeof(tile->gui.wfs_bank_hash) ); + tile->gui.expected_shred_version = config->consensus.expected_shred_version; } else if( FD_UNLIKELY( !strcmp( tile->name, "rpc" ) ) ) { diff --git a/src/disco/gui/fd_gui.c b/src/disco/gui/fd_gui.c index 6eb8a956510..3026ed4845e 100644 --- a/src/disco/gui/fd_gui.c +++ b/src/disco/gui/fd_gui.c @@ -3,6 +3,7 @@ #include "fd_gui_metrics.h" #include "../metrics/fd_metrics.h" +#include "../../discof/gossip/fd_gossip_tile.h" #include "../plugin/fd_plugin.h" #include "../../ballet/base58/fd_base58.h" @@ -41,6 +42,8 @@ fd_gui_new( void * shmem, int snapshots_enabled, int is_voting, int schedule_strategy, + char const * wfs_expected_bank_hash_cstr, + ushort expected_shred_version, fd_topo_t * topo, long now 
) { @@ -101,18 +104,29 @@ fd_gui_new( void * shmem, gui->summary.version = version; gui->summary.cluster = cluster; gui->summary.startup_time_nanos = gui->next_sample_400millis; + gui->summary.expected_shred_version = expected_shred_version; + gui->summary.wfs_enabled = 0; + gui->summary.wfs_bank_hash[ 0UL ] = '\0'; if( FD_UNLIKELY( is_full_client ) ) { + fd_cstr_ncpy( gui->summary.wfs_bank_hash, wfs_expected_bank_hash_cstr, sizeof(gui->summary.wfs_bank_hash) ); + gui->summary.wfs_enabled = !!strcmp( wfs_expected_bank_hash_cstr, "" ); + if( FD_UNLIKELY( snapshots_enabled ) ) { gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_JOINING_GOSSIP; gui->summary.boot_progress.joining_gossip_time_nanos = gui->next_sample_400millis; + memset( gui->summary.boot_progress.loading_snapshot, 0, sizeof(gui->summary.boot_progress.loading_snapshot) ); for( ulong i=0UL; isummary.boot_progress.loading_snapshot[ i ].reset_cnt = ULONG_MAX; /* ensures other fields are reset initially */ - gui->summary.boot_progress.loading_snapshot[ i ].read_path[ 0 ] = '\0'; - gui->summary.boot_progress.loading_snapshot[ i ].insert_path[ 0 ] = '\0'; + gui->summary.boot_progress.loading_snapshot[ i ].slot = ULONG_MAX; } gui->summary.boot_progress.catching_up_time_nanos = 0L; gui->summary.boot_progress.catching_up_first_replay_slot = ULONG_MAX; + gui->summary.boot_progress.wfs_total_stake = 0UL; + gui->summary.boot_progress.wfs_connected_stake = 0UL; + gui->summary.boot_progress.wfs_total_peers = 0UL; + gui->summary.boot_progress.wfs_connected_peers = 0UL; + gui->summary.boot_progress.wfs_attempt = 0UL; } else { fd_memset( &gui->summary.boot_progress, 0, sizeof(gui->summary.boot_progress) ); gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_RUNNING; @@ -787,13 +801,32 @@ fd_gui_run_boot_progress( fd_gui_t * gui, long now ) { fd_topo_tile_t const * snapin = &gui->topo->tiles[ fd_topo_find_tile( gui->topo, "snapin", 0UL ) ]; volatile ulong * snapin_metrics = fd_metrics_tile( 
snapin->metrics ); + fd_topo_tile_t const * gossip = &gui->topo->tiles[ fd_topo_find_tile( gui->topo, "gossip", 0UL ) ]; + volatile ulong * gossip_metrics = fd_metrics_tile( gossip->metrics ); + ulong snapshot_phase = snapct_metrics[ MIDX( GAUGE, SNAPCT, STATE ) ]; + ulong wfs_state = gossip_metrics[ MIDX( GAUGE, GOSSIP, WFS_STATE ) ]; /* state transitions */ if( FD_UNLIKELY( gui->summary.slot_caught_up!=ULONG_MAX ) ) { gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_RUNNING; - } else if( FD_LIKELY( snapshot_phase == FD_SNAPCT_STATE_SHUTDOWN && gui->summary.slots_max_turbine[ 0 ].slot!=ULONG_MAX && gui->summary.slot_completed!=ULONG_MAX ) ) { - gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP; + } else if( FD_LIKELY( snapshot_phase == FD_SNAPCT_STATE_SHUTDOWN && wfs_state==FD_GOSSIP_WFS_STATE_DONE && gui->summary.slots_max_turbine[ 0 ].slot!=ULONG_MAX && gui->summary.slot_completed!=ULONG_MAX ) ) { + if( FD_UNLIKELY( gui->summary.wfs_enabled ) ) { + if( FD_UNLIKELY( gui->summary.slot_caught_up==ULONG_MAX ) ) { + ulong snap_inc = gui->summary.boot_progress.loading_snapshot[ FD_GUI_BOOT_PROGRESS_INCREMENTAL_SNAPSHOT_IDX ].slot; + ulong snap_full = gui->summary.boot_progress.loading_snapshot[ FD_GUI_BOOT_PROGRESS_FULL_SNAPSHOT_IDX ].slot; + gui->summary.slot_caught_up = fd_ulong_if( snap_inc!=ULONG_MAX, snap_inc, snap_full ); + gui->summary.boot_progress.catching_up_time_nanos = now; + + fd_gui_printf_slot_caught_up( gui ); + fd_http_server_ws_broadcast( gui->http ); + } + gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_RUNNING; + } else { + gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP; + } + } else if( FD_UNLIKELY( snapshot_phase == FD_SNAPCT_STATE_SHUTDOWN && wfs_state==FD_GOSSIP_WFS_STATE_WAIT ) ) { + gui->summary.boot_progress.phase = FD_GUI_BOOT_PROGRESS_TYPE_WAITING_FOR_SUPERMAJORITY; } else if( FD_LIKELY( snapshot_phase==FD_SNAPCT_STATE_READING_FULL_FILE || 
snapshot_phase==FD_SNAPCT_STATE_FLUSHING_FULL_FILE_FINI || snapshot_phase==FD_SNAPCT_STATE_FLUSHING_FULL_FILE_DONE @@ -860,6 +893,13 @@ fd_gui_run_boot_progress( fd_gui_t * gui, long now ) { break; } + case FD_GUI_BOOT_PROGRESS_TYPE_WAITING_FOR_SUPERMAJORITY: { + gui->summary.boot_progress.wfs_total_stake = gossip_metrics[ MIDX( GAUGE, GOSSIP, WFS_STAKE_TOTAL ) ]; + gui->summary.boot_progress.wfs_connected_stake = gossip_metrics[ MIDX( GAUGE, GOSSIP, WFS_STAKE_ONLINE ) ]; + gui->summary.boot_progress.wfs_total_peers = gossip_metrics[ MIDX( GAUGE, GOSSIP, WFS_STAKED_PEERS_TOTAL ) ]; + gui->summary.boot_progress.wfs_connected_peers = gossip_metrics[ MIDX( GAUGE, GOSSIP, WFS_STAKED_PEERS_ONLINE ) ]; + break; + } case FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP: { gui->summary.boot_progress.catching_up_time_nanos = now; break; @@ -2473,6 +2513,19 @@ fd_gui_handle_snapshot_update( fd_gui_t * gui, fd_cstr_printf_check( gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].read_path, sizeof(gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].read_path), NULL, "%s", msg->read_path ); } +void +fd_gui_stage_snapshot_manifest( fd_gui_t * gui, + fd_snapshot_manifest_t const * manifest ) { + ulong attempt = 0UL; + for( ulong i=0UL; ihard_forks_len; i++ ) { + if( FD_UNLIKELY( manifest->hard_forks[ i ]==manifest->slot ) ) { + attempt = manifest->hard_forks_cnts[ i ]; + break; + } + } + gui->summary.boot_progress.wfs_attempt = attempt; +} + static void fd_gui_handle_reset_slot( fd_gui_t * gui, ulong reset_slot, long now ) { FD_TEST( reset_slot!=ULONG_MAX ); diff --git a/src/disco/gui/fd_gui.h b/src/disco/gui/fd_gui.h index 12b020e63d7..994e2726146 100644 --- a/src/disco/gui/fd_gui.h +++ b/src/disco/gui/fd_gui.h @@ -10,6 +10,7 @@ #include "../../disco/fd_txn_p.h" #include "../../disco/bundle/fd_bundle_tile.h" #include "../../discof/restore/fd_snapct_tile.h" +#include "../../discof/restore/utils/fd_ssmsg.h" #include "../../discof/tower/fd_tower_tile.h" #include 
"../../discof/replay/fd_replay_tile.h" #include "../../choreo/tower/fd_tower.h" @@ -108,8 +109,9 @@ struct fd_gui_validator_info { #define FD_GUI_BOOT_PROGRESS_TYPE_JOINING_GOSSIP (1) #define FD_GUI_BOOT_PROGRESS_TYPE_LOADING_FULL_SNAPSHOT (2) #define FD_GUI_BOOT_PROGRESS_TYPE_LOADING_INCREMENTAL_SNAPSHOT (3) -#define FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP (4) -#define FD_GUI_BOOT_PROGRESS_TYPE_RUNNING (5) +#define FD_GUI_BOOT_PROGRESS_TYPE_WAITING_FOR_SUPERMAJORITY (4) +#define FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP (5) +#define FD_GUI_BOOT_PROGRESS_TYPE_RUNNING (6) #define FD_GUI_BOOT_PROGRESS_FULL_SNAPSHOT_IDX (0UL) #define FD_GUI_BOOT_PROGRESS_INCREMENTAL_SNAPSHOT_IDX (1UL) @@ -559,6 +561,10 @@ struct fd_gui { char const * version; char const * cluster; + char wfs_bank_hash[ FD_BASE58_ENCODED_32_SZ ]; + ushort expected_shred_version; + int wfs_enabled; + ulong vote_distance; int vote_state; @@ -616,6 +622,12 @@ struct fd_gui { ulong insert_accounts_current; } loading_snapshot[ FD_GUI_BOOT_PROGRESS_SNAPSHOT_CNT ]; + ulong wfs_total_stake; + ulong wfs_connected_stake; + ulong wfs_total_peers; + ulong wfs_connected_peers; + ulong wfs_attempt; + long catching_up_time_nanos; ulong catching_up_first_replay_slot; } boot_progress; @@ -794,6 +806,8 @@ fd_gui_new( void * shmem, int snapshots_enabled, int is_voting, int schedule_strategy, + char const * wfs_expected_bank_hash_cstr, + ushort expected_shred_version, fd_topo_t * topo, long now ); @@ -897,6 +911,10 @@ void fd_gui_handle_snapshot_update( fd_gui_t * gui, fd_snapct_update_t const * msg ); +void +fd_gui_stage_snapshot_manifest( fd_gui_t * gui, + fd_snapshot_manifest_t const * manifest ); + void fd_gui_handle_leader_schedule( fd_gui_t * gui, fd_stake_weight_msg_t const * leader_schedule, diff --git a/src/disco/gui/fd_gui_peers.c b/src/disco/gui/fd_gui_peers.c index 7ea4415793d..09f45e57e4e 100644 --- a/src/disco/gui/fd_gui_peers.c +++ b/src/disco/gui/fd_gui_peers.c @@ -4,11 +4,14 @@ #include "fd_gui_metrics.h" 
#include "../../disco/metrics/fd_metrics_base.h" +#include "../../disco/shred/fd_stake_ci.h" FD_IMPORT_BINARY( dbip_f, "src/disco/gui/dbip.bin.zst" ); #define LOGGING 0 +#define FD_GUI_WFS_ACTIVITY_TIMEOUT_NANOS (15L*1000L*1000L*1000L) + FD_FN_CONST ulong fd_gui_peers_align( void ) { ulong a = 128UL; @@ -200,6 +203,7 @@ fd_gui_peers_new( void * shmem, fd_http_server_t * http, fd_topo_t * topo, ulong max_ws_conn_cnt, + char const * wfs_expected_bank_hash_cstr, long now ) { if( FD_UNLIKELY( !shmem ) ) { FD_LOG_WARNING(( "NULL shmem" )); @@ -237,6 +241,8 @@ fd_gui_peers_new( void * shmem, ctx->http = http; ctx->topo = topo; + ctx->wfs_enabled = !!strcmp( wfs_expected_bank_hash_cstr, "" ); + ctx->max_ws_conn_cnt = max_ws_conn_cnt; ctx->open_ws_conn_cnt = 0UL; ctx->active_ws_conn_id = ULONG_MAX; @@ -270,6 +276,11 @@ fd_gui_peers_new( void * shmem, build_geoip_trie( ctx, _dbip_nodes, (uchar *)dbip_f, dbip_f_sz, &ctx->dbip, FD_GUI_GEOIP_DBIP_MAX_NODES ); #endif + ctx->wfs_peers_cnt = 0UL; + ctx->wfs_peers_valid = 0; + ctx->wfs_stakes_sent = 0; + wfs_fresh_dlist_join( wfs_fresh_dlist_new( ctx->wfs_fresh_dlist ) ); + return shmem; } @@ -630,6 +641,53 @@ geoip_lookup( fd_gui_ip_db_t const * ip_db, #endif +#define SORT_NAME wfs_peer_sort +#define SORT_KEY_T fd_gui_wfs_peer_t +#define SORT_BEFORE(a,b) (memcmp( (a).identity_key.uc, (b).identity_key.uc, 32UL )<0) +#include "../../util/tmpl/fd_sort.c" + +static void +wfs_handle_contact_info_update( fd_gui_peers_ctx_t * peers, + fd_pubkey_t const * identity, + long now ) { + if( FD_LIKELY( !peers->wfs_peers_valid ) ) return; + + ulong idx = wfs_peer_sort_split( peers->wfs_peers, peers->wfs_peers_cnt, (fd_gui_wfs_peer_t){ .identity_key = *identity } ); + if( FD_UNLIKELY( idx>=peers->wfs_peers_cnt || memcmp( identity->uc, peers->wfs_peers[ idx ].identity_key.uc, sizeof(fd_pubkey_t) ) ) ) return; + + fd_gui_wfs_peer_t * wp = &peers->wfs_peers[ idx ]; + wp->update_time_nanos = now; + + if( !wp->is_online ) { + wp->is_online = 1; + 
wfs_fresh_dlist_idx_push_tail( peers->wfs_fresh_dlist, idx, peers->wfs_peers ); + + fd_gui_peers_printf_wfs_add( peers, &idx, 1UL ); + fd_http_server_ws_broadcast( peers->http ); + } else { + wfs_fresh_dlist_idx_remove( peers->wfs_fresh_dlist, idx, peers->wfs_peers ); + wfs_fresh_dlist_idx_push_tail( peers->wfs_fresh_dlist, idx, peers->wfs_peers ); + } +} + +static void +wfs_handle_contact_info_remove( fd_gui_peers_ctx_t * peers, + fd_pubkey_t const * identity ) { + if( FD_LIKELY( !peers->wfs_peers_valid ) ) return; + + ulong idx = wfs_peer_sort_split( peers->wfs_peers, peers->wfs_peers_cnt, (fd_gui_wfs_peer_t){ .identity_key = *identity } ); + if( FD_UNLIKELY( idx>=peers->wfs_peers_cnt || memcmp( identity->uc, peers->wfs_peers[ idx ].identity_key.uc, 32UL ) ) ) return; + + fd_gui_wfs_peer_t * wp = &peers->wfs_peers[ idx ]; + if( wp->is_online ) { + wfs_fresh_dlist_idx_remove( peers->wfs_fresh_dlist, idx, peers->wfs_peers ); + wp->is_online = 0; + + fd_gui_peers_printf_wfs_remove( peers, &idx, 1UL ); + fd_http_server_ws_broadcast( peers->http ); + } +} + void fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, fd_gossip_update_message_t const * update, @@ -655,7 +713,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, /* A new pubkey is not allowed to overwrite an existing valid index */ FD_LOG_ERR(( "invariant violation: peer->pubkey.uc=%s != update->origin=%s ", ci_pk, og_pk )); } - FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t * )update->origin, NULL, peers->contact_info_table ) ); + FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t const * )update->origin, NULL, peers->contact_info_table ) ); fd_gui_peers_node_t * peer_sock = fd_gui_peers_node_sock_map_ele_query( peers->node_sock_map, &peer->contact_info.sockets[ FD_GOSSIP_CONTACT_INFO_SOCKET_GOSSIP ], NULL, peers->contact_info_table ); int found = 0; for( fd_gui_peers_node_t * p = 
peer_sock; !!p; p=(fd_gui_peers_node_t *)fd_gui_peers_node_sock_map_ele_next_const( p, NULL, peers->contact_info_table ) ) { @@ -668,7 +726,9 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, #endif /* update does nothing */ if( FD_UNLIKELY( fd_gui_peers_contact_info_eq( &peer->contact_info, update->contact_info->value ) ) ) { - peer->wallclock_nanos = FD_MILLI_TO_NANOSEC( update->wallclock ); + peer->wallclock_nanos = FD_MILLI_TO_NANOSEC( update->wallclock ); + peer->update_time_nanos = now; + wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, now ); break; } @@ -696,6 +756,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, /* broadcast update to WebSocket clients */ fd_gui_peers_printf_nodes( peers, (int[]){ FD_GUI_PEERS_NODE_UPDATE }, (ulong[]){ update->contact_info->idx }, 1UL ); fd_http_server_ws_broadcast( peers->http ); + + wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, now ); } else { #if LOGGING char _pk[ FD_BASE58_ENCODED_32_SZ ]; @@ -711,6 +773,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, peer->has_vote_info = 0; peer->delinquent = 0; peer->stake = ULONG_MAX; + peer->prev_stake = ULONG_MAX; fd_gui_config_parse_info_t * info = fd_gui_peers_node_info_map_ele_query( peers->node_info_map, fd_type_pun_const(update->origin ), NULL, peers->node_info_pool ); if( FD_LIKELY( info ) ) fd_memcpy( peer->name, info->name, sizeof(info->name) ); @@ -747,6 +810,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, /* broadcast update to WebSocket clients */ fd_gui_peers_printf_nodes( peers, (int[]){ FD_GUI_PEERS_NODE_ADD }, (ulong[]){ update->contact_info->idx }, 1UL ); fd_http_server_ws_broadcast( peers->http ); + + wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, now ); } break; } @@ -763,7 +828,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, #ifdef FD_GUI_USE_HANDHOLDING /* invariant checks */ 
FD_TEST( peer->valid ); /* Should have already been in the table */ - FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t * )update->origin, NULL, peers->contact_info_table ) ); + FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t const * )update->origin, NULL, peers->contact_info_table ) ); fd_gui_peers_node_t * peer_sock = fd_gui_peers_node_sock_map_ele_query( peers->node_sock_map, &peer->contact_info.sockets[ FD_GOSSIP_CONTACT_INFO_SOCKET_GOSSIP ], NULL, peers->contact_info_table ); int found = 0; for( fd_gui_peers_node_t const * p = peer_sock; !!p; p=(fd_gui_peers_node_t const *)fd_gui_peers_node_sock_map_ele_next_const( p, NULL, peers->contact_info_table ) ) { @@ -774,6 +839,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers, } FD_TEST( found ); #endif + wfs_handle_contact_info_remove( peers, (fd_pubkey_t const *)update->origin ); + fd_gui_peers_live_table_idx_remove ( peers->live_table, update->contact_info_remove->idx, peers->contact_info_table ); fd_gui_peers_bandwidth_tracking_idx_remove ( peers->bw_tracking, update->contact_info_remove->idx, peers->contact_info_table ); fd_gui_peers_node_sock_map_idx_remove_fast ( peers->node_sock_map, update->contact_info_remove->idx, peers->contact_info_table ); @@ -868,6 +935,11 @@ fd_gui_peers_handle_epoch_info( fd_gui_peers_ctx_t * peers, /* sort for deduplication */ fd_gui_peers_voter_sort_iden_desc_inplace( peers->epochs[ epoch_idx ].stakes, peers->epochs[ epoch_idx ].stakes_cnt ); + /* The prev epoch's stakes are already vote_desc sorted (invariant + maintained at the bottom of this function). We binary search into + it below when setting prev_stake for each peer. 
*/ + ulong prev_epoch_idx = (epoch_idx + 1UL) % 2UL; + ulong updated_cnt = 0UL; ulong i=0UL; while( iepochs[ epoch_idx ].stakes_cnt ) { @@ -878,11 +950,27 @@ fd_gui_peers_handle_epoch_info( fd_gui_peers_ctx_t * peers, peers->contact_info_table ); if( FD_UNLIKELY( peer_idx==ULONG_MAX ) ) continue; + ulong new_prev_stake = ULONG_MAX; + if( FD_LIKELY( peers->epochs[ prev_epoch_idx ].epoch!=ULONG_MAX && peers->epochs[ prev_epoch_idx ].epoch+1UL==peers->epochs[ epoch_idx ].epoch ) ) { + fd_gui_peers_voter_t * prev_stakes = peers->epochs[ prev_epoch_idx ].stakes; + ulong prev_stakes_cnt = peers->epochs[ prev_epoch_idx ].stakes_cnt; + ulong prev_idx = fd_gui_peers_voter_sort_vote_desc_split( + prev_stakes, prev_stakes_cnt, + (fd_gui_peers_voter_t){ .weight = { .vote_key = best->weight.vote_key } } ); + if( FD_LIKELY( prev_idxweight.vote_key.uc, + sizeof(fd_pubkey_t) ) ) ) { + new_prev_stake = prev_stakes[ prev_idx ].weight.stake; + } + } + fd_gui_peers_node_t * peer = &peers->contact_info_table[ peer_idx ]; int vote_eq = peer->has_vote_info && !memcmp( peer->vote_account.uc, best->weight.vote_key.uc, sizeof(fd_pubkey_t) ) - && peer->stake==best->weight.stake; + && peer->stake==best->weight.stake + && peer->prev_stake==new_prev_stake; if( FD_LIKELY( vote_eq ) ) continue; fd_gui_peers_live_table_idx_remove( peers->live_table, peer_idx, peers->contact_info_table ); @@ -890,6 +978,7 @@ fd_gui_peers_handle_epoch_info( fd_gui_peers_ctx_t * peers, peer->has_vote_info = 1; peer->vote_account = best->weight.vote_key; peer->stake = best->weight.stake; + peer->prev_stake = new_prev_stake; fd_gui_peers_live_table_idx_insert( peers->live_table, peer_idx, peers->contact_info_table ); @@ -1016,6 +1105,82 @@ fd_gui_peers_handle_config_account( fd_gui_peers_ctx_t * peers, fd_gui_peers_node_info_map_ele_insert( peers->node_info_map, node_info, peers->node_info_pool ); } +void +fd_gui_peers_stage_snapshot_manifest( fd_gui_peers_ctx_t * peers, + fd_snapshot_manifest_t const * manifest, + 
long now ) { + + if( FD_LIKELY( !peers->wfs_enabled ) ) return; + + fd_vote_stake_weight_t * vote_scratch = peers->scratch.manifest_vote_weights; + ulong vote_scratch_cnt = 0UL; + ulong vote_accounts_sz = manifest->vote_accounts_len; + if( FD_UNLIKELY( vote_accounts_sz>40200UL ) ) { + FD_LOG_WARNING(( "exceeded 40200UL vote accounts" )); + vote_accounts_sz = 40200UL; + } + for( ulong i=0UL; ivote_accounts[ i ].stake==0UL ) ) continue; + fd_memcpy( vote_scratch[ vote_scratch_cnt ].id_key.uc, manifest->vote_accounts[ i ].node_account_pubkey, sizeof(fd_pubkey_t) ); + fd_memcpy( vote_scratch[ vote_scratch_cnt ].vote_key.uc, manifest->vote_accounts[ i ].vote_account_pubkey, sizeof(fd_pubkey_t) ); + vote_scratch[ vote_scratch_cnt ].stake = manifest->vote_accounts[ i ].stake; + vote_scratch_cnt++; + } + + /* Mirrors gossip WFS logic */ + fd_stake_weight_t * id_weights = peers->scratch.manifest_id_weights; + ulong id_cnt = compute_id_weights_from_vote_weights( id_weights, vote_scratch, vote_scratch_cnt ); + + /* Restore invariant: sorted by identity key */ + fd_stake_weight_key_sort_inplace( id_weights, id_cnt ); + + for( ulong i=0UL; iwfs_peers[ i ].identity_key = id_weights[ i ].key; + peers->wfs_peers[ i ].stake = id_weights[ i ].stake; + peers->wfs_peers[ i ].fresh_prev = ULONG_MAX; + peers->wfs_peers[ i ].fresh_next = ULONG_MAX; + + ulong peer_idx = fd_gui_peers_node_pubkey_map_idx_query( peers->node_pubkey_map, &id_weights[ i ].key, ULONG_MAX,peers->contact_info_table ); + if( peer_idx!=ULONG_MAX && peers->contact_info_table[ peer_idx ].update_time_nanos > now - FD_GUI_WFS_ACTIVITY_TIMEOUT_NANOS ) { + peers->wfs_peers[ i ].is_online = 1; + peers->wfs_peers[ i ].update_time_nanos = peers->contact_info_table[ peer_idx ].update_time_nanos; + } else { + peers->wfs_peers[ i ].is_online = 0; + peers->wfs_peers[ i ].update_time_nanos = 0L; + } + } + peers->wfs_peers_cnt = id_cnt; +} + +void +fd_gui_peers_commit_snapshot_manifest( fd_gui_peers_ctx_t * peers ) { + if( 
FD_UNLIKELY( !peers->wfs_enabled ) ) return; + + wfs_fresh_dlist_join( wfs_fresh_dlist_new( peers->wfs_fresh_dlist ) ); + + /* Emit the wait_for_supermajority.stakes message with stakes and + infos. By this point all config accounts have been processed so + node_info_map is populated. */ + fd_gui_peers_printf_wfs_stakes( peers ); + fd_http_server_ws_broadcast( peers->http ); + peers->wfs_stakes_sent = 1; + + ulong added_cnt = 0UL; + for( ulong i=0UL; iwfs_peers_cnt; i++ ) { + /* Peers are technically added here not ordered by timestamp, but it's + not an issue since a) all timestamps should be similar b) the dlist + will eventually be correct as subsequent updates come in. */ + if( FD_UNLIKELY( peers->wfs_peers[ i ].is_online ) ) { + peers->scratch.wfs_peers[ added_cnt++ ] = i; + wfs_fresh_dlist_idx_push_tail( peers->wfs_fresh_dlist, i, peers->wfs_peers ); + } + } + if( FD_LIKELY( added_cnt ) ) { + fd_gui_peers_printf_wfs_add( peers, peers->scratch.wfs_peers, added_cnt ); + fd_http_server_ws_broadcast( peers->http ); + } + peers->wfs_peers_valid = 1; +} static void fd_gui_peers_viewport_snap( fd_gui_peers_ctx_t * peers, ulong ws_conn_id ) { @@ -1282,6 +1447,24 @@ int fd_gui_peers_poll( fd_gui_peers_ctx_t * peers, long now ) { int did_work = 0; + ulong evicted_cnt = 0UL; + while( FD_UNLIKELY( peers->wfs_peers_valid && !wfs_fresh_dlist_is_empty( peers->wfs_fresh_dlist, peers->wfs_peers ) ) ) { + ulong head_idx = wfs_fresh_dlist_idx_peek_head( peers->wfs_fresh_dlist, peers->wfs_peers ); + fd_gui_wfs_peer_t * oldest = &peers->wfs_peers[ head_idx ]; + if( oldest->update_time_nanos > now - FD_GUI_WFS_ACTIVITY_TIMEOUT_NANOS ) break; + + wfs_fresh_dlist_idx_pop_head( peers->wfs_fresh_dlist, peers->wfs_peers ); + oldest->is_online = 0; + + peers->scratch.wfs_peers[ evicted_cnt++ ] = head_idx; + if( FD_UNLIKELY( evicted_cnt>=256UL ) ) break; + } + if( FD_UNLIKELY( evicted_cnt ) ) { + fd_gui_peers_printf_wfs_remove( peers, peers->scratch.wfs_peers, evicted_cnt ); + 
fd_http_server_ws_broadcast( peers->http ); + return 1; /* preserve STEM_BURST */ + } + /* update client viewports in a round-robin */ if( FD_UNLIKELY( fd_gui_peers_ws_conn_rr_advance( peers, now ) ) ) { FD_TEST( peers->client_viewports[ peers->active_ws_conn_id ].connected ); @@ -1382,7 +1565,7 @@ fd_gui_peers_poll( fd_gui_peers_ctx_t * peers, long now ) { fd_http_server_ws_broadcast( peers->http ); peers->next_gossip_stats_update_nanos = now + (FD_GUI_PEERS_GOSSIP_STATS_UPDATE_INTERVAL_MILLIS * 1000000L); - did_work = 1; + return 1; /* preserve STEM_BURST */ } return did_work; @@ -1401,6 +1584,22 @@ fd_gui_peers_ws_open( fd_gui_peers_ctx_t * peers, fd_gui_peers_printf_node_all( peers ); FD_TEST( !fd_http_server_ws_send( peers->http, ws_conn_id ) ); + + if( FD_UNLIKELY( peers->wfs_stakes_sent ) ) { + fd_gui_peers_printf_wfs_stakes( peers ); + FD_TEST( !fd_http_server_ws_send( peers->http, ws_conn_id ) ); + } + + if( FD_UNLIKELY( peers->wfs_peers_valid ) ) { + ulong added_cnt = 0UL; + for( ulong i=0UL; iwfs_peers_cnt; i++ ) { + if( FD_UNLIKELY( peers->wfs_peers[ i ].is_online ) ) peers->scratch.wfs_peers[ added_cnt++ ] = i; + } + if( FD_LIKELY( added_cnt ) ) { + fd_gui_peers_printf_wfs_add( peers, peers->scratch.wfs_peers, added_cnt ); + FD_TEST( !fd_http_server_ws_send( peers->http, ws_conn_id ) ); + } + } } void diff --git a/src/disco/gui/fd_gui_peers.h b/src/disco/gui/fd_gui_peers.h index ffd8f8de00d..bd6f4f90b87 100644 --- a/src/disco/gui/fd_gui_peers.h +++ b/src/disco/gui/fd_gui_peers.h @@ -21,6 +21,7 @@ #include "../../flamenco/runtime/fd_runtime_const.h" #include "../../waltz/http/fd_http_server.h" +#include "../../discof/restore/utils/fd_ssmsg.h" #include "../topo/fd_topo.h" #if FD_HAS_ZSTD @@ -85,6 +86,23 @@ struct fd_gui_geoip_node { typedef struct fd_gui_geoip_node fd_gui_geoip_node_t; +struct fd_gui_wfs_peer { + fd_pubkey_t identity_key; + ulong stake; + int is_online; + long update_time_nanos; + + ulong fresh_prev; + ulong fresh_next; +}; +typedef 
struct fd_gui_wfs_peer fd_gui_wfs_peer_t; + +#define DLIST_NAME wfs_fresh_dlist +#define DLIST_ELE_T fd_gui_wfs_peer_t +#define DLIST_PREV fresh_prev +#define DLIST_NEXT fresh_next +#include "../../util/tmpl/fd_dlist.c" + #define FD_GUI_PEERS_NODE_NOP (0) #define FD_GUI_PEERS_NODE_ADD (1) #define FD_GUI_PEERS_NODE_UPDATE (2) @@ -154,6 +172,7 @@ struct fd_gui_peers_node { fd_pubkey_t vote_account; int delinquent; ulong stake; + ulong prev_stake; uchar country_code_idx; uint city_name_idx; @@ -392,6 +411,13 @@ struct fd_gui_peers_ctx { int actions[ FD_CONTACT_INFO_TABLE_SIZE ]; ulong idxs [ FD_CONTACT_INFO_TABLE_SIZE ]; }; + struct { + ulong wfs_peers[ 40200UL ]; + }; + struct { + fd_stake_weight_t manifest_id_weights [ 40200UL ]; + fd_vote_stake_weight_t manifest_vote_weights[ 40200UL ]; + }; } scratch; #if FD_HAS_ZSTD @@ -399,10 +425,22 @@ struct fd_gui_peers_ctx { #endif fd_gui_ip_db_t dbip; + + int wfs_enabled; + fd_gui_wfs_peer_t wfs_peers[ 40200UL ]; + ulong wfs_peers_cnt; + int wfs_peers_valid; + int wfs_stakes_sent; + wfs_fresh_dlist_t wfs_fresh_dlist[ 1 ]; }; typedef struct fd_gui_peers_ctx fd_gui_peers_ctx_t; +/* FIXME: see src/discof/restore/utils/fd_ssmsg.h */ +FD_STATIC_ASSERT( sizeof(((fd_gui_peers_ctx_t *)NULL)->wfs_peers)/sizeof(((fd_gui_peers_ctx_t *)NULL)->wfs_peers[0])== + sizeof(((struct fd_snapshot_manifest *)NULL)->vote_accounts)/sizeof(((struct fd_snapshot_manifest *)NULL)->vote_accounts[0]), + wfs_peers_vote_accounts ); + FD_PROTOTYPES_BEGIN FD_FN_CONST ulong @@ -416,6 +454,7 @@ fd_gui_peers_new( void * shmem, fd_http_server_t * http, fd_topo_t * topo, ulong max_ws_conn_cnt, + char const * wfs_expected_bank_hash_cstr, long now ); fd_gui_peers_ctx_t * @@ -473,6 +512,14 @@ fd_gui_peers_handle_config_account( fd_gui_peers_ctx_t * peers, uchar const * data, ulong sz ); +void +fd_gui_peers_stage_snapshot_manifest( fd_gui_peers_ctx_t * peers, + fd_snapshot_manifest_t const * manifest, + long now ); + +void +fd_gui_peers_commit_snapshot_manifest( 
fd_gui_peers_ctx_t * peers ); + /* fd_gui_peers_ws_message handles incoming websocket request payloads requesting peer-related responses. ws_conn_id is the connection id of the requester. data is a pointer to the start of the diff --git a/src/disco/gui/fd_gui_printf.c b/src/disco/gui/fd_gui_printf.c index 5bf1d5daf37..378f3034fd6 100644 --- a/src/disco/gui/fd_gui_printf.c +++ b/src/disco/gui/fd_gui_printf.c @@ -1201,6 +1201,7 @@ fd_gui_printf_peer( fd_gui_t * gui, char vote_account_base58[ FD_BASE58_ENCODED_32_SZ ]; fd_base58_encode_32( gui->vote_account.vote_accounts[ vote_idxs[ i ] ].vote_account->uc, NULL, vote_account_base58 ); jsonp_string( gui->http, "vote_account", vote_account_base58 ); + jsonp_null( gui->http, "prev_stake" ); jsonp_ulong_as_str( gui->http, "activated_stake", gui->vote_account.vote_accounts[ vote_idxs[ i ] ].activated_stake ); jsonp_ulong( gui->http, "last_vote", gui->vote_account.vote_accounts[ vote_idxs[ i ] ].last_vote ); jsonp_ulong( gui->http, "root_slot", gui->vote_account.vote_accounts[ vote_idxs[ i ] ].root_slot ); @@ -1296,6 +1297,8 @@ peers_printf_node( fd_gui_peers_ctx_t * peers, char vote_account_base58[ FD_BASE58_ENCODED_32_SZ ]; fd_base58_encode_32( peer->vote_account.uc, NULL, vote_account_base58 ); jsonp_string( peers->http, "vote_account", vote_account_base58 ); + if( FD_UNLIKELY( peer->prev_stake==ULONG_MAX ) ) jsonp_null ( peers->http, "prev_stake" ); + else jsonp_ulong_as_str( peers->http, "prev_stake", peer->prev_stake ); jsonp_ulong_as_str( peers->http, "activated_stake", fd_ulong_if( peer->stake==ULONG_MAX, 0UL, peer->stake ) ); jsonp_ulong( peers->http, "last_vote", 0UL ); /* todo: deprecate */ jsonp_ulong( peers->http, "epoch_credits", 0UL ); /* todo: deprecate */ @@ -2163,19 +2166,20 @@ fd_gui_printf_boot_progress( fd_gui_t * gui ) { jsonp_open_envelope( gui->http, "summary", "boot_progress" ); jsonp_open_object( gui->http, "value" ); switch( gui->summary.boot_progress.phase ) { - case 
FD_GUI_BOOT_PROGRESS_TYPE_JOINING_GOSSIP: jsonp_string( gui->http, "phase", "joining_gossip" ); break; - case FD_GUI_BOOT_PROGRESS_TYPE_LOADING_FULL_SNAPSHOT: jsonp_string( gui->http, "phase", "loading_full_snapshot" ); break; + case FD_GUI_BOOT_PROGRESS_TYPE_JOINING_GOSSIP: jsonp_string( gui->http, "phase", "joining_gossip" ); break; + case FD_GUI_BOOT_PROGRESS_TYPE_LOADING_FULL_SNAPSHOT: jsonp_string( gui->http, "phase", "loading_full_snapshot" ); break; case FD_GUI_BOOT_PROGRESS_TYPE_LOADING_INCREMENTAL_SNAPSHOT: jsonp_string( gui->http, "phase", "loading_incremental_snapshot" ); break; - case FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP: jsonp_string( gui->http, "phase", "catching_up" ); break; - case FD_GUI_BOOT_PROGRESS_TYPE_RUNNING: jsonp_string( gui->http, "phase", "running" ); break; + case FD_GUI_BOOT_PROGRESS_TYPE_WAITING_FOR_SUPERMAJORITY: jsonp_string( gui->http, "phase", "waiting_for_supermajority" ); break; + case FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP: jsonp_string( gui->http, "phase", "catching_up" ); break; + case FD_GUI_BOOT_PROGRESS_TYPE_RUNNING: jsonp_string( gui->http, "phase", "running" ); break; default: FD_LOG_ERR(( "unknown phase %d", gui->summary.boot_progress.phase )); } jsonp_double( gui->http, "joining_gossip_elapsed_seconds", (double)(gui->summary.boot_progress.joining_gossip_time_nanos - gui->summary.startup_time_nanos) / 1e9 ); -#define HANDLE_SNAPSHOT_STATE(snapshot_type, snapshot_type_upper) \ - if( FD_LIKELY( gui->summary.boot_progress.phase>=FD_GUI_BOOT_PROGRESS_TYPE_LOADING_##snapshot_type_upper##_SNAPSHOT )) { \ - ulong snapshot_idx = FD_GUI_BOOT_PROGRESS_##snapshot_type_upper##_SNAPSHOT_IDX; \ +#define HANDLE_SNAPSHOT_STATE(snapshot_type, snapshot_type_upper) { \ + ulong snapshot_idx = FD_GUI_BOOT_PROGRESS_##snapshot_type_upper##_SNAPSHOT_IDX; \ + if( FD_LIKELY( gui->summary.boot_progress.phase>=FD_GUI_BOOT_PROGRESS_TYPE_LOADING_##snapshot_type_upper##_SNAPSHOT && gui->summary.boot_progress.loading_snapshot[ snapshot_idx 
].slot!=ULONG_MAX )) { \ jsonp_double ( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_elapsed_seconds", (double)(gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].sample_time_nanos - gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].reset_time_nanos) / 1e9 ); \ jsonp_ulong ( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_reset_count", gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].reset_cnt ); \ jsonp_ulong ( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_slot", gui->summary.boot_progress.loading_snapshot[ snapshot_idx ].slot ); \ @@ -2197,12 +2201,41 @@ fd_gui_printf_boot_progress( fd_gui_t * gui ) { jsonp_null( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_decompress_bytes_compressed" ); \ jsonp_null( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_insert_bytes_decompressed" ); \ jsonp_null( gui->http, "loading_" FD_STRINGIFY(snapshot_type) "_snapshot_insert_accounts" ); \ - } + } \ + } HANDLE_SNAPSHOT_STATE(full, FULL) HANDLE_SNAPSHOT_STATE(incremental, INCREMENTAL) #undef HANDLE_SNAPSHOT_STATE + if( FD_LIKELY( gui->summary.wfs_enabled ) ) { + jsonp_string ( gui->http, "wait_for_supermajority_bank_hash", gui->summary.wfs_bank_hash ); + char shred_version_str[ 8 ]; + FD_TEST( fd_cstr_printf_check( shred_version_str, sizeof(shred_version_str), NULL, "%hu", gui->summary.expected_shred_version ) ); + jsonp_string ( gui->http, "wait_for_supermajority_shred_version", shred_version_str ); + if( FD_LIKELY( gui->summary.boot_progress.phase>=FD_GUI_BOOT_PROGRESS_TYPE_WAITING_FOR_SUPERMAJORITY ) ) { + jsonp_ulong ( gui->http, "wait_for_supermajority_attempt", gui->summary.boot_progress.wfs_attempt ); + jsonp_ulong_as_str( gui->http, "wait_for_supermajority_total_stake", gui->summary.boot_progress.wfs_total_stake ); + jsonp_ulong_as_str( gui->http, "wait_for_supermajority_connected_stake", gui->summary.boot_progress.wfs_connected_stake ); + jsonp_ulong ( gui->http, 
"wait_for_supermajority_total_peers", gui->summary.boot_progress.wfs_total_peers ); + jsonp_ulong ( gui->http, "wait_for_supermajority_connected_peers", gui->summary.boot_progress.wfs_connected_peers ); + } else { + jsonp_null( gui->http, "wait_for_supermajority_attempt" ); + jsonp_null( gui->http, "wait_for_supermajority_total_stake" ); + jsonp_null( gui->http, "wait_for_supermajority_connected_stake" ); + jsonp_null( gui->http, "wait_for_supermajority_total_peers" ); + jsonp_null( gui->http, "wait_for_supermajority_connected_peers" ); + } + } else { + jsonp_null( gui->http, "wait_for_supermajority_bank_hash" ); + jsonp_null( gui->http, "wait_for_supermajority_shred_version" ); + jsonp_null( gui->http, "wait_for_supermajority_attempt" ); + jsonp_null( gui->http, "wait_for_supermajority_total_stake" ); + jsonp_null( gui->http, "wait_for_supermajority_connected_stake" ); + jsonp_null( gui->http, "wait_for_supermajority_total_peers" ); + jsonp_null( gui->http, "wait_for_supermajority_connected_peers" ); + } + if( FD_LIKELY( gui->summary.boot_progress.phase>=FD_GUI_BOOT_PROGRESS_TYPE_CATCHING_UP ) ) jsonp_double( gui->http, "catching_up_elapsed_seconds", (double)(gui->summary.boot_progress.catching_up_time_nanos - gui->summary.boot_progress.loading_snapshot[ FD_GUI_BOOT_PROGRESS_INCREMENTAL_SNAPSHOT_IDX ].sample_time_nanos) / 1e9 ); else jsonp_null ( gui->http, "catching_up_elapsed_seconds" ); @@ -2592,3 +2625,77 @@ fd_gui_printf_slot_query_shreds( fd_gui_t * gui, jsonp_close_object( gui->http ); jsonp_close_envelope( gui->http ); } + +void +fd_gui_peers_printf_wfs_add( fd_gui_peers_ctx_t * peers, + ulong const * idxs, + ulong cnt ) { + jsonp_open_envelope( peers->http, "wait_for_supermajority", "peer_add" ); + jsonp_open_array( peers->http, "value" ); + for( ulong i=0UL; iwfs_peers[ idxs[ i ] ]; + char identity_base58[ FD_BASE58_ENCODED_32_SZ ]; + fd_base58_encode_32( wp->identity_key.uc, NULL, identity_base58 ); + jsonp_string( peers->http, NULL, identity_base58 ); 
+ } + jsonp_close_array( peers->http ); + jsonp_close_envelope( peers->http ); +} + +void +fd_gui_peers_printf_wfs_remove( fd_gui_peers_ctx_t * peers, + ulong const * idxs, + ulong cnt ) { + jsonp_open_envelope( peers->http, "wait_for_supermajority", "peer_remove" ); + jsonp_open_array( peers->http, "value" ); + for( ulong i=0UL; iwfs_peers[ idxs[ i ] ]; + char identity_base58[ FD_BASE58_ENCODED_32_SZ ]; + fd_base58_encode_32( wp->identity_key.uc, NULL, identity_base58 ); + jsonp_string( peers->http, NULL, identity_base58 ); + } + jsonp_close_array( peers->http ); + jsonp_close_envelope( peers->http ); +} + +void +fd_gui_peers_printf_wfs_stakes( fd_gui_peers_ctx_t * peers ) { + jsonp_open_envelope( peers->http, "wait_for_supermajority", "stakes" ); + jsonp_open_object( peers->http, "value" ); + + jsonp_open_array( peers->http, "staked_pubkeys" ); + for( ulong i=0UL; iwfs_peers_cnt; i++ ) { + char identity_base58[ FD_BASE58_ENCODED_32_SZ ]; + fd_base58_encode_32( peers->wfs_peers[ i ].identity_key.uc, NULL, identity_base58 ); + jsonp_string( peers->http, NULL, identity_base58 ); + } + jsonp_close_array( peers->http ); + + jsonp_open_array( peers->http, "staked_lamports" ); + for( ulong i=0UL; iwfs_peers_cnt; i++ ) { + jsonp_ulong_as_str( peers->http, NULL, peers->wfs_peers[ i ].stake ); + } + jsonp_close_array( peers->http ); + + jsonp_open_array( peers->http, "infos" ); + for( ulong i=0UL; iwfs_peers_cnt; i++ ) { + fd_gui_config_parse_info_t * info = + fd_gui_peers_node_info_map_ele_query( + peers->node_info_map, &peers->wfs_peers[ i ].identity_key, NULL, peers->node_info_pool ); + if( info ) { + jsonp_open_object( peers->http, NULL ); + jsonp_string( peers->http, "name", info->name ); + jsonp_string( peers->http, "details", info->details ); + jsonp_string( peers->http, "website", info->website ); + jsonp_string( peers->http, "icon_url", info->icon_uri ); + jsonp_string( peers->http, "keybase_username", info->keybase_username ); + jsonp_close_object( peers->http ); 
+ } else { + jsonp_null( peers->http, NULL ); + } + } + jsonp_close_array( peers->http ); + + jsonp_close_object( peers->http ); + jsonp_close_envelope( peers->http ); +} diff --git a/src/disco/gui/fd_gui_printf.h b/src/disco/gui/fd_gui_printf.h index 08426a86632..b7bd1ed2ef7 100644 --- a/src/disco/gui/fd_gui_printf.h +++ b/src/disco/gui/fd_gui_printf.h @@ -171,4 +171,17 @@ fd_gui_printf_peers_view_resize( fd_gui_peers_ctx_t * peers, ulong sz ); void fd_gui_peers_printf_gossip_stats( fd_gui_peers_ctx_t * peers ); +void +fd_gui_peers_printf_wfs_add( fd_gui_peers_ctx_t * peers, + ulong const * idxs, + ulong cnt ); + +void +fd_gui_peers_printf_wfs_remove( fd_gui_peers_ctx_t * peers, + ulong const * idxs, + ulong cnt ); + +void +fd_gui_peers_printf_wfs_stakes( fd_gui_peers_ctx_t * peers ); + #endif /* HEADER_fd_src_disco_gui_fd_gui_printf_h */ diff --git a/src/disco/gui/fd_gui_tile.c b/src/disco/gui/fd_gui_tile.c index df48350111e..7dfb0671d60 100644 --- a/src/disco/gui/fd_gui_tile.c +++ b/src/disco/gui/fd_gui_tile.c @@ -18,7 +18,6 @@ static fd_http_static_file_t * STATIC_FILES; #include "generated/fd_gui_tile_seccomp.h" -#include "../../choreo/tower/fd_tower_serdes.h" #include "../../disco/tiles.h" #include "../../disco/keyguard/fd_keyload.h" #include "../../disco/keyguard/fd_keyswitch.h" @@ -53,6 +52,7 @@ static fd_http_static_file_t * STATIC_FILES; #define IN_KIND_SNAPIN (15UL) /* firedancer only */ #define IN_KIND_EXECRP_REPLAY (16UL) /* firedancer only */ #define IN_KIND_BUNDLE (17UL) +#define IN_KIND_SNAPIN_MANIF (18UL) /* firedancer only */ FD_IMPORT_BINARY( firedancer_svg, "book/public/fire.svg" ); @@ -278,7 +278,7 @@ during_frag( fd_gui_ctx_t * ctx, if( FD_LIKELY( sig!=REPLAY_SIG_SLOT_COMPLETED && sig!=REPLAY_SIG_BECAME_LEADER ) ) return; } - if( FD_UNLIKELY( chunkin[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) + if( FD_UNLIKELY( (sz>0UL && (chunkin[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark)) || sz>ctx->in[ in_idx 
].mtu ) ) FD_LOG_ERR(( "in_kind %lu chunk %lu %lu corrupt, not in range [%lu,%lu] or too large (%lu)", ctx->in_kind[ in_idx ], chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark, ctx->in[ in_idx ].mtu )); switch( ctx->in_kind[ in_idx ] ) { @@ -384,6 +384,17 @@ after_frag( fd_gui_ctx_t * ctx, fd_gui_peers_handle_config_account( ctx->peers, src, sz ); break; } + case IN_KIND_SNAPIN_MANIF: { + FD_TEST( ctx->is_full_client ); + + if( fd_ssmsg_sig_message( sig )==FD_SSMSG_DONE ) { + fd_gui_peers_commit_snapshot_manifest( ctx->peers ); + } else { + fd_gui_stage_snapshot_manifest( ctx->gui, (fd_snapshot_manifest_t const *)src ); + fd_gui_peers_stage_snapshot_manifest( ctx->peers, (fd_snapshot_manifest_t const *)src, fd_clock_now( ctx->clock ) ); + } + break; + } case IN_KIND_GENESI_OUT: { FD_TEST( ctx->is_full_client ); fd_genesis_meta_t const * meta = (fd_genesis_meta_t const *)src; @@ -725,8 +736,8 @@ unprivileged_init( fd_topo_t * topo, FD_TEST( fd_cstr_printf_check( ctx->version_string, sizeof( ctx->version_string ), NULL, "%s", fdctl_version_string ) ); ctx->topo = topo; - ctx->peers = fd_gui_peers_join( fd_gui_peers_new( _peers, ctx->gui_server, ctx->topo, http_param.max_ws_connection_cnt, fd_clock_now( ctx->clock) ) ); - ctx->gui = fd_gui_join( fd_gui_new( _gui, ctx->gui_server, ctx->version_string, tile->gui.cluster, ctx->identity_key, ctx->has_vote_key, ctx->vote_key->uc, ctx->is_full_client, ctx->snapshots_enabled, tile->gui.is_voting, tile->gui.schedule_strategy, ctx->topo, fd_clock_now( ctx->clock ) ) ); + ctx->peers = fd_gui_peers_join( fd_gui_peers_new( _peers, ctx->gui_server, ctx->topo, http_param.max_ws_connection_cnt, tile->gui.wfs_bank_hash, fd_clock_now( ctx->clock ) ) ); + ctx->gui = fd_gui_join( fd_gui_new( _gui, ctx->gui_server, ctx->version_string, tile->gui.cluster, ctx->identity_key, ctx->has_vote_key, ctx->vote_key->uc, ctx->is_full_client, ctx->snapshots_enabled, tile->gui.is_voting, tile->gui.schedule_strategy, 
tile->gui.wfs_bank_hash, tile->gui.expected_shred_version, ctx->topo, fd_clock_now( ctx->clock ) ) ); FD_TEST( ctx->gui ); ctx->keyswitch = fd_keyswitch_join( fd_topo_obj_laddr( topo, tile->id_keyswitch_obj_id ) ); @@ -768,6 +779,7 @@ unprivileged_init( fd_topo_t * topo, else if( FD_LIKELY( !strcmp( link->name, "replay_epoch" ) ) ) ctx->in_kind[ i ] = IN_KIND_EPOCH; /* full client only */ else if( FD_LIKELY( !strcmp( link->name, "genesi_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_GENESI_OUT; /* full client only */ else if( FD_LIKELY( !strcmp( link->name, "snapin_gui" ) ) ) ctx->in_kind[ i ] = IN_KIND_SNAPIN; /* full client only */ + else if( FD_LIKELY( !strcmp( link->name, "snapin_manif" ) ) ) ctx->in_kind[ i ] = IN_KIND_SNAPIN_MANIF; /* full client only */ else if( FD_LIKELY( !strcmp( link->name, "execrp_replay" ) ) ) ctx->in_kind[ i ] = IN_KIND_EXECRP_REPLAY; /* full client only */ else if( FD_LIKELY( !strcmp( link->name, "bundle_status" ) ) ) ctx->in_kind[ i ] = IN_KIND_BUNDLE; /* full client only */ else FD_LOG_ERR(( "gui tile has unexpected input link %lu %s", i, link->name )); diff --git a/src/disco/topo/fd_topo.h b/src/disco/topo/fd_topo.h index a1908441722..31fd57585bd 100644 --- a/src/disco/topo/fd_topo.h +++ b/src/disco/topo/fd_topo.h @@ -366,6 +366,9 @@ struct fd_topo_tile { int websocket_compression; int frontend_release_channel; ulong tile_cnt; + + char wfs_bank_hash[ FD_BASE58_ENCODED_32_SZ ]; + ushort expected_shred_version; } gui; struct {