Skip to content

Commit 471b51d

Browse files
committed
fix me
1 parent 7637022 commit 471b51d

File tree

6 files changed

+275
-3
lines changed

6 files changed

+275
-3
lines changed

book/api/websocket.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@ stake that is activating/deactivating in the current epoch and any stake
597597
that was explicilty undelegated prior to restart (e.g. inactive testnet
598598
participants or bad actors).
599599

600+
During the `waiting_for_supermajority` phase, per-peer offline status
601+
is available via the [`wfs_offline_peers`](#wfs_offline_peers) topic.
602+
600603
#### `summary.schedule_strategy`
601604
| frequency | type | example |
602605
|------------|----------|---------|
@@ -1858,6 +1861,60 @@ full and includes this node itself, nodes with a different
18581861
`shred_version`, nodes publishing corrupt or bad information, and so
18591862
on.
18601863

1864+
### wfs_offline_peers
1865+
The `wfs_offline_peers` topic publishes edge-triggered add/remove
1866+
messages for staked peers that are **not** currently active on gossip.
1867+
1868+
The message is only published if the client is configured to boot with
1869+
the `waiting_for_supermajority` phase enabled.
1870+
1871+
#### `wfs_offline_peers.update`
1872+
| frequency | type | example |
1873+
|------------------|-----------------------------------|---------|
1874+
| *Once* + *Live* | `ClusterRestartOfflinePeerUpdate` | below |
1875+
1876+
::: details Example
1877+
1878+
**Initial state (all peers offline):**
1879+
```json
1880+
{
1881+
"topic": "wfs_offline_peers",
1882+
"key": "update",
1883+
"value": {
1884+
"add": [
1885+
{
1886+
"identity": "Fe4StcZSQ228dKK2hni7aCP7ZprNhj8QKWzFe5usGFYF",
1887+
"stake": "5812",
1888+
"info": null
1889+
}
1890+
],
1891+
"remove": [
1892+
{ "identity": "9aE6Bp1hbDpMFKqnWGUMbfxfMPXswPbkNwNrSjhpFiSN" }
1893+
]
1894+
}
1895+
}
1896+
```
1897+
1898+
:::
1899+
1900+
**`ClusterRestartOfflinePeerUpdate`**
1901+
| Field | Type | Description |
1902+
|-----------------|------------------------|-------------|
1903+
| identity | `string` | Identity public key of the validator, encoded in base58 |
1904+
| stake | `string` | Total stake in lamports for this identity (aggregated across all vote accounts), derived from the snapshot manifest |
1905+
| info | `PeerUpdateInfo\|null` | Self-reported validator information from the ConfigProgram, if available, and `null` otherwise |
1906+
1907+
**`ClusterRestartOfflinePeerRemove`**
1908+
| Field | Type | Description |
1909+
|----------|----------|-------------|
1910+
| identity | `string` | Identity public key of the validator, encoded in base58 |
1911+
1912+
**`ClusterRestartOfflinePeersUpdate`**
1913+
| Field | Type | Description |
1914+
|--------|-------------------------------------|-------------|
1915+
| add | `ClusterRestartOfflinePeerUpdate[]` | List of peers that became offline (or all offline peers on initial connect) |
1916+
| remove | `ClusterRestartOfflinePeerRemove[]` | List of peers that came back online and are no longer offline |
1917+
18611918
### slot
18621919
Slots are opportunities for a leader to produce a block. A slot can be
18631920
in one of five levels, and in typical operation a slot moves through

src/disco/gui/fd_gui_peers.c

Lines changed: 123 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "fd_gui_metrics.h"
55

66
#include "../../disco/metrics/fd_metrics_base.h"
7+
#include "../../disco/shred/fd_stake_ci.h"
78

89
FD_IMPORT_BINARY( dbip_f, "src/disco/gui/dbip.bin.zst" );
910

@@ -270,6 +271,9 @@ fd_gui_peers_new( void * shmem,
270271
build_geoip_trie( ctx, _dbip_nodes, (uchar *)dbip_f, dbip_f_sz, &ctx->dbip, FD_GUI_GEOIP_DBIP_MAX_NODES );
271272
#endif
272273

274+
ctx->wfs_peers_cnt = 0UL;
275+
wfs_fresh_dlist_join( wfs_fresh_dlist_new( ctx->wfs_fresh_dlist ) );
276+
273277
return shmem;
274278
}
275279

@@ -630,6 +634,55 @@ geoip_lookup( fd_gui_ip_db_t const * ip_db,
630634

631635
#endif
632636

637+
#define SORT_NAME wfs_peer_sort
638+
#define SORT_KEY_T fd_gui_wfs_offline_peer_t
639+
#define SORT_BEFORE(a,b) (memcmp( (a).identity_key.uc, (b).identity_key.uc, 32UL )<0)
640+
#include "../../util/tmpl/fd_sort.c"
641+
642+
static void
643+
wfs_handle_contact_info_update( fd_gui_peers_ctx_t * peers,
644+
fd_pubkey_t const * identity,
645+
long wallclock_nanos ) {
646+
if( FD_LIKELY( !peers->wfs_peers_cnt ) ) return;
647+
648+
ulong idx = wfs_peer_sort_split( peers->wfs_peers, peers->wfs_peers_cnt, (fd_gui_wfs_offline_peer_t){ .identity_key = *identity } );
649+
if( FD_UNLIKELY( idx>=peers->wfs_peers_cnt || memcmp( identity->uc, peers->wfs_peers[ idx ].identity_key.uc, sizeof(fd_pubkey_t) ) ) ) return;
650+
651+
fd_gui_wfs_offline_peer_t * wp = &peers->wfs_peers[ idx ];
652+
wp->wallclock_nanos = wallclock_nanos;
653+
654+
if( !wp->is_online ) {
655+
wp->is_online = 1;
656+
wfs_fresh_dlist_idx_push_tail( peers->wfs_fresh_dlist, idx, peers->wfs_peers );
657+
658+
fd_gui_peers_printf_wfs_offline_update( peers, NULL, 0UL, &idx, 1UL );
659+
fd_http_server_ws_broadcast( peers->http );
660+
} else {
661+
wfs_fresh_dlist_idx_remove( peers->wfs_fresh_dlist, idx, peers->wfs_peers );
662+
wfs_fresh_dlist_idx_push_tail( peers->wfs_fresh_dlist, idx, peers->wfs_peers );
663+
}
664+
}
665+
666+
static void
667+
wfs_handle_contact_info_remove( fd_gui_peers_ctx_t * peers,
668+
fd_pubkey_t const * identity ) {
669+
if( FD_LIKELY( !peers->wfs_peers_cnt ) ) return;
670+
671+
ulong idx = wfs_peer_sort_split( peers->wfs_peers, peers->wfs_peers_cnt,
672+
(fd_gui_wfs_offline_peer_t){ .identity_key = *identity } );
673+
if( FD_UNLIKELY( idx>=peers->wfs_peers_cnt ||
674+
memcmp( identity->uc, peers->wfs_peers[ idx ].identity_key.uc, 32UL ) ) ) return;
675+
676+
fd_gui_wfs_offline_peer_t * wp = &peers->wfs_peers[ idx ];
677+
if( wp->is_online ) {
678+
wfs_fresh_dlist_idx_remove( peers->wfs_fresh_dlist, idx, peers->wfs_peers );
679+
wp->is_online = 0;
680+
681+
fd_gui_peers_printf_wfs_offline_update( peers, &idx, 1UL, NULL, 0UL );
682+
fd_http_server_ws_broadcast( peers->http );
683+
}
684+
}
685+
633686
void
634687
fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
635688
fd_gossip_update_message_t const * update,
@@ -655,7 +708,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
655708
/* A new pubkey is not allowed to overwrite an existing valid index */
656709
FD_LOG_ERR(( "invariant violation: peer->pubkey.uc=%s != update->origin=%s ", ci_pk, og_pk ));
657710
}
658-
FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t * )update->origin, NULL, peers->contact_info_table ) );
711+
FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t const * )update->origin, NULL, peers->contact_info_table ) );
659712
fd_gui_peers_node_t * peer_sock = fd_gui_peers_node_sock_map_ele_query( peers->node_sock_map, &peer->contact_info.sockets[ FD_GOSSIP_CONTACT_INFO_SOCKET_GOSSIP ], NULL, peers->contact_info_table );
660713
int found = 0;
661714
for( fd_gui_peers_node_t * p = peer_sock; !!p; p=(fd_gui_peers_node_t *)fd_gui_peers_node_sock_map_ele_next_const( p, NULL, peers->contact_info_table ) ) {
@@ -669,6 +722,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
669722
/* update does nothing */
670723
if( FD_UNLIKELY( fd_gui_peers_contact_info_eq( &peer->contact_info, update->contact_info->value ) ) ) {
671724
peer->wallclock_nanos = FD_MILLI_TO_NANOSEC( update->wallclock );
725+
wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, FD_MILLI_TO_NANOSEC( update->wallclock ) );
672726
break;
673727
}
674728

@@ -696,6 +750,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
696750
/* broadcast update to WebSocket clients */
697751
fd_gui_peers_printf_nodes( peers, (int[]){ FD_GUI_PEERS_NODE_UPDATE }, (ulong[]){ update->contact_info->idx }, 1UL );
698752
fd_http_server_ws_broadcast( peers->http );
753+
754+
wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, FD_MILLI_TO_NANOSEC( update->wallclock ) );
699755
} else {
700756
#if LOGGING
701757
char _pk[ FD_BASE58_ENCODED_32_SZ ];
@@ -748,6 +804,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
748804
/* broadcast update to WebSocket clients */
749805
fd_gui_peers_printf_nodes( peers, (int[]){ FD_GUI_PEERS_NODE_ADD }, (ulong[]){ update->contact_info->idx }, 1UL );
750806
fd_http_server_ws_broadcast( peers->http );
807+
808+
wfs_handle_contact_info_update( peers, (fd_pubkey_t const *)update->origin, FD_MILLI_TO_NANOSEC( update->wallclock ) );
751809
}
752810
break;
753811
}
@@ -764,7 +822,7 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
764822
#ifdef FD_GUI_USE_HANDHOLDING
765823
/* invariant checks */
766824
FD_TEST( peer->valid ); /* Should have already been in the table */
767-
FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t * )update->origin, NULL, peers->contact_info_table ) );
825+
FD_TEST( peer==fd_gui_peers_node_pubkey_map_ele_query_const( peers->node_pubkey_map, (fd_pubkey_t const * )update->origin, NULL, peers->contact_info_table ) );
768826
fd_gui_peers_node_t * peer_sock = fd_gui_peers_node_sock_map_ele_query( peers->node_sock_map, &peer->contact_info.sockets[ FD_GOSSIP_CONTACT_INFO_SOCKET_GOSSIP ], NULL, peers->contact_info_table );
769827
int found = 0;
770828
for( fd_gui_peers_node_t const * p = peer_sock; !!p; p=(fd_gui_peers_node_t const *)fd_gui_peers_node_sock_map_ele_next_const( p, NULL, peers->contact_info_table ) ) {
@@ -775,6 +833,8 @@ fd_gui_peers_handle_gossip_update( fd_gui_peers_ctx_t * peers,
775833
}
776834
FD_TEST( found );
777835
#endif
836+
wfs_handle_contact_info_remove( peers, (fd_pubkey_t *)update->origin );
837+
778838
fd_gui_peers_live_table_idx_remove ( peers->live_table, update->contact_info_remove->idx, peers->contact_info_table );
779839
fd_gui_peers_bandwidth_tracking_idx_remove ( peers->bw_tracking, update->contact_info_remove->idx, peers->contact_info_table );
780840
fd_gui_peers_node_sock_map_idx_remove_fast ( peers->node_sock_map, update->contact_info_remove->idx, peers->contact_info_table );
@@ -1030,6 +1090,38 @@ fd_gui_peers_handle_config_account( fd_gui_peers_ctx_t * peers,
10301090
fd_gui_peers_node_info_map_ele_insert( peers->node_info_map, node_info, peers->node_info_pool );
10311091
}
10321092

1093+
void
1094+
fd_gui_peers_handle_manifest( fd_gui_peers_ctx_t * peers,
1095+
fd_snapshot_manifest_t const * manifest ) {
1096+
fd_vote_stake_weight_t scratch[ FD_RUNTIME_MAX_VOTE_ACCOUNTS ];
1097+
ulong scratch_cnt = 0UL;
1098+
for( ulong i=0UL; i<manifest->vote_accounts_len; i++ ) {
1099+
if( FD_UNLIKELY( manifest->vote_accounts[ i ].stake==0UL ) ) continue;
1100+
fd_memcpy( scratch[ scratch_cnt ].id_key.uc, manifest->vote_accounts[ i ].node_account_pubkey, 32UL );
1101+
fd_memcpy( scratch[ scratch_cnt ].vote_key.uc, manifest->vote_accounts[ i ].vote_account_pubkey, 32UL );
1102+
scratch[ scratch_cnt ].stake = manifest->vote_accounts[ i ].stake;
1103+
scratch_cnt++;
1104+
}
1105+
1106+
/* Mirrors gossip WFS logic */
1107+
fd_stake_weight_t id_weights[ FD_RUNTIME_MAX_VOTE_ACCOUNTS ];
1108+
ulong id_cnt = compute_id_weights_from_vote_weights( id_weights, scratch, scratch_cnt );
1109+
1110+
/* Restore invariant: sorted by identity key */
1111+
fd_stake_weight_key_sort_inplace( id_weights, id_cnt );
1112+
1113+
for( ulong i=0UL; i<id_cnt; i++ ) {
1114+
peers->wfs_peers[ i ].identity_key = id_weights[ i ].key;
1115+
peers->wfs_peers[ i ].stake = id_weights[ i ].stake;
1116+
peers->wfs_peers[ i ].is_online = 0;
1117+
peers->wfs_peers[ i ].wallclock_nanos = 0L;
1118+
peers->wfs_peers[ i ].fresh_prev = ULONG_MAX;
1119+
peers->wfs_peers[ i ].fresh_next = ULONG_MAX;
1120+
}
1121+
peers->wfs_peers_cnt = id_cnt;
1122+
wfs_fresh_dlist_join( wfs_fresh_dlist_new( peers->wfs_fresh_dlist ) );
1123+
}
1124+
10331125

10341126
static void
10351127
fd_gui_peers_viewport_snap( fd_gui_peers_ctx_t * peers, ulong ws_conn_id ) {
@@ -1296,6 +1388,24 @@ int
12961388
fd_gui_peers_poll( fd_gui_peers_ctx_t * peers, long now ) {
12971389
int did_work = 0;
12981390

1391+
ulong evicted_cnt = 0UL;
1392+
while( FD_UNLIKELY( peers->wfs_peers_cnt && !wfs_fresh_dlist_is_empty( peers->wfs_fresh_dlist, peers->wfs_peers ) ) ) {
1393+
ulong head_idx = wfs_fresh_dlist_idx_peek_head( peers->wfs_fresh_dlist, peers->wfs_peers );
1394+
fd_gui_wfs_offline_peer_t * oldest = &peers->wfs_peers[ head_idx ];
1395+
if( oldest->wallclock_nanos > now - 15L*1000L*1000L*1000L ) break;
1396+
1397+
wfs_fresh_dlist_idx_pop_head( peers->wfs_fresh_dlist, peers->wfs_peers );
1398+
oldest->is_online = 0;
1399+
1400+
peers->scratch.wfs_peers[ evicted_cnt++ ] = head_idx;
1401+
if( FD_UNLIKELY( evicted_cnt>=256UL ) ) break;
1402+
}
1403+
if( FD_UNLIKELY( evicted_cnt ) ) {
1404+
fd_gui_peers_printf_wfs_offline_update( peers, peers->scratch.wfs_peers, evicted_cnt, NULL, 0UL );
1405+
fd_http_server_ws_broadcast( peers->http );
1406+
return 1; /* preserve STEM_BURST */
1407+
}
1408+
12991409
/* update client viewports in a round-robin */
13001410
if( FD_UNLIKELY( fd_gui_peers_ws_conn_rr_advance( peers, now ) ) ) {
13011411
FD_TEST( peers->client_viewports[ peers->active_ws_conn_id ].connected );
@@ -1396,7 +1506,7 @@ fd_gui_peers_poll( fd_gui_peers_ctx_t * peers, long now ) {
13961506
fd_http_server_ws_broadcast( peers->http );
13971507

13981508
peers->next_gossip_stats_update_nanos = now + (FD_GUI_PEERS_GOSSIP_STATS_UPDATE_INTERVAL_MILLIS * 1000000L);
1399-
did_work = 1;
1509+
return 1; /* preserve STEM_BURST */
14001510
}
14011511

14021512
return did_work;
@@ -1415,6 +1525,16 @@ fd_gui_peers_ws_open( fd_gui_peers_ctx_t * peers,
14151525

14161526
fd_gui_peers_printf_node_all( peers );
14171527
FD_TEST( !fd_http_server_ws_send( peers->http, ws_conn_id ) );
1528+
1529+
/* Send initial WFS offline peer snapshot if in WFS phase */
1530+
if( FD_UNLIKELY( peers->wfs_peers_cnt ) ) {
1531+
ulong added_cnt = 0UL;
1532+
for( ulong i=0UL; i<peers->wfs_peers_cnt; i++ ) {
1533+
if( FD_UNLIKELY( peers->wfs_peers[ i ].is_online ) ) peers->scratch.wfs_peers[ added_cnt++ ] = i;
1534+
}
1535+
fd_gui_peers_printf_wfs_offline_update( peers, peers->scratch.wfs_peers, peers->wfs_peers_cnt, NULL, 0UL );
1536+
FD_TEST( !fd_http_server_ws_send( peers->http, ws_conn_id ) );
1537+
}
14181538
}
14191539

14201540
void

src/disco/gui/fd_gui_peers.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "../../flamenco/runtime/fd_runtime_const.h"
2222

2323
#include "../../waltz/http/fd_http_server.h"
24+
#include "../../discof/restore/utils/fd_ssmsg.h"
2425
#include "../topo/fd_topo.h"
2526

2627
#if FD_HAS_ZSTD
@@ -85,6 +86,23 @@ struct fd_gui_geoip_node {
8586

8687
typedef struct fd_gui_geoip_node fd_gui_geoip_node_t;
8788

89+
struct fd_gui_wfs_offline_peer {
90+
fd_pubkey_t identity_key;
91+
ulong stake;
92+
int is_online;
93+
long wallclock_nanos;
94+
95+
ulong fresh_prev;
96+
ulong fresh_next;
97+
};
98+
typedef struct fd_gui_wfs_offline_peer fd_gui_wfs_offline_peer_t;
99+
100+
#define DLIST_NAME wfs_fresh_dlist
101+
#define DLIST_ELE_T fd_gui_wfs_offline_peer_t
102+
#define DLIST_PREV fresh_prev
103+
#define DLIST_NEXT fresh_next
104+
#include "../../util/tmpl/fd_dlist.c"
105+
88106
#define FD_GUI_PEERS_NODE_NOP (0)
89107
#define FD_GUI_PEERS_NODE_ADD (1)
90108
#define FD_GUI_PEERS_NODE_UPDATE (2)
@@ -393,13 +411,20 @@ struct fd_gui_peers_ctx {
393411
int actions[ FD_CONTACT_INFO_TABLE_SIZE ];
394412
ulong idxs [ FD_CONTACT_INFO_TABLE_SIZE ];
395413
};
414+
struct {
415+
ulong wfs_peers[ FD_RUNTIME_MAX_VOTE_ACCOUNTS ];
416+
};
396417
} scratch;
397418

398419
#if FD_HAS_ZSTD
399420
ZSTD_DCtx * zstd_dctx;
400421
#endif
401422

402423
fd_gui_ip_db_t dbip;
424+
425+
fd_gui_wfs_offline_peer_t wfs_peers[ FD_RUNTIME_MAX_VOTE_ACCOUNTS ];
426+
ulong wfs_peers_cnt;
427+
wfs_fresh_dlist_t wfs_fresh_dlist[ 1 ];
403428
};
404429

405430
typedef struct fd_gui_peers_ctx fd_gui_peers_ctx_t;
@@ -474,6 +499,10 @@ fd_gui_peers_handle_config_account( fd_gui_peers_ctx_t * peers,
474499
uchar const * data,
475500
ulong sz );
476501

502+
void
503+
fd_gui_peers_handle_manifest( fd_gui_peers_ctx_t * peers,
504+
fd_snapshot_manifest_t const * manifest );
505+
477506
/* fd_gui_peers_ws_message handles incoming websocket request payloads
478507
requesting peer-related responses. ws_conn_id is the connection id
479508
of the requester. data is a pointer to the start of the

0 commit comments

Comments
 (0)