             create_client_config, create_client_endpoint, QuicClientCertificate, QuicError,
         },
         transaction_batch::TransactionBatch,
-        workers_cache::{WorkerInfo, WorkersCache, WorkersCacheError},
+        workers_cache::{maybe_shutdown_worker, WorkerInfo, WorkersCache, WorkersCacheError},
+        SendTransactionStats,
     },
     log::*,
     quinn::Endpoint,
@@ -39,6 +40,25 @@ pub enum ConnectionWorkersSchedulerError {
     LeaderReceiverDropped,
 }
 
+/// [`Fanout`] is a configuration struct that specifies how many leaders should
+/// be targeted when sending transactions and connecting.
+///
+/// Note that the unit is the number of leaders per
+/// [`NUM_CONSECUTIVE_LEADER_SLOTS`]. This means that if the leader schedule is
+/// [L1, L1, L1, L1, L1, L1, L1, L1, L2, L2, L2, L2], the leaders per
+/// consecutive leader slots are [L1, L1, L2], so there are 3 of them.
+///
+/// The idea of having a separate `connect` parameter is to create a set of
+/// nodes to connect to in advance in order to hide the latency of opening new
+/// connections. Hence, `connect` must be greater than or equal to `send`.
+pub struct Fanout {
+    /// The number of leaders to target for sending transactions.
+    pub send: usize,
+
+    /// The number of leaders to target for establishing connections.
+    pub connect: usize,
+}
+
 /// Configuration for the [`ConnectionWorkersScheduler`].
 ///
 /// This struct holds the necessary settings to initialize and manage connection
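As a side note for readers of this hunk, the counting convention described in the `Fanout` doc comment can be reproduced with a small standalone sketch (not part of the change); it assumes `NUM_CONSECUTIVE_LEADER_SLOTS` equals 4, as in the current SDK:

```rust
// Sketch only: mirrors the Fanout doc-comment example of "leaders per
// NUM_CONSECUTIVE_LEADER_SLOTS". The constant value 4 is an assumption here.
const NUM_CONSECUTIVE_LEADER_SLOTS: usize = 4;

fn main() {
    // Slot-by-slot schedule from the doc comment: 8 slots of L1, then 4 of L2.
    let mut schedule = vec!["L1"; 8];
    schedule.extend(vec!["L2"; 4]);

    // One leader per group of consecutive leader slots.
    let leaders_per_group: Vec<&str> = schedule
        .chunks(NUM_CONSECUTIVE_LEADER_SLOTS)
        .map(|group| group[0])
        .collect();

    // Three entries, as the doc comment states, even though only two
    // distinct validators appear in the schedule.
    assert_eq!(leaders_per_group, ["L1", "L1", "L2"]);
}
```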
@@ -66,10 +86,8 @@ pub struct ConnectionWorkersSchedulerConfig {
     /// connection failure.
     pub max_reconnect_attempts: usize,
 
-    /// The number of slots to look ahead during the leader estimation
-    /// procedure. Determines how far into the future leaders are estimated,
-    /// allowing connections to be established with those leaders in advance.
-    pub lookahead_slots: u64,
+    /// Configures the number of leaders to connect to and send transactions to.
+    pub leaders_fanout: Fanout,
 }
 
 impl ConnectionWorkersScheduler {
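A hedged sketch of how the new `leaders_fanout` field might be populated; it assumes the `Fanout` struct introduced above is in scope, and the numbers are purely illustrative, not defaults taken from the code:

```rust
// Sketch only: a Fanout value suitable for ConnectionWorkersSchedulerConfig.
fn example_fanout() -> Fanout {
    let leaders_fanout = Fanout {
        // Send every batch to the current leader group only...
        send: 1,
        // ...while also keeping a warm connection to the following one, so a
        // leader handover does not pay the connection-setup latency.
        connect: 2,
    };
    // The doc comment requires `connect >= send`; split_leaders() also
    // asserts this at runtime.
    assert!(leaders_fanout.connect >= leaders_fanout.send);
    leaders_fanout
}
```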
@@ -90,7 +108,7 @@ impl ConnectionWorkersScheduler {
             skip_check_transaction_age,
             worker_channel_size,
             max_reconnect_attempts,
-            lookahead_slots,
+            leaders_fanout,
         }: ConnectionWorkersSchedulerConfig,
         mut leader_updater: Box<dyn LeaderUpdater>,
         mut transaction_receiver: mpsc::Receiver<TransactionBatch>,
@@ -99,6 +117,7 @@ impl ConnectionWorkersScheduler {
         let endpoint = Self::setup_endpoint(bind, validator_identity)?;
         debug!("Client endpoint bind address: {:?}", endpoint.local_addr());
         let mut workers = WorkersCache::new(num_connections, cancel.clone());
+        let mut send_stats_per_addr = SendTransactionStatsPerAddr::new();
 
         loop {
             let transaction_batch = tokio::select! {
@@ -114,50 +133,49 @@ impl ConnectionWorkersScheduler {
                     break;
                 }
             };
-            let updated_leaders = leader_updater.next_leaders(lookahead_slots);
-            let new_leader = &updated_leaders[0];
-            let future_leaders = &updated_leaders[1..];
-            if !workers.contains(new_leader) {
-                debug!("No existing workers for {new_leader:?}, starting a new one.");
-                let worker = Self::spawn_worker(
-                    &endpoint,
-                    new_leader,
-                    worker_channel_size,
-                    skip_check_transaction_age,
-                    max_reconnect_attempts,
-                );
-                workers.push(*new_leader, worker).await;
-            }
 
-            tokio::select! {
-                send_res = workers.send_transactions_to_address(new_leader, transaction_batch) => match send_res {
-                    Ok(()) => (),
-                    Err(WorkersCacheError::ShutdownError) => {
-                        debug!("Connection to {new_leader} was closed, worker cache shutdown");
-                    }
-                    Err(err) => {
-                        warn!("Connection to {new_leader} was closed, worker error: {err}");
-                        // If we has failed to send batch, it will be dropped.
-                    }
-                },
-                () = cancel.cancelled() => {
-                    debug!("Cancelled: Shutting down");
-                    break;
-                }
-            };
+            let updated_leaders = leader_updater.next_leaders(leaders_fanout.connect);
 
-            // Regardless of who is leader, add future leaders to the cache to
-            // hide the latency of opening the connection.
-            for peer in future_leaders {
+            let (fanout_leaders, connect_leaders) =
+                split_leaders(&updated_leaders, &leaders_fanout);
+            // Add future leaders to the cache to hide the latency of opening
+            // the connection.
+            for peer in connect_leaders {
                 if !workers.contains(peer) {
+                    let stats = send_stats_per_addr.entry(peer.ip()).or_default();
                     let worker = Self::spawn_worker(
                         &endpoint,
                         peer,
                         worker_channel_size,
                         skip_check_transaction_age,
                         max_reconnect_attempts,
+                        stats.clone(),
                     );
-                    workers.push(*peer, worker).await;
+                    maybe_shutdown_worker(workers.push(*peer, worker));
+                }
+            }
+
+            for new_leader in fanout_leaders {
+                if !workers.contains(new_leader) {
+                    warn!("No existing worker for {new_leader:?}, skip sending to this leader.");
+                    continue;
+                }
+
+                let send_res =
+                    workers.try_send_transactions_to_address(new_leader, transaction_batch.clone());
+                match send_res {
+                    Ok(()) => (),
+                    Err(WorkersCacheError::ShutdownError) => {
+                        debug!("Connection to {new_leader} was closed, worker cache shutdown");
+                    }
+                    Err(WorkersCacheError::ReceiverDropped) => {
+                        // Remove the worker from the cache if the peer has disconnected.
+                        maybe_shutdown_worker(workers.pop(*new_leader));
+                    }
+                    Err(err) => {
+                        warn!("Connection to {new_leader} was closed, worker error: {err}");
+                        // If we have failed to send the batch, it will be dropped.
+                    }
                 }
             }
         }
@@ -166,7 +184,7 @@ impl ConnectionWorkersScheduler {
 
         endpoint.close(0u32.into(), b"Closing connection");
         leader_updater.stop().await;
-        Ok(workers.transaction_stats().clone())
+        Ok(send_stats_per_addr)
     }
 
     /// Sets up the QUIC endpoint for the scheduler to handle connections.
@@ -191,6 +209,7 @@ impl ConnectionWorkersScheduler {
         worker_channel_size: usize,
         skip_check_transaction_age: bool,
         max_reconnect_attempts: usize,
+        stats: Arc<SendTransactionStats>,
     ) -> WorkerInfo {
         let (txs_sender, txs_receiver) = mpsc::channel(worker_channel_size);
         let endpoint = endpoint.clone();
@@ -202,12 +221,31 @@ impl ConnectionWorkersScheduler {
             txs_receiver,
             skip_check_transaction_age,
             max_reconnect_attempts,
+            stats,
         );
         let handle = tokio::spawn(async move {
             worker.run().await;
-            worker.transaction_stats().clone()
         });
 
         WorkerInfo::new(txs_sender, handle, cancel)
     }
 }
+
+/// Splits `leaders` into two slices based on the `fanout` configuration:
+/// * the first slice contains the leaders to which transactions will be sent,
+/// * the second slice contains the leaders used to warm up connections. This
+///   slice includes the first one.
+fn split_leaders<'leaders>(
+    leaders: &'leaders [SocketAddr],
+    fanout: &Fanout,
+) -> (&'leaders [SocketAddr], &'leaders [SocketAddr]) {
+    let Fanout { send, connect } = fanout;
+    assert!(send <= connect);
+    let send_count = (*send).min(leaders.len());
+    let connect_count = (*connect).min(leaders.len());
+
+    let send_slice = &leaders[..send_count];
+    let connect_slice = &leaders[..connect_count];
+
+    (send_slice, connect_slice)
+}
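A usage sketch for `split_leaders`, written as a hypothetical in-module test (the function is private, so this assumes a test module placed in the same file); the socket addresses are placeholders:

```rust
// Sketch only: expected behavior of split_leaders() under the assumptions above.
#[cfg(test)]
mod fanout_split_sketch {
    use {super::*, std::net::SocketAddr};

    #[test]
    fn split_respects_send_and_connect_counts() {
        let leaders: Vec<SocketAddr> = vec![
            "127.0.0.1:8001".parse().unwrap(),
            "127.0.0.1:8002".parse().unwrap(),
            "127.0.0.1:8003".parse().unwrap(),
        ];
        let fanout = Fanout { send: 1, connect: 2 };

        let (send_to, connect_to) = split_leaders(&leaders, &fanout);

        // Transactions go only to the first leader...
        assert_eq!(send_to, &leaders[..1]);
        // ...while connections are warmed up to the first two; the send set
        // is always a prefix of the connect set.
        assert_eq!(connect_to, &leaders[..2]);

        // Both counts are clamped to the number of known leaders.
        let greedy = Fanout { send: 5, connect: 8 };
        let (send_all, connect_all) = split_leaders(&leaders, &greedy);
        assert_eq!(send_all.len(), leaders.len());
        assert_eq!(connect_all.len(), leaders.len());
    }
}
```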