1
- use eyre:: { eyre , Result } ;
1
+ use eyre:: Result ;
2
2
use std:: time:: Duration ;
3
3
use tokio_util:: sync:: CancellationToken ;
4
4
@@ -7,54 +7,48 @@ use crate::{utils::DriaMessage, DriaComputeNode};
7
7
impl DriaComputeNode {
8
8
/// Runs the main loop of the compute node.
9
9
/// This method is not expected to return until cancellation occurs for the given token.
///
/// The loop also ends if one of the receiver channels polled inside it is closed
/// unexpectedly; that case is treated as an error rather than a normal shutdown.
10
- pub async fn run ( & mut self , cancellation : CancellationToken ) -> Result < ( ) > {
10
+ pub async fn run ( & mut self , cancellation : CancellationToken ) {
11
11
/// Number of seconds between refreshing for diagnostic prints.
12
12
const DIAGNOSTIC_REFRESH_INTERVAL_SECS : u64 = 30 ;
13
13
/// Number of seconds between refreshing the available nodes.
14
14
const AVAILABLE_NODES_REFRESH_INTERVAL_SECS : u64 = 10 * 60 ;
15
+ /// Number of seconds between each heartbeat sent to the RPC.
16
+ const HEARTBEAT_INTERVAL_SECS : u64 = 60 ;
15
17
16
18
// prepare the periodic timers (intervals) that drive the timed branches of the loop below
17
19
let mut diagnostic_refresh_interval =
18
20
tokio:: time:: interval ( Duration :: from_secs ( DIAGNOSTIC_REFRESH_INTERVAL_SECS ) ) ;
19
- diagnostic_refresh_interval. tick ( ) . await ; // move one tick
20
21
let mut available_node_refresh_interval =
21
22
tokio:: time:: interval ( Duration :: from_secs ( AVAILABLE_NODES_REFRESH_INTERVAL_SECS ) ) ;
22
- available_node_refresh_interval. tick ( ) . await ; // move one tick
23
+ let mut heartbeat_interval =
24
+ tokio:: time:: interval ( Duration :: from_secs ( HEARTBEAT_INTERVAL_SECS ) ) ;
25
+
26
+ // move each one tick: an interval's first tick completes immediately, so it is consumed here
27
+ available_node_refresh_interval. tick ( ) . await ;
28
+ diagnostic_refresh_interval. tick ( ) . await ;
29
+ heartbeat_interval. tick ( ) . await ;
23
30
24
31
loop {
25
32
tokio:: select! {
26
33
// a task is completed by the worker & its output should be sent back to the requesting peer
27
34
task_response_msg_opt = self . task_output_rx. recv( ) => {
28
- let task_response_msg = task_response_msg_opt. ok_or(
29
- eyre!( "Publish channel closed unexpectedly, we still have {} batch and {} single tasks." , self . pending_tasks_batch. len( ) , self . pending_tasks_single. len( ) )
30
- ) ?; {
31
- if let Err ( e) = self . handle_task_response( task_response_msg) . await {
32
- log:: error!( "Error responding to task: {:?}" , e) ;
33
- }
35
+ if let Some ( task_response_msg) = task_response_msg_opt {
36
+ if let Err ( e) = self . send_task_output( task_response_msg) . await {
37
+ log:: error!( "Error responding to task: {:?}" , e) ;
38
+ }
39
+ } else {
40
+ log:: error!( "task_output_rx channel closed unexpectedly, we still have {} batch and {} single tasks." , self . pending_tasks_batch. len( ) , self . pending_tasks_single. len( ) ) ;
41
+ break ;
34
42
}
35
43
} ,
36
44
37
- // a GossipSub message is received from the channel
38
- // // this is expected to be sent by the p2p client
39
- // gossipsub_msg_opt = self.gossip_message_rx.recv() => {
40
- // let (propagation_peer_id, message_id, message) = gossipsub_msg_opt.ok_or(eyre!("message_rx channel closed unexpectedly"))?;
41
-
42
- // // handle the message, returning a message acceptance for the received one
43
- // let acceptance = self.handle_message((propagation_peer_id, &message_id, message)).await;
44
-
45
- // // validate the message based on the acceptance
46
- // // cant do anything but log if this gives an error as well
47
- // if let Err(e) = self.p2p.validate_message(&message_id, &propagation_peer_id, acceptance).await {
48
- // log::error!("Error validating message {}: {:?}", message_id, e);
49
- // }
50
-
51
- // },
52
-
53
- // a Request is received from the channel, sent by p2p client
54
- request_msg_opt = self . request_rx. recv( ) => {
55
- let request = request_msg_opt. ok_or( eyre!( "request_rx channel closed unexpectedly" ) ) ?;
56
- if let Err ( e) = self . handle_request( request) . await {
57
- log:: error!( "Error handling request: {:?}" , e) ;
45
+ // a Request or Response is received by the p2p client
46
+ reqres_msg_opt = self . reqres_rx. recv( ) => {
47
+ if let Some ( ( peer_id, message) ) = reqres_msg_opt {
48
+ self . handle_reqres( peer_id, message) . await ;
49
+ } else {
50
+ log:: error!( "reqres_rx channel closed unexpectedly." ) ;
51
+ break ;
58
52
}
59
53
} ,
60
54
@@ -64,19 +58,28 @@ impl DriaComputeNode {
64
58
// available nodes are refreshed periodically (every AVAILABLE_NODES_REFRESH_INTERVAL_SECS seconds)
65
59
_ = available_node_refresh_interval. tick( ) => self . handle_available_nodes_refresh( ) . await ,
66
60
61
+ _ = heartbeat_interval. tick( ) => {
62
+ if let Err ( e) = self . send_heartbeat( ) . await {
63
+ log:: error!( "Error making heartbeat: {:?}" , e) ;
64
+ }
65
+ } ,
66
+
67
67
// check if the cancellation token is cancelled
68
68
// this is expected to be cancelled by the main thread with signal handling
69
- _ = cancellation. cancelled( ) => break ,
69
+ _ = cancellation. cancelled( ) => {
70
+ log:: info!( "Cancellation received, shutting down the node." ) ;
71
+ break ;
72
+ } ,
70
73
}
71
74
}
72
75
73
76
// print one final diagnostic as a summary
74
77
self . handle_diagnostic_refresh ( ) . await ;
75
78
76
79
// shutdown channels
77
- self . shutdown ( ) . await ? ;
78
-
79
- Ok ( ( ) )
80
+ if let Err ( e ) = self . shutdown ( ) . await {
81
+ log :: error! ( "Could not shutdown the node gracefully: {:?}" , e ) ;
82
+ }
80
83
}
81
84
82
85
/// Shorthand method to create a signed message with the given data and topic.
@@ -95,9 +98,12 @@ impl DriaComputeNode {
95
98
log:: debug!( "Sending shutdown command to p2p client." ) ;
96
99
self . p2p . shutdown ( ) . await ?;
97
100
98
- log:: debug!( "Closing task response channel." ) ;
101
+ log:: debug!( "Closing task output channel." ) ;
99
102
self . task_output_rx . close ( ) ;
100
103
104
+ log:: debug!( "Closing reqres channel." ) ;
105
+ self . reqres_rx . close ( ) ;
106
+
101
107
Ok ( ( ) )
102
108
}
103
109
}
0 commit comments