@@ -1,6 +1,6 @@
-use crate::ModelProvider;
+use crate::{Model, ModelProvider, TaskBody};
 use rig::completion::PromptError;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 
 mod ollama;
 use ollama::OllamaClient;
@@ -35,7 +35,7 @@ impl DriaExecutor {
     }
 
     /// Executes the given task using the appropriate provider.
-    pub async fn execute(&self, task: crate::TaskBody) -> Result<String, PromptError> {
+    pub async fn execute(&self, task: TaskBody) -> Result<String, PromptError> {
         match self {
             DriaExecutor::Ollama(provider) => provider.execute(task).await,
             DriaExecutor::OpenAI(provider) => provider.execute(task).await,
@@ -47,7 +47,10 @@ impl DriaExecutor {
     /// Checks if the requested models exist and are available in the provider's account.
     ///
     /// For Ollama in particular, it also checks if the models are performant enough.
-    pub async fn check(&self, models: &mut HashSet<crate::Model>) -> eyre::Result<()> {
+    pub async fn check(
+        &self,
+        models: &mut HashSet<Model>,
+    ) -> eyre::Result<HashMap<Model, ModelPerformanceMetric>> {
         match self {
             DriaExecutor::Ollama(provider) => provider.check(models).await,
             DriaExecutor::OpenAI(provider) => provider.check(models).await,
@@ -56,3 +59,9 @@ impl DriaExecutor {
         }
     }
 }
+
+#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
+pub enum ModelPerformanceMetric {
+    Latency(f64), // in seconds
+    TPS(f64),     // (eval) tokens per second
+}
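For reference, a minimal sketch of how a caller might consume the per-model metrics that the new `check` signature returns. It is purely illustrative and not part of this diff: the map is keyed by a plain model-name `String` instead of the crate's `Model` type, the serde derives are omitted, and the model names and printing loop are placeholders, so the snippet compiles on its own.

```rust
use std::collections::HashMap;

// Mirrors the enum added in this diff (serde derives omitted to keep the sketch dependency-free).
#[derive(Debug, Clone)]
pub enum ModelPerformanceMetric {
    Latency(f64), // in seconds
    TPS(f64),     // (eval) tokens per second
}

fn main() {
    // Hypothetical result of `executor.check(&mut models)`, keyed by a model-name string
    // instead of the crate's `Model` type so this stands alone.
    let metrics: HashMap<String, ModelPerformanceMetric> = HashMap::from([
        ("llama3.1:8b".to_string(), ModelPerformanceMetric::TPS(42.0)),
        ("gpt-4o-mini".to_string(), ModelPerformanceMetric::Latency(0.8)),
    ]);

    // A caller could log or filter models based on the reported metric.
    for (model, metric) in &metrics {
        match metric {
            ModelPerformanceMetric::Latency(secs) => println!("{model}: {secs:.2} s latency"),
            ModelPerformanceMetric::TPS(tps) => println!("{model}: {tps:.1} tokens/s"),
        }
    }
}
```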