+ use dkn_utils::payloads::SpecModelPerformance;
use eyre::{Context, Result};
use ollama_rs::generation::completion::request::GenerationRequest;
use rig::completion::{Chat, PromptError};
use rig::providers::ollama;
+ use std::collections::HashMap;
use std::time::Duration;
use std::{collections::HashSet, env};
@@ -78,7 +80,10 @@ impl OllamaClient {
    }

    /// Check if requested models exist in Ollama & test them using a dummy prompt.
-    pub async fn check(&self, models: &mut HashSet<Model>) -> Result<()> {
+    pub async fn check(
+        &self,
+        models: &mut HashSet<Model>,
+    ) -> Result<HashMap<Model, SpecModelPerformance>> {
        log::info!(
            "Checking Ollama requirements (auto-pull {}, timeout: {}s, min tps: {})",
            if self.auto_pull { "on" } else { "off" },
@@ -101,6 +106,7 @@ impl OllamaClient {
        // check external models & pull them if available
        // iterate over models and remove bad ones
        let mut models_to_remove = Vec::new();
+        let mut model_performances = HashMap::new();
        for model in models.iter() {
            // pull the model if it is not in the local models
            if !local_models.contains(&model.to_string()) {
@@ -117,8 +123,13 @@ impl OllamaClient {
            }

            // test its performance
-            if !self.test_performance(model).await {
+            let perf = self.measure_tps_with_warmup(model).await;
+            if let SpecModelPerformance::PassedWithTPS(_) = perf {
+                model_performances.insert(*model, perf);
+            } else {
+                // if it's anything but PassedWithTPS, remove the model
                models_to_remove.push(*model);
+                model_performances.insert(*model, perf);
            }
        }

@@ -133,7 +144,7 @@ impl OllamaClient {
            log::info!("Ollama checks are finished, using models: {:#?}", models);
        }

-        Ok(())
+        Ok(model_performances)
    }

    /// Pulls a model from Ollama.
@@ -154,7 +165,7 @@ impl OllamaClient {
    ///
    /// This is to see if a given system can execute tasks for their chosen models,
    /// e.g. if they have enough RAM/CPU and such.
-    pub async fn test_performance(&self, model: &Model) -> bool {
+    pub async fn measure_tps_with_warmup(&self, model: &Model) -> SpecModelPerformance {
        const TEST_PROMPT: &str = "Please write a poem about Kapadokya.";
        const WARMUP_PROMPT: &str = "Write a short poem about hedgehogs and squirrels.";

@@ -171,44 +182,46 @@ impl OllamaClient {
        .await
        {
            log::warn!("Ignoring model {model}: {err}");
-            return false;
+            return SpecModelPerformance::ExecutionFailed;
        }

        // then, run a sample generation with timeout and measure tps
-        tokio::select! {
-            _ = tokio::time::sleep(PERFORMANCE_TIMEOUT) => {
-                log::warn!("Ignoring model {model}: Timed out");
-            },
-            result = self.ollama_rs_client.generate(GenerationRequest::new(
+        let Ok(result) = tokio::time::timeout(
+            PERFORMANCE_TIMEOUT,
+            self.ollama_rs_client.generate(GenerationRequest::new(
                model.to_string(),
                TEST_PROMPT.to_string(),
-            )) => {
-                match result {
-                    Ok(response) => {
-                        let tps = (response.eval_count.unwrap_or_default() as f64)
-                            / (response.eval_duration.unwrap_or(1) as f64)
-                            * 1_000_000_000f64;
-
-                        if tps >= PERFORMANCE_MIN_TPS {
-                            log::info!("Model {} passed the test with tps: {}", model, tps);
-                            return true;
-                        }
-
-                        log::warn!(
-                            "Ignoring model {}: tps too low ({:.3} < {:.3})",
-                            model,
-                            tps,
-                            PERFORMANCE_MIN_TPS
-                        );
-                    }
-                    Err(e) => {
-                        log::warn!("Ignoring model {}: Task failed with error {}", model, e);
-                    }
-                }
-            }
+            )),
+        )
+        .await
+        else {
+            log::warn!("Ignoring model {model}: Timed out");
+            return SpecModelPerformance::Timeout;
        };

-        false
+        // check the result
+        match result {
+            Ok(response) => {
+                let tps = (response.eval_count.unwrap_or_default() as f64)
+                    / (response.eval_duration.unwrap_or(1) as f64)
+                    * 1_000_000_000f64;
+
+                if tps >= PERFORMANCE_MIN_TPS {
+                    log::info!("Model {model} passed the test with tps: {tps}");
+                    SpecModelPerformance::PassedWithTPS(tps)
+                } else {
+                    log::warn!(
+                        "Ignoring model {model}: tps too low ({tps:.3} < {:.3})",
+                        PERFORMANCE_MIN_TPS
+                    );
+                    SpecModelPerformance::FailedWithTPS(tps)
+                }
+            }
+            Err(err) => {
+                log::warn!("Ignoring model {model} due to: {err}");
+                SpecModelPerformance::ExecutionFailed
+            }
+        }
    }
}
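
Note: the diff relies on a `SpecModelPerformance` type exported from `dkn_utils::payloads`. The sketch below is a hypothetical stand-in reconstructed only from the variants used above (`PassedWithTPS`, `FailedWithTPS`, `ExecutionFailed`, `Timeout`); the real enum may have additional variants, derives, or payload types, and the `&str` keys stand in for the crate's `Model` type. The `f64` payload is assumed because `tps` is computed as an `f64`: `eval_count / eval_duration * 1e9`, where the `1e9` factor converts `eval_duration` (reported by Ollama in nanoseconds) into seconds so the result is tokens per second.

```rust
use std::collections::HashMap;

// Hypothetical stand-in for dkn_utils::payloads::SpecModelPerformance,
// inferred from the variants referenced in the diff above.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SpecModelPerformance {
    /// Model answered the dummy prompt fast enough; payload is tokens-per-second.
    PassedWithTPS(f64),
    /// Model answered, but below the minimum tokens-per-second threshold.
    FailedWithTPS(f64),
    /// Warmup or generation returned an error.
    ExecutionFailed,
    /// Generation did not finish within the performance timeout.
    Timeout,
}

fn main() {
    // Example of consuming the map now returned by `check`:
    // keep the models that passed, report why the rest were dropped.
    let mut performances: HashMap<&str, SpecModelPerformance> = HashMap::new();
    performances.insert("model-a", SpecModelPerformance::PassedWithTPS(21.4));
    performances.insert("model-b", SpecModelPerformance::FailedWithTPS(3.2));
    performances.insert("model-c", SpecModelPerformance::Timeout);

    for (model, perf) in &performances {
        match perf {
            SpecModelPerformance::PassedWithTPS(tps) => {
                println!("{model}: usable ({tps:.3} tps)");
            }
            other => println!("{model}: skipped ({other:?})"),
        }
    }
}
```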
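The control-flow change in the last hunk swaps `tokio::select!` against a `sleep` for `tokio::time::timeout` combined with `let ... else`, so the timeout case can early-return a dedicated variant instead of falling through. A minimal, self-contained sketch of that pattern (the task, durations, and messages here are invented for illustration):

```rust
use std::time::Duration;

// A stand-in for the generation call; deliberately slower than the timeout.
async fn slow_task() -> Result<u64, &'static str> {
    tokio::time::sleep(Duration::from_millis(50)).await;
    Ok(42)
}

#[tokio::main]
async fn main() {
    // `timeout` wraps the future; `let ... else` peels off the Elapsed case
    // so the happy path continues without an extra nesting level.
    let Ok(result) = tokio::time::timeout(Duration::from_millis(10), slow_task()).await else {
        println!("timed out");
        return;
    };

    // `result` is still the task's own Result, matched separately,
    // mirroring the `match result` block in the diff.
    match result {
        Ok(value) => println!("got {value}"),
        Err(err) => println!("task failed: {err}"),
    }
}
```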