
Commit ad14fee

added tps reporter
1 parent: 285995e

4 files changed (+215, -9 lines)


Cargo.lock

Lines changed: 119 additions & 3 deletions
Generated file; diff not rendered by default.

workflows/Cargo.toml

Lines changed: 8 additions & 1 deletion
@@ -10,6 +10,7 @@ authors = ["Erhan Tezcan <[email protected]>"]
 [dependencies]
 # ollama-rs is re-exported from ollama-workflows as well
 ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows" }
+env_logger.workspace = true
 
 # async stuff
 tokio-util.workspace = true
@@ -29,7 +30,13 @@ rand.workspace = true
 log.workspace = true
 eyre.workspace = true
 
+# system info
+sysinfo = "0.32.0"
+
 [dev-dependencies]
 # only used for tests
-env_logger.workspace = true
 dotenvy.workspace = true
+
+[[bin]]
+name = "tps"
+path = "src/bin/tps.rs"
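
This manifest change moves env_logger into the main dependencies (the binary logs at runtime, not just in tests) and registers the new tps binary so Cargo can run it directly. A hypothetical invocation, assuming the package is named dkn-workflows (inferred from the dkn_workflows import in the binary, not stated in this diff) and with RUST_LOG set so env_logger actually emits the info-level reports:

RUST_LOG=info cargo run -p dkn-workflows --bin tps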

workflows/src/bin/tps.rs

Lines changed: 87 additions & 5 deletions
@@ -1,9 +1,91 @@
+use dkn_workflows::{DriaWorkflowsConfig, OllamaConfig};
+use ollama_workflows::ollama_rs::{
+    generation::{completion::request::GenerationRequest, options::GenerationOptions},
+    Ollama,
+};
+use sysinfo::{CpuRefreshKind, RefreshKind, System};
+
 #[tokio::main]
 async fn main() {
-    ///////
-    // loop over all Ollama models
-    // report TPS to console
+    // initialize logger
+    env_logger::init();
+
+    let cfg = DriaWorkflowsConfig::new_from_csv("finalend/hermes-3-llama-3.1:8b-q8_0,phi3:14b-medium-4k-instruct-q4_1,phi3:14b-medium-128k-instruct-q4_1,phi3.5:3.8b,phi3.5:3.8b-mini-instruct-fp16,gemma2:9b-instruct-q8_0,gemma2:9b-instruct-fp16,llama3.1:latest,llama3.1:8b-instruct-q8_0,llama3.1:8b-instruct-fp16,llama3.1:70b-instruct-q4_0,llama3.1:70b-instruct-q8_0,llama3.2:1b,llama3.2:3b,qwen2.5:7b-instruct-q5_0,qwen2.5:7b-instruct-fp16,qwen2.5:32b-instruct-fp16,qwen2.5-coder:1.5b,qwen2.5-coder:7b-instruct,llama3.2:3b,qwen2.5-coder:7b-instruct-q8_0,qwen2.5-coder:7b-instruct-fp16,deepseek-coder:6.7b,mixtral:8x7b");
+    let config = OllamaConfig::default();
+    let ollama = Ollama::new(config.host, config.port);
+
+    log::info!("Starting...");
+    // ensure that all lists of CPUs and processes are filled
+    let mut system = System::new_all();
+    // update all information of the system
+    system.refresh_all();
+
+    log::debug!("Getting system information...");
+    let brand = system.cpus()[0].brand().to_string();
+    let os_name = System::name().unwrap_or_else(|| "Unknown".to_string());
+    let os_version = System::long_os_version().unwrap_or_else(|| "Unknown".to_string());
+    let cpu_usage = system.global_cpu_usage();
+    let total_memory = system.total_memory();
+    let used_memory = system.used_memory();
+
+    for (_, model) in cfg.models {
+        log::info!("Pulling model: {}", model);
+
+        // pull model
+        match ollama.pull_model(model.to_string(), false).await {
+            Ok(status) => log::info!("Status: {}", status.message),
+            Err(err) => {
+                log::error!("Failed to pull model {}: {:?}", model, err);
+            }
+        }
+
+        log::debug!("Creating request...");
+        // create dummy request
+        let mut generation_request =
+            GenerationRequest::new(model.to_string(), "compute 6780 * 1200".to_string());
+
+        if let Ok(num_thread) = std::env::var("OLLAMA_NUM_THREAD") {
+            generation_request = generation_request.options(
+                GenerationOptions::default().num_thread(
+                    num_thread
+                        .parse()
+                        .expect("num threads should be a positive integer"),
+                ),
+            );
+        }
 
-    ///////
-    // report machine info here as well, such as OS / CPU / RAM / Ollama version
+        // generate response
+        match ollama.generate(generation_request).await {
+            Ok(response) => {
+                log::debug!("Got response for model {}", model);
+                // compute TPS
+                let tps = (response.eval_count.unwrap_or_default() as f64)
+                    / (response.eval_duration.unwrap_or(1) as f64)
+                    * 1_000_000_000f64;
+                // report machine info
+                log::info!(
+                    "\n Model: {} \n TPS: {} \n OS: {} {} \n Version: {} \n CPU Usage: % {} \n Total Memory: {} KB \n Used Memory: {} KB ",
+                    model,
+                    tps,
+                    brand,
+                    os_name,
+                    os_version,
+                    cpu_usage,
+                    total_memory,
+                    used_memory,
+                );
+            }
+            Err(e) => {
+                log::warn!("Ignoring model {}: Workflow failed with error {}", model, e);
+            }
+        }
+        // refresh CPU usage (https://docs.rs/sysinfo/latest/sysinfo/struct.Cpu.html#method.cpu_usage)
+        system =
+            System::new_with_specifics(RefreshKind::new().with_cpu(CpuRefreshKind::everything()));
+        // wait a bit because CPU usage is based on diff
+        std::thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL);
+        // refresh CPUs again to get actual value
+        system.refresh_cpu_usage();
+    }
+    log::info!("Finished");
 }
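
The core measurement above divides eval_count (tokens generated) by eval_duration (which Ollama reports in nanoseconds), scaling by 1e9 to get tokens per second. A minimal standalone sketch of that arithmetic, with made-up example values:

fn main() {
    // Hypothetical values: 256 tokens generated over 8 seconds (in nanoseconds).
    let eval_count: u64 = 256;
    let eval_duration: u64 = 8_000_000_000;
    // tokens per second = tokens / (nanoseconds / 1e9)
    let tps = (eval_count as f64) / (eval_duration as f64) * 1_000_000_000f64;
    println!("TPS: {tps}"); // 256 tokens / 8 s = 32 TPS
}

The CPU-refresh dance at the end of the loop exists because sysinfo computes CPU usage as a delta between two refreshes, so a single sample reports nothing meaningful. A minimal sketch of just that pattern, using the same sysinfo calls as the diff:

use sysinfo::{CpuRefreshKind, RefreshKind, System};

fn main() {
    // First refresh establishes a baseline; usage is a diff between refreshes.
    let mut system =
        System::new_with_specifics(RefreshKind::new().with_cpu(CpuRefreshKind::everything()));
    // Wait at least the minimum interval so the second sample can differ.
    std::thread::sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL);
    // Second refresh yields an actual usage value.
    system.refresh_cpu_usage();
    println!("global CPU usage: {:.1}%", system.global_cpu_usage());
}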

workflows/src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -1,4 +1,5 @@
 mod providers;
+pub use providers::OllamaConfig;
 
 mod apis;
 
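
Re-exporting OllamaConfig from the crate root is what lets the new binary write use dkn_workflows::{DriaWorkflowsConfig, OllamaConfig} instead of reaching into the private providers module. A minimal sketch of the consumer side, mirroring the setup in tps.rs (the host and port field names are taken from the diff above):

use dkn_workflows::OllamaConfig;

fn main() {
    // Default points at a local Ollama server; these are the fields
    // the tps binary passes to Ollama::new.
    let config = OllamaConfig::default();
    println!("Ollama at {}:{}", config.host, config.port);
}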
