Skip to content

Commit 84895b8

Browse files
committed
rm profiling feature
1 parent b35d7f4 commit 84895b8

File tree

7 files changed

+81
-47
lines changed

7 files changed

+81
-47
lines changed

Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
11
[workspace]
22
resolver = "2"
33
members = ["compute", "p2p", "workflows"]
4+
# compute node is the default member, until Oracle comes in
5+
# then, a Launcher will be the default member
46
default-members = ["compute"]
57

8+
# profiling build for flamegraphs
9+
[profile.profiling]
10+
inherits = "release"
11+
debug = true
12+
613
[workspace.package]
714
edition = "2021"
815
license = "Apache-2.0"

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ build:
2727

2828
.PHONY: profile-cpu # | Profile CPU usage with flamegraph
2929
profile-cpu:
30-
cargo flamegraph --root --profile=profiling --features=profiling
30+
DKN_EXIT_TIMEOUT=120 cargo flamegraph --root --profile=profiling
3131

3232
.PHONY: profile-mem # | Profile memory usage with instruments
3333
profile-mem:
34-
cargo instruments --profile=profiling --features=profiling -t Allocations
34+
DKN_EXIT_TIMEOUT=120 cargo instruments --profile=profiling -t Allocations
3535

3636
###############################################################################
3737
.PHONY: test # | Run tests
@@ -42,6 +42,7 @@ test:
4242
.PHONY: lint # | Run linter (clippy)
4343
lint:
4444
cargo clippy
45+
cargo clippy
4546

4647
.PHONY: format # | Run formatter (cargo fmt)
4748
format:

compute/.env.example

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## DRIA (required) ##
2+
# Secret key of your compute node, 32 byte in hexadecimal.
3+
# e.g.: DKN_WALLET_SECRET_KEY=0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
4+
DKN_WALLET_SECRET_KEY=
5+
# Public key of Dria Admin node, 33-byte (compressed) in hexadecimal.
6+
# You don't need to change this, simply copy and paste it.
7+
DKN_ADMIN_PUBLIC_KEY=0208ef5e65a9c656a6f92fb2c770d5d5e2ecffe02a6aade19207f75110be6ae658
8+
# model1,model2,model3,... (comma separated, case-insensitive)
9+
# example: phi3:3.8b,gpt-4o-mini
10+
DKN_MODELS=
11+
12+
## DRIA (optional) ##
13+
# P2P address, you don't need to change this unless this port is already in use.
14+
DKN_P2P_LISTEN_ADDR=/ip4/0.0.0.0/tcp/4001
15+
# Comma-separated static relay nodes
16+
DKN_RELAY_NODES=
17+
# Comma-separated static bootstrap nodes
18+
DKN_BOOTSTRAP_NODES=
19+
20+
# PROFILING ONLY: set to a number of seconds to wait before exiting
21+
# DKN_EXIT_TIMEOUT=
22+
23+
## Open AI (if used, required) ##
24+
OPENAI_API_KEY=
25+
26+
## Ollama (if used, optional) ##
27+
# do not change this, it is used by Docker
28+
OLLAMA_HOST=http://host.docker.internal
29+
# you can change the port if you would like
30+
OLLAMA_PORT=11434
31+
# if "true", automatically pull models from Ollama
32+
# if "false", you have to download manually
33+
OLLAMA_AUTO_PULL=true
34+
35+
## Additional Services (optional)
36+
SERPER_API_KEY=
37+
JINA_API_KEY=

compute/Cargo.toml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,6 @@ license.workspace = true
66
readme = "README.md"
77
authors = ["Erhan Tezcan <[email protected]>"]
88

9-
# profiling build for flamegraphs
10-
[profile.profiling]
11-
inherits = "release"
12-
debug = true
13-
14-
[features]
15-
# used by flamegraphs & instruments
16-
profiling = []
17-
189
[dependencies]
1910
tokio-util = { version = "0.7.10", features = ["rt"] }
2011
tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] }

compute/src/config.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,14 @@ impl DriaComputeNodeConfig {
7979
let address = to_address(&public_key);
8080
log::info!("Node Address: 0x{}", hex::encode(address));
8181

82-
let model_config =
82+
let workflows =
8383
DriaWorkflowsConfig::new_from_csv(&env::var("DKN_MODELS").unwrap_or_default());
8484
#[cfg(not(test))]
85-
if model_config.models.is_empty() {
85+
if workflows.models.is_empty() {
8686
log::error!("No models were provided, make sure to restart with at least one model provided within DKN_MODELS.");
8787
panic!("No models provided.");
8888
}
89-
log::info!("Models: {:?}", model_config.models);
89+
log::info!("Models: {:?}", workflows.models);
9090

9191
let p2p_listen_addr_str = env::var("DKN_P2P_LISTEN_ADDR")
9292
.map(|addr| addr.trim_matches('"').to_string())
@@ -99,12 +99,13 @@ impl DriaComputeNodeConfig {
9999
secret_key,
100100
public_key,
101101
address,
102-
workflows: model_config,
102+
workflows,
103103
p2p_listen_addr,
104104
}
105105
}
106106

107107
/// Asserts that the configured listen address is free.
108+
/// Throws an error if the address is already in use.
108109
pub fn assert_address_not_in_use(&self) -> Result<()> {
109110
if address_in_use(&self.p2p_listen_addr) {
110111
return Err(eyre!(

compute/src/main.rs

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
use std::env;
2+
13
use dkn_compute::*;
2-
use eyre::Result;
4+
use eyre::{Context, Result};
35
use tokio_util::sync::CancellationToken;
46

57
#[tokio::main]
68
async fn main() -> Result<()> {
7-
if let Err(e) = dotenvy::dotenv() {
8-
log::warn!("Could not load .env file: {}", e);
9-
}
10-
9+
let dotenv_result = dotenvy::dotenv();
1110
env_logger::builder()
1211
.format_timestamp(Some(env_logger::TimestampPrecision::Millis))
1312
.init();
13+
if let Err(e) = dotenv_result {
14+
log::warn!("Could not load .env file: {}", e);
15+
}
1416

1517
log::info!(
1618
r#"
@@ -26,49 +28,45 @@ async fn main() -> Result<()> {
2628

2729
let token = CancellationToken::new();
2830
let cancellation_token = token.clone();
29-
// add cancellation check
3031
tokio::spawn(async move {
31-
// FIXME: weird feature-gating here bugs with IDE, fix this later
32-
#[cfg(feature = "profiling")]
33-
{
34-
const PROFILE_DURATION_SECS: u64 = 120;
35-
tokio::time::sleep(tokio::time::Duration::from_secs(PROFILE_DURATION_SECS)).await;
32+
if let Some(timeout_str) = env::var("DKN_EXIT_TIMEOUT").ok() {
33+
// add cancellation check
34+
let duration_secs = timeout_str.parse().unwrap_or(120);
35+
tokio::time::sleep(tokio::time::Duration::from_secs(duration_secs)).await;
3636
cancellation_token.cancel();
37+
} else {
38+
if let Err(err) = wait_for_termination(cancellation_token.clone()).await {
39+
log::error!("Error waiting for termination: {:?}", err);
40+
log::error!("Cancelling due to unexpected error.");
41+
cancellation_token.cancel();
42+
};
3743
}
38-
39-
#[cfg(not(feature = "profiling"))]
40-
if let Err(err) = wait_for_termination(cancellation_token.clone()).await {
41-
log::error!("Error waiting for termination: {:?}", err);
42-
log::error!("Cancelling due to unexpected error.");
43-
cancellation_token.cancel();
44-
};
4544
});
4645

47-
// create configurations & check required services
48-
let config = DriaComputeNodeConfig::new();
46+
// create configurations & check required services & address in use
47+
let mut config = DriaComputeNodeConfig::new();
4948
config.assert_address_not_in_use()?;
5049
let service_check_token = token.clone();
51-
let mut config_clone = config.clone();
5250
let service_check_handle = tokio::spawn(async move {
5351
tokio::select! {
5452
_ = service_check_token.cancelled() => {
5553
log::info!("Service check cancelled.");
54+
config
5655
}
57-
result = config_clone.workflows.check_services() => {
56+
result = config.workflows.check_services() => {
5857
if let Err(err) = result {
5958
log::error!("Error checking services: {:?}", err);
6059
panic!("Service check failed.")
6160
}
61+
config
6262
}
6363
}
6464
});
65+
let config = service_check_handle
66+
.await
67+
.wrap_err("error during service checks")?;
6568

66-
// wait for service check to complete
67-
if let Err(err) = service_check_handle.await {
68-
log::error!("Service check handle error: {}", err);
69-
panic!("Could not exit service check thread handle.");
70-
};
71-
69+
log::warn!("Using models: {:#?}", config.workflows.models);
7270
if !token.is_cancelled() {
7371
// launch the node
7472
let node_token = token.clone();
@@ -97,11 +95,9 @@ async fn main() -> Result<()> {
9795
Ok(())
9896
}
9997

100-
// FIXME: remove this `unused` once we have a better way to handle this
10198
/// Waits for various termination signals, and cancels the given token when the signal is received.
10299
///
103100
/// Handles Unix and Windows [target families](https://doc.rust-lang.org/reference/conditional-compilation.html#target_family).
104-
#[allow(unused)]
105101
async fn wait_for_termination(cancellation: CancellationToken) -> Result<()> {
106102
#[cfg(unix)]
107103
{

workflows/src/providers/ollama.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,10 @@ impl OllamaConfig {
8282
/// Check if requested models exist in Ollama, and then tests them using a workflow.
8383
pub async fn check(&self, external_models: Vec<Model>) -> Result<Vec<Model>> {
8484
log::info!(
85-
"Checking Ollama requirements (auto-pull {}, workflow timeout: {}s)",
85+
"Checking Ollama requirements (auto-pull {}, timeout: {}s, min tps: {})",
8686
if self.auto_pull { "on" } else { "off" },
87-
self.timeout.as_secs()
87+
self.timeout.as_secs(),
88+
self.min_tps
8889
);
8990

9091
let ollama = Ollama::new(&self.host, self.port);

0 commit comments

Comments
 (0)