
Commit 87c1ddb

Merge pull request #104 from firstbatchxyz/erhant/autonat-identify-fixes
use observed_addr, rfk cancellations, smol fixes
2 parents 7fe02d2 + 85e4290

File tree

17 files changed: +598 −438 lines

Cargo.lock

Lines changed: 12 additions & 230 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 8 additions & 5 deletions
```diff
@@ -1,6 +1,6 @@
 [package]
 name = "dkn-compute"
-version = "0.1.5"
+version = "0.1.6"
 edition = "2021"
 license = "Apache-2.0"
 readme = "README.md"
@@ -10,6 +10,9 @@ readme = "README.md"
 inherits = "release"
 debug = true
 
+[features]
+profiling = []
+
 [dependencies]
 tokio-util = { version = "0.7.10", features = ["rt"] }
 tokio = { version = "1", features = ["macros", "rt-multi-thread", "signal"] }
@@ -28,6 +31,7 @@ url = "2.5.0"
 urlencoding = "2.1.3"
 uuid = { version = "1.8.0", features = ["v4"] }
 rand = "0.8.5"
+semver = "1.0.23"
 
 # logging
 env_logger = "0.11.3"
@@ -41,10 +45,11 @@ sha3 = "0.10.8"
 fastbloom-rs = "0.5.9"
 
 # workflows
-ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "25467d2" }
+ollama-workflows = { git = "https://github.com/andthattoo/ollama-workflows", rev = "d6b2e1e" }
 
 # peer-to-peer
 libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed55", features = [
+# libp2p = { version = "0.54.1", features = [
     "dcutr",
     "ping",
     "relay",
@@ -60,11 +65,9 @@ libp2p = { git = "https://github.com/anilaltuner/rust-libp2p.git", rev = "be2ed5
     "quic",
     "kad",
 ] }
-
-libp2p-identity = { version = "0.2.9", features = ["secp256k1", "ed25519"] }
+libp2p-identity = { version = "0.2.9", features = ["secp256k1"] }
 tracing = { version = "0.1.40" }
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
-public-ip = "0.2.2"
 
 
 [dev-dependencies]
```

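The diff adds a `profiling` feature and a `semver` dependency without showing their call sites. Purely as a hedged illustration of why a node might pull in `semver` (for example, comparing peer or release versions), here is a minimal sketch; the version strings and the requirement are assumptions, not values from this repository:

```rust
// Hypothetical example -- this commit does not show where `semver` is used.
use semver::{Version, VersionReq};

fn main() {
    // A concrete version, e.g. this crate's own 0.1.6 from Cargo.toml.
    let version = Version::parse("0.1.6").expect("valid semver");

    // A requirement that accepts any 0.1.x release.
    let req = VersionReq::parse("^0.1.0").expect("valid requirement");
    assert!(req.matches(&version));
}
```
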
Makefile

Lines changed: 8 additions & 4 deletions
```diff
@@ -19,15 +19,19 @@ debug:
 
 .PHONY: trace # | Run with crate-level TRACE logging
 trace:
-	RUST_LOG=none,dkn_compute=trace cargo run
+	RUST_LOG=none,dkn_compute=trace,libp2p=debug cargo run
 
 .PHONY: build # | Build
 build:
 	cargo build
 
-.PHONY: profile # | Profile with flamegraph at dev level
-profile:
-	cargo flamegraph --root --profile=profiling
+.PHONY: profile-cpu # | Profile CPU usage with flamegraph
+profile-cpu:
+	cargo flamegraph --root --profile=profiling --features=profiling
+
+.PHONY: profile-mem # | Profile memory usage with instruments
+profile-mem:
+	cargo instruments --profile=profiling --features=profiling -t Leaks
 
 .PHONY: version # | Print version
 version:
```

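Both new targets build with `--features=profiling`, the feature introduced in Cargo.toml above. How the crate consumes the feature is not part of this diff; one common pattern, sketched below as an assumption rather than this repository's actual code, is to gate profiling-only behavior behind `#[cfg(feature = "profiling")]` so it compiles away in normal builds:

```rust
// Sketch only: the function name and behavior are illustrative assumptions.
#[cfg(feature = "profiling")]
fn init_profiling() {
    // e.g. enable extra instrumentation, or run for a bounded time so
    // `cargo instruments` can capture a complete trace before exit
    println!("profiling build: instrumentation enabled");
}

#[cfg(not(feature = "profiling"))]
fn init_profiling() {
    // no-op in regular builds; the call compiles away entirely
}

fn main() {
    init_profiling();
    // ... normal node startup would follow here ...
}
```
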
README.md

Lines changed: 10 additions & 6 deletions
````diff
@@ -28,9 +28,7 @@
 
 ## About
 
-A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**, and receive rewards for providing correct results.
-
-To get started, [setup](#setup) your envrionment and then see [usage](#usage) to run the node.
+A **Dria Compute Node** is a unit of computation within the Dria Knowledge Network. It's purpose is to process tasks given by the **Dria Admin Node**. To get started, [setup](#setup) your envrionment and then see [usage](#usage) to run the node.
 
 ### Tasks
 
@@ -164,6 +162,8 @@ Based on the resources of your machine, you must decide which models that you wi
 - `phi3:14b-medium-128k-instruct-q4_1`
 - `phi3:3.8b`
 - `llama3.1:latest`
+- `phi3.5:3.8b`
+- `phi3.5:3.8b-mini-instruct-fp16`
 
 #### OpenAI Models
 
@@ -338,17 +338,21 @@ make format # rustfmt
 
 ### Profiling
 
-To create a flamegraph of the application, do:
+We would like to profile both CPU and Memory usage.
+
+To create a [flamegraph](https://crates.io/crates/flamegraph) of the application, do:
 
 ```sh
-make profile
+make profile-cpu
 ```
 
 This will create a profiling build that inherits `release` mode, except with debug information.
 
+To profile memory usage, we make use of [cargo-instruments](https://crates.io/crates/cargo-instruments).
+
 > [!NOTE]
 >
-> Profiling requires superuser access.
+> CPU profiling may require super-user access.
 
 ## License
 
````
compose.yml

Lines changed: 4 additions & 1 deletion
```diff
@@ -4,16 +4,19 @@ services:
     image: "firstbatch/dkn-compute-node:latest"
     # build: "./" # use this one instead if you want to build locally
     environment:
+      RUST_LOG: ${RUST_LOG:-none,dkn_compute=info}
+      # Dria
       DKN_WALLET_SECRET_KEY: ${DKN_WALLET_SECRET_KEY}
       DKN_ADMIN_PUBLIC_KEY: ${DKN_ADMIN_PUBLIC_KEY}
       DKN_MODELS: ${DKN_MODELS}
-      RUST_LOG: ${RUST_LOG-none,dkn_compute=info}
       DKN_P2P_LISTEN_ADDR: ${DKN_P2P_LISTEN_ADDR}
       DKN_RELAY_NODES: ${DKN_RELAY_NODES}
       DKN_BOOTSTRAP_NODES: ${DKN_BOOTSTRAP_NODES}
+      # Api Keys
       OPENAI_API_KEY: ${OPENAI_API_KEY}
       SERPER_API_KEY: ${SERPER_API_KEY}
       JINA_API_KEY: ${JINA_API_KEY}
+      # Ollama
       OLLAMA_HOST: ${OLLAMA_HOST}
       OLLAMA_PORT: ${OLLAMA_PORT}
       OLLAMA_AUTO_PULL: ${OLLAMA_AUTO_PULL:-true}
```

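Beyond grouping the variables with comments, the moved `RUST_LOG` line changes `${RUST_LOG-...}` to `${RUST_LOG:-...}`. In Compose's parameter expansion, as in POSIX shells, the `-` form applies the default only when the variable is unset, while `:-` also applies it when the variable is set but empty, so an empty `RUST_LOG` now still resolves to `none,dkn_compute=info`.
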
src/config/mod.rs

Lines changed: 40 additions & 11 deletions
```diff
@@ -9,7 +9,7 @@ use ollama::OllamaConfig;
 use ollama_workflows::ModelProvider;
 use openai::OpenAIConfig;
 
-use std::env;
+use std::{env, time::Duration};
 
 #[derive(Debug, Clone)]
 pub struct DriaComputeNodeConfig {
@@ -105,34 +105,63 @@ impl DriaComputeNodeConfig {
         }
     }
 
-    /// Check if the required compute services are running, e.g. if Ollama
-    /// is detected as a provider for the chosen models, it will check that
-    /// Ollama is running.
-    pub async fn check_services(&self) -> Result<(), String> {
+    /// Check if the required compute services are running.
+    /// This has several steps:
+    ///
+    /// - If Ollama models are used, hardcoded models are checked locally, and for
+    ///   external models, the workflow is tested with a simple task with timeout.
+    /// - If OpenAI models are used, the API key is checked and the models are tested
+    ///
+    /// If both type of models are used, both services are checked.
+    /// In the end, bad models are filtered out and we simply check if we are left if any valid models at all.
+    /// If not, an error is returned.
+    pub async fn check_services(&mut self) -> Result<(), String> {
         log::info!("Checking configured services.");
+
+        // TODO: can refactor (provider, model) logic here
         let unique_providers = self.model_config.get_providers();
 
+        let mut good_models = Vec::new();
+
         // if Ollama is a provider, check that it is running & Ollama models are pulled (or pull them)
         if unique_providers.contains(&ModelProvider::Ollama) {
             let ollama_models = self
                 .model_config
                 .get_models_for_provider(ModelProvider::Ollama);
-            self.ollama_config
-                .check(ollama_models.into_iter().map(|m| m.to_string()).collect())
+
+            // ensure that the models are pulled / pull them if not
+            let good_ollama_models = self
+                .ollama_config
+                .check(ollama_models, Duration::from_secs(30))
                 .await?;
+            good_models.extend(
+                good_ollama_models
+                    .into_iter()
+                    .map(|m| (ModelProvider::Ollama, m)),
+            );
         }
 
         // if OpenAI is a provider, check that the API key is set
         if unique_providers.contains(&ModelProvider::OpenAI) {
             let openai_models = self
                 .model_config
                 .get_models_for_provider(ModelProvider::OpenAI);
-            self.openai_config
-                .check(openai_models.into_iter().map(|m| m.to_string()).collect())
-                .await?;
+
+            let good_openai_models = self.openai_config.check(openai_models).await?;
+            good_models.extend(
+                good_openai_models
+                    .into_iter()
+                    .map(|m| (ModelProvider::OpenAI, m)),
+            );
         }
 
-        Ok(())
+        // update good models
+        if good_models.is_empty() {
+            return Err("No good models found, please check logs for errors.".into());
+        } else {
+            self.model_config.models = good_models;
+            Ok(())
+        }
     }
 }
 
```

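The new `check` signature threads a `Duration` through, matching the doc comment's note that external models are "tested with a simple task with timeout". The body of `OllamaConfig::check` is not in this diff; the sketch below only demonstrates the `tokio::time::timeout` pattern such a signature suggests, with a placeholder standing in for the real per-model test:

```rust
use std::time::Duration;
use tokio::time::timeout;

/// Placeholder for the real per-model test (not shown in this diff).
async fn test_model(model: &str) -> Result<(), String> {
    let _ = model; // imagine a small generation request here
    Ok(())
}

#[tokio::main]
async fn main() {
    let models = vec!["llama3.1:latest".to_string(), "phi3.5:3.8b".to_string()];
    let mut good_models = Vec::new();

    for model in models {
        // bound each check by the caller-supplied duration, as the new
        // `check(ollama_models, Duration::from_secs(30))` call suggests
        match timeout(Duration::from_secs(30), test_model(&model)).await {
            Ok(Ok(())) => good_models.push(model),
            Ok(Err(err)) => eprintln!("{model} failed: {err}"),
            Err(_) => eprintln!("{model} timed out"),
        }
    }

    assert!(!good_models.is_empty(), "No good models found");
}
```
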
src/config/models.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -38,6 +38,7 @@ impl ModelConfig {
         Self { models }
     }
 
+    /// Returns the models that belong to a given providers from the config.
     pub fn get_models_for_provider(&self, provider: ModelProvider) -> Vec<Model> {
         self.models
             .iter()
```

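Only the doc comment is new here, but it pairs with the `(provider, model)` bookkeeping added in `check_services` above. Since the method body is truncated in this diff, the following self-contained sketch uses stub types in place of `ollama_workflows::{Model, ModelProvider}` and assumes the obvious filter-and-clone implementation:

```rust
// Stub types standing in for ollama_workflows::{ModelProvider, Model};
// the filter below is inferred from the visible `self.models.iter()` lines,
// not copied from the repository.
#[derive(Debug, Clone, PartialEq)]
enum ModelProvider {
    Ollama,
    OpenAI,
}

type Model = String;

struct ModelConfig {
    models: Vec<(ModelProvider, Model)>,
}

impl ModelConfig {
    /// Returns the models that belong to a given provider from the config.
    fn get_models_for_provider(&self, provider: ModelProvider) -> Vec<Model> {
        self.models
            .iter()
            .filter(|(p, _)| *p == provider)
            .map(|(_, model)| model.clone())
            .collect()
    }
}

fn main() {
    let config = ModelConfig {
        models: vec![
            (ModelProvider::Ollama, "phi3.5:3.8b".to_string()),
            (ModelProvider::OpenAI, "gpt-4o-mini".to_string()),
        ],
    };
    assert_eq!(
        config.get_models_for_provider(ModelProvider::Ollama),
        vec!["phi3.5:3.8b".to_string()]
    );
}
```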