Skip to content

Commit e39253c

Browse files
authored
PVF: Add worker check during tests and benches (#1771)
1 parent 8cba5b9 commit e39253c

File tree

19 files changed

+285
-113
lines changed

19 files changed

+285
-113
lines changed

Cargo.lock

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

polkadot/cli/src/command.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ impl SubstrateCli for Cli {
5050

5151
fn impl_version() -> String {
5252
let commit_hash = env!("SUBSTRATE_CLI_COMMIT_HASH");
53-
format!("{NODE_VERSION}-{commit_hash}")
53+
format!("{}-{commit_hash}", NODE_VERSION)
5454
}
5555

5656
fn description() -> String {

polkadot/node/core/pvf/Cargo.toml

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ cfg-if = "1.0"
1212
futures = "0.3.21"
1313
futures-timer = "3.0.2"
1414
gum = { package = "tracing-gum", path = "../../gum" }
15+
is_executable = "1.0.1"
1516
libc = "0.2.139"
1617
pin-project = "1.0.9"
1718
rand = "0.8.5"
@@ -34,19 +35,23 @@ sp-maybe-compressed-blob = { path = "../../../../substrate/primitives/maybe-comp
3435
polkadot-node-core-pvf-prepare-worker = { path = "prepare-worker", optional = true }
3536
polkadot-node-core-pvf-execute-worker = { path = "execute-worker", optional = true }
3637

37-
[build-dependencies]
38-
substrate-build-script-utils = { path = "../../../../substrate/utils/build-script-utils" }
39-
4038
[dev-dependencies]
4139
assert_matches = "1.4.0"
40+
criterion = { version = "0.4.0", default-features = false, features = ["cargo_bench_support", "async_tokio"] }
4241
hex-literal = "0.4.1"
4342
polkadot-node-core-pvf-common = { path = "common", features = ["test-utils"] }
44-
# For the puppet worker, depend on ourselves with the test-utils feature.
43+
# For benches and integration tests, depend on ourselves with the test-utils
44+
# feature.
4545
polkadot-node-core-pvf = { path = ".", features = ["test-utils"] }
46+
rococo-runtime = { path = "../../../runtime/rococo" }
4647

4748
adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" }
4849
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }
4950

51+
[[bench]]
52+
name = "host_prepare_rococo_runtime"
53+
harness = false
54+
5055
[features]
5156
ci-only-tests = []
5257
jemalloc-allocator = [ "polkadot-node-core-pvf-common/jemalloc-allocator" ]
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright (C) Parity Technologies (UK) Ltd.
2+
// This file is part of Polkadot.
3+
4+
// Polkadot is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
9+
// Polkadot is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU General Public License for more details.
13+
14+
// You should have received a copy of the GNU General Public License
15+
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
16+
17+
//! Benchmarks for preparation through the host. We use a real PVF to get realistic results.
18+
19+
use criterion::{criterion_group, criterion_main, BatchSize, Criterion, SamplingMode};
20+
use parity_scale_codec::Encode;
21+
use polkadot_node_core_pvf::{
22+
start, testing, Config, Metrics, PrepareError, PrepareJobKind, PrepareStats, PvfPrepData,
23+
ValidationError, ValidationHost,
24+
};
25+
use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult};
26+
use polkadot_primitives::ExecutorParams;
27+
use rococo_runtime::WASM_BINARY;
28+
use std::time::Duration;
29+
use tokio::{runtime::Handle, sync::Mutex};
30+
31+
const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3);
32+
const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30);
33+
34+
struct TestHost {
35+
host: Mutex<ValidationHost>,
36+
}
37+
38+
impl TestHost {
39+
fn new_with_config<F>(handle: &Handle, f: F) -> Self
40+
where
41+
F: FnOnce(&mut Config),
42+
{
43+
let (prepare_worker_path, execute_worker_path) = testing::get_and_check_worker_paths();
44+
45+
let cache_dir = tempfile::tempdir().unwrap();
46+
let mut config = Config::new(
47+
cache_dir.path().to_owned(),
48+
None,
49+
prepare_worker_path,
50+
execute_worker_path,
51+
);
52+
f(&mut config);
53+
let (host, task) = start(config, Metrics::default());
54+
let _ = handle.spawn(task);
55+
Self { host: Mutex::new(host) }
56+
}
57+
58+
async fn precheck_pvf(
59+
&self,
60+
code: &[u8],
61+
executor_params: ExecutorParams,
62+
) -> Result<PrepareStats, PrepareError> {
63+
let (result_tx, result_rx) = futures::channel::oneshot::channel();
64+
65+
let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024)
66+
.expect("Compression works");
67+
68+
self.host
69+
.lock()
70+
.await
71+
.precheck_pvf(
72+
PvfPrepData::from_code(
73+
code.into(),
74+
executor_params,
75+
TEST_PREPARATION_TIMEOUT,
76+
PrepareJobKind::Prechecking,
77+
),
78+
result_tx,
79+
)
80+
.await
81+
.unwrap();
82+
result_rx.await.unwrap()
83+
}
84+
}
85+
86+
fn host_prepare_rococo_runtime(c: &mut Criterion) {
87+
polkadot_node_core_pvf_common::sp_tracing::try_init_simple();
88+
89+
let rt = tokio::runtime::Runtime::new().unwrap();
90+
91+
let blob = WASM_BINARY.expect("You need to build the WASM binaries to run the tests!");
92+
let pvf = match sp_maybe_compressed_blob::decompress(&blob, 64 * 1024 * 1024) {
93+
Ok(code) => PvfPrepData::from_code(
94+
code.into_owned(),
95+
ExecutorParams::default(),
96+
Duration::from_secs(360),
97+
PrepareJobKind::Compilation,
98+
),
99+
Err(e) => {
100+
panic!("Cannot decompress blob: {:?}", e);
101+
},
102+
};
103+
104+
let mut group = c.benchmark_group("prepare rococo");
105+
group.sampling_mode(SamplingMode::Flat);
106+
group.sample_size(20);
107+
group.measurement_time(Duration::from_secs(240));
108+
group.bench_function("host: prepare Rococo runtime", |b| {
109+
b.to_async(&rt).iter_batched(
110+
|| {
111+
(
112+
TestHost::new_with_config(rt.handle(), |cfg| {
113+
cfg.prepare_workers_hard_max_num = 1;
114+
}),
115+
pvf.clone().code(),
116+
)
117+
},
118+
|(host, pvf_code)| async move {
119+
// `PvfPrepData` is designed to be cheap to clone, so cloning shouldn't affect the
120+
// benchmark accuracy.
121+
let _stats = host.precheck_pvf(&pvf_code, Default::default()).await.unwrap();
122+
},
123+
BatchSize::SmallInput,
124+
)
125+
});
126+
group.finish();
127+
}
128+
129+
criterion_group!(prepare, host_prepare_rococo_runtime);
130+
criterion_main!(prepare);

polkadot/node/core/pvf/bin/puppet_worker.rs

Lines changed: 0 additions & 17 deletions
This file was deleted.

polkadot/node/core/pvf/common/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,5 @@ tempfile = "3.3.0"
3737

3838
[features]
3939
# This feature is used to export test code to other crates without putting it in the production build.
40-
# Also used for building the puppet worker.
4140
test-utils = []
4241
jemalloc-allocator = []

polkadot/node/core/pvf/common/src/worker/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,14 @@ use tokio::{io, runtime::Runtime};
3535
/// spawning the desired worker.
3636
#[macro_export]
3737
macro_rules! decl_worker_main {
38-
($expected_command:expr, $entrypoint:expr, $worker_version:expr $(,)*) => {
38+
($expected_command:expr, $entrypoint:expr, $worker_version:expr, $worker_version_hash:expr $(,)*) => {
39+
fn get_full_version() -> String {
40+
format!("{}-{}", $worker_version, $worker_version_hash)
41+
}
42+
3943
fn print_help(expected_command: &str) {
4044
println!("{} {}", expected_command, $worker_version);
45+
println!("commit: {}", $worker_version_hash);
4146
println!();
4247
println!("PVF worker that is called by polkadot.");
4348
}
@@ -67,6 +72,11 @@ macro_rules! decl_worker_main {
6772
println!("{}", $worker_version);
6873
return
6974
},
75+
// Useful for debugging. --version is used for version checks.
76+
"--full-version" => {
77+
println!("{}", get_full_version());
78+
return
79+
},
7080

7181
"--check-can-enable-landlock" => {
7282
#[cfg(target_os = "linux")]

polkadot/node/core/pvf/src/artifacts.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ impl ArtifactPathId {
141141
}
142142
}
143143

144+
#[derive(Debug)]
144145
pub enum ArtifactState {
145146
/// The artifact is ready to be used by the executor.
146147
///

polkadot/node/core/pvf/src/host.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,8 @@ async fn handle_to_host(
446446
/// This tries to prepare the PVF by compiling the WASM blob within a timeout set in
447447
/// `PvfPrepData`.
448448
///
449-
/// If the prepare job failed previously, we may retry it under certain conditions.
449+
/// We don't retry artifacts that previously failed preparation. We don't expect multiple
450+
/// pre-checking requests.
450451
async fn handle_precheck_pvf(
451452
artifacts: &mut Artifacts,
452453
prepare_queue: &mut mpsc::Sender<prepare::ToQueue>,
@@ -464,8 +465,7 @@ async fn handle_precheck_pvf(
464465
ArtifactState::Preparing { waiting_for_response, num_failures: _ } =>
465466
waiting_for_response.push(result_sender),
466467
ArtifactState::FailedToProcess { error, .. } => {
467-
// Do not retry failed preparation if another pre-check request comes in. We do not
468-
// retry pre-checking, anyway.
468+
// Do not retry an artifact that previously failed preparation.
469469
let _ = result_sender.send(PrepareResult::Err(error.clone()));
470470
},
471471
}
@@ -764,7 +764,7 @@ async fn handle_prepare_done(
764764
let last_time_failed = SystemTime::now();
765765
let num_failures = *num_failures + 1;
766766

767-
gum::warn!(
767+
gum::error!(
768768
target: LOG_TARGET,
769769
?artifact_id,
770770
time_failed = ?last_time_failed,
@@ -846,7 +846,7 @@ async fn sweeper_task(mut sweeper_rx: mpsc::Receiver<PathBuf>) {
846846
gum::trace!(
847847
target: LOG_TARGET,
848848
?result,
849-
"Sweeping the artifact file {}",
849+
"Sweeped the artifact file {}",
850850
condemned.display(),
851851
);
852852
},

polkadot/node/core/pvf/src/lib.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,5 +116,18 @@ pub use polkadot_node_core_pvf_common::{
116116
SecurityStatus,
117117
};
118118

119+
use std::{path::Path, process::Command};
120+
119121
/// The log target for this crate.
120122
pub const LOG_TARGET: &str = "parachain::pvf";
123+
124+
/// Utility to get the version of a worker, used for version checks.
125+
///
126+
/// The worker's existence at the given path must be checked separately.
127+
pub fn get_worker_version(worker_path: &Path) -> std::io::Result<String> {
128+
let worker_version = Command::new(worker_path).args(["--version"]).output()?.stdout;
129+
Ok(std::str::from_utf8(&worker_version)
130+
.expect("version is printed as a string; qed")
131+
.trim()
132+
.to_string())
133+
}

0 commit comments

Comments
 (0)