Skip to content

Commit b9ed65a

Browse files
authored
Merge pull request #2754 from lann/factors-fix-tests
factors: Fix tests / CI
2 parents 1cae51f + c5ce6f9 commit b9ed65a

File tree

22 files changed

+1234
-3386
lines changed

22 files changed

+1234
-3386
lines changed

crates/componentize/src/bugs.rs

Lines changed: 63 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,63 @@
1-
use anyhow::bail;
2-
use wasm_metadata::Producers;
3-
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
1+
use crate::module_info::ModuleInfo;
42

5-
/// Represents the detected likelihood of the allocation bug fixed in
6-
/// https://github.com/WebAssembly/wasi-libc/pull/377 being present in a Wasm
7-
/// module.
3+
pub const EARLIEST_PROBABLY_SAFE_CLANG_VERSION: &str = "15.0.7";
4+
5+
/// This error represents the likely presence of the allocation bug fixed in
6+
/// https://github.com/WebAssembly/wasi-libc/pull/377 in a Wasm module.
87
#[derive(Debug, PartialEq)]
9-
pub enum WasiLibc377Bug {
10-
ProbablySafe,
11-
ProbablyUnsafe,
12-
Unknown,
8+
pub struct WasiLibc377Bug {
9+
clang_version: Option<String>,
1310
}
1411

1512
impl WasiLibc377Bug {
16-
pub fn detect(module: &[u8]) -> anyhow::Result<Self> {
17-
for payload in Parser::new(0).parse_all(module) {
18-
match payload? {
19-
Payload::Version { encoding, .. } if encoding != Encoding::Module => {
20-
bail!("detection only applicable to modules");
21-
}
22-
Payload::ExportSection(reader) => {
23-
for export in reader {
24-
let export = export?;
25-
if export.kind == ExternalKind::Func && export.name == "cabi_realloc" {
26-
// `cabi_realloc` is a good signal that this module
27-
// uses wit-bindgen, making it probably-safe.
28-
tracing::debug!("Found cabi_realloc export");
29-
return Ok(Self::ProbablySafe);
30-
}
31-
}
32-
}
33-
Payload::CustomSection(c) if c.name() == "producers" => {
34-
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
35-
if let Some(clang_version) =
36-
producers.get("processed-by").and_then(|f| f.get("clang"))
37-
{
38-
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");
39-
40-
// Clang/LLVM version is a good proxy for wasi-sdk
41-
// version; the allocation bug was fixed in wasi-sdk-18
42-
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
43-
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
44-
return if (major, minor, patch) >= (15, 0, 7) {
45-
Ok(Self::ProbablySafe)
46-
} else {
47-
Ok(Self::ProbablyUnsafe)
48-
};
49-
} else {
50-
tracing::warn!(
51-
clang_version,
52-
"Unexpected producers.processed-by.clang version"
53-
);
54-
}
55-
}
56-
}
57-
_ => (),
13+
/// Detects the likely presence of this bug.
14+
pub fn check(module_info: &ModuleInfo) -> Result<(), Self> {
15+
if module_info.probably_uses_wit_bindgen() {
16+
// Modules built with wit-bindgen are probably safe.
17+
return Ok(());
18+
}
19+
if let Some(clang_version) = &module_info.clang_version {
20+
// Clang/LLVM version is a good proxy for wasi-sdk
21+
// version; the allocation bug was fixed in wasi-sdk-18
22+
// and LLVM was updated to 15.0.7 in wasi-sdk-19.
23+
if let Some((major, minor, patch)) = parse_clang_version(clang_version) {
24+
let earliest_safe =
25+
parse_clang_version(EARLIEST_PROBABLY_SAFE_CLANG_VERSION).unwrap();
26+
if (major, minor, patch) >= earliest_safe {
27+
return Ok(());
28+
} else {
29+
return Err(Self {
30+
clang_version: Some(clang_version.clone()),
31+
});
32+
};
33+
} else {
34+
tracing::warn!(
35+
clang_version,
36+
"Unexpected producers.processed-by.clang version"
37+
);
5838
}
5939
}
60-
Ok(Self::Unknown)
40+
// If we can't assert that the module uses wit-bindgen OR was compiled
41+
// with a new-enough wasi-sdk, conservatively assume it may be buggy.
42+
Err(Self {
43+
clang_version: None,
44+
})
6145
}
6246
}
6347

48+
impl std::fmt::Display for WasiLibc377Bug {
49+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50+
write!(
51+
f,
52+
"This Wasm module may have been compiled with wasi-sdk version <19 which \
53+
contains a critical memory safety bug. For more information, see: \
54+
https://github.com/fermyon/spin/issues/2552"
55+
)
56+
}
57+
}
58+
59+
impl std::error::Error for WasiLibc377Bug {}
60+
6461
fn parse_clang_version(ver: &str) -> Option<(u16, u16, u16)> {
6562
// Strip optional trailing detail after space
6663
let ver = ver.split(' ').next().unwrap();
@@ -77,42 +74,42 @@ mod tests {
7774

7875
#[test]
7976
fn wasi_libc_377_detect() {
80-
use WasiLibc377Bug::*;
81-
for (wasm, expected) in [
82-
(r#"(module)"#, Unknown),
77+
for (wasm, safe) in [
78+
(r#"(module)"#, false),
8379
(
8480
r#"(module (func (export "cabi_realloc") (unreachable)))"#,
85-
ProbablySafe,
81+
true,
8682
),
8783
(
8884
r#"(module (func (export "some_other_function") (unreachable)))"#,
89-
Unknown,
85+
false,
9086
),
9187
(
9288
r#"(module (@producers (processed-by "clang" "16.0.0 extra-stuff")))"#,
93-
ProbablySafe,
89+
true,
9490
),
9591
(
9692
r#"(module (@producers (processed-by "clang" "15.0.7")))"#,
97-
ProbablySafe,
93+
true,
9894
),
9995
(
10096
r#"(module (@producers (processed-by "clang" "15.0.6")))"#,
101-
ProbablyUnsafe,
97+
false,
10298
),
10399
(
104-
r#"(module (@producers (processed-by "clang" "14.0.0")))"#,
105-
ProbablyUnsafe,
100+
r#"(module (@producers (processed-by "clang" "14.0.0 extra-stuff")))"#,
101+
false,
106102
),
107103
(
108104
r#"(module (@producers (processed-by "clang" "a.b.c")))"#,
109-
Unknown,
105+
false,
110106
),
111107
] {
112108
eprintln!("WAT: {wasm}");
113109
let module = wat::parse_str(wasm).unwrap();
114-
let detected = WasiLibc377Bug::detect(&module).unwrap();
115-
assert_eq!(detected, expected);
110+
let module_info = ModuleInfo::from_module(&module).unwrap();
111+
let detected = WasiLibc377Bug::check(&module_info);
112+
assert!(detected.is_ok() == safe, "{wasm} -> {detected:?}");
116113
}
117114
}
118115
}

crates/componentize/src/lib.rs

Lines changed: 40 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use {
44
anyhow::{anyhow, Context, Result},
55
convert::{IntoEntityType, IntoExportKind},
6+
module_info::ModuleInfo,
67
std::{borrow::Cow, collections::HashSet},
78
wasm_encoder::{CustomSection, ExportSection, ImportSection, Module, RawSection},
89
wasmparser::{Encoding, Parser, Payload},
@@ -14,6 +15,7 @@ pub mod bugs;
1415
#[cfg(test)]
1516
mod abi_conformance;
1617
mod convert;
18+
mod module_info;
1719

1820
const SPIN_ADAPTER: &[u8] = include_bytes!(concat!(
1921
env!("OUT_DIR"),
@@ -51,8 +53,9 @@ pub fn componentize_if_necessary(module_or_component: &[u8]) -> Result<Cow<[u8]>
5153
}
5254

5355
pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
54-
match WitBindgenVersion::from_module(module)? {
55-
WitBindgenVersion::V0_2 => componentize_old_bindgen(module),
56+
let module_info = ModuleInfo::from_module(module)?;
57+
match WitBindgenVersion::detect(&module_info)? {
58+
WitBindgenVersion::V0_2OrNone => componentize_old_module(module, &module_info),
5659
WitBindgenVersion::GreaterThanV0_4 => componentize_new_bindgen(module),
5760
WitBindgenVersion::Other(other) => Err(anyhow::anyhow!(
5861
"cannot adapt modules created with wit-bindgen version {other}"
@@ -65,40 +68,36 @@ pub fn componentize(module: &[u8]) -> Result<Vec<u8>> {
6568
#[derive(Debug)]
6669
enum WitBindgenVersion {
6770
GreaterThanV0_4,
68-
V0_2,
71+
V0_2OrNone,
6972
Other(String),
7073
}
7174

7275
impl WitBindgenVersion {
73-
fn from_module(module: &[u8]) -> Result<Self> {
74-
let (_, bindgen) = metadata::decode(module)?;
75-
if let Some(producers) = bindgen.producers {
76-
if let Some(processors) = producers.get("processed-by") {
77-
let bindgen_version = processors.iter().find_map(|(key, value)| {
78-
key.starts_with("wit-bindgen").then_some(value.as_str())
79-
});
80-
if let Some(v) = bindgen_version {
81-
let mut parts = v.split('.');
82-
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
83-
return Ok(Self::Other(v.to_owned()));
84-
};
85-
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
86-
return Ok(Self::Other(v.to_owned()));
87-
};
88-
if (major == 0 && minor < 5) || major >= 1 {
89-
return Ok(Self::Other(v.to_owned()));
90-
}
91-
// Either there should be no patch version or nothing after patch
92-
if parts.next().is_none() || parts.next().is_none() {
93-
return Ok(Self::GreaterThanV0_4);
94-
} else {
95-
return Ok(Self::Other(v.to_owned()));
96-
}
76+
fn detect(module_info: &ModuleInfo) -> Result<Self> {
77+
if let Some(processors) = module_info.bindgen_processors() {
78+
let bindgen_version = processors
79+
.iter()
80+
.find_map(|(key, value)| key.starts_with("wit-bindgen").then_some(value.as_str()));
81+
if let Some(v) = bindgen_version {
82+
let mut parts = v.split('.');
83+
let Some(major) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
84+
return Ok(Self::Other(v.to_owned()));
85+
};
86+
let Some(minor) = parts.next().and_then(|p| p.parse::<u8>().ok()) else {
87+
return Ok(Self::Other(v.to_owned()));
88+
};
89+
if (major == 0 && minor < 5) || major >= 1 {
90+
return Ok(Self::Other(v.to_owned()));
91+
}
92+
// Either there should be no patch version or nothing after patch
93+
if parts.next().is_none() || parts.next().is_none() {
94+
return Ok(Self::GreaterThanV0_4);
95+
} else {
96+
return Ok(Self::Other(v.to_owned()));
9797
}
9898
}
9999
}
100-
101-
Ok(Self::V0_2)
100+
Ok(Self::V0_2OrNone)
102101
}
103102
}
104103

@@ -111,6 +110,18 @@ pub fn componentize_new_bindgen(module: &[u8]) -> Result<Vec<u8>> {
111110
.encode()
112111
}
113112

113+
/// Modules *not* produced with wit-bindgen >= 0.5 could be old wit-bindgen or no wit-bindgen
114+
pub fn componentize_old_module(module: &[u8], module_info: &ModuleInfo) -> Result<Vec<u8>> {
115+
// If the module has a _start export and doesn't obviously use wit-bindgen
116+
// it is likely an old p1 command module.
117+
if module_info.has_start_export && !module_info.probably_uses_wit_bindgen() {
118+
bugs::WasiLibc377Bug::check(module_info)?;
119+
componentize_command(module)
120+
} else {
121+
componentize_old_bindgen(module)
122+
}
123+
}
124+
114125
/// Modules produced with wit-bindgen 0.2 need more extensive adaption
115126
pub fn componentize_old_bindgen(module: &[u8]) -> Result<Vec<u8>> {
116127
let (module, exports) = retarget_imports_and_get_exports(ADAPTER_NAME, module)?;
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
use wasm_metadata::Producers;
2+
use wasmparser::{Encoding, ExternalKind, Parser, Payload};
3+
use wit_component::metadata::Bindgen;
4+
5+
// wit-bindgen has used both of these historically.
6+
const CANONICAL_ABI_REALLOC_EXPORTS: &[&str] = &["cabi_realloc", "canonical_abi_realloc"];
7+
8+
/// Stores various bits of info parsed from a Wasm module that are relevant to
9+
/// componentization.
10+
#[derive(Default)]
11+
pub struct ModuleInfo {
12+
pub bindgen: Option<Bindgen>,
13+
pub clang_version: Option<String>,
14+
pub realloc_export: Option<String>,
15+
pub has_start_export: bool,
16+
}
17+
18+
impl ModuleInfo {
19+
/// Parses info from the given binary module bytes.
20+
pub fn from_module(module: &[u8]) -> anyhow::Result<Self> {
21+
let mut info = Self::default();
22+
for payload in Parser::new(0).parse_all(module) {
23+
match payload? {
24+
Payload::Version { encoding, .. } => {
25+
anyhow::ensure!(
26+
encoding == Encoding::Module,
27+
"ModuleInfo::from_module is only applicable to Modules; got a {encoding:?}"
28+
);
29+
}
30+
Payload::ExportSection(reader) => {
31+
for export in reader {
32+
let export = export?;
33+
if export.kind == ExternalKind::Func {
34+
if CANONICAL_ABI_REALLOC_EXPORTS.contains(&export.name) {
35+
tracing::debug!(
36+
"Found canonical ABI realloc export {:?}",
37+
export.name
38+
);
39+
info.realloc_export = Some(export.name.to_string());
40+
} else if export.name == "_start" {
41+
tracing::debug!("Found _start export");
42+
info.has_start_export = true;
43+
}
44+
}
45+
}
46+
}
47+
Payload::CustomSection(c) => {
48+
let section_name = c.name();
49+
if section_name == "producers" {
50+
let producers = Producers::from_bytes(c.data(), c.data_offset())?;
51+
if let Some(clang_version) =
52+
producers.get("processed-by").and_then(|f| f.get("clang"))
53+
{
54+
tracing::debug!(clang_version, "Parsed producers.processed-by.clang");
55+
info.clang_version = Some(clang_version.to_string());
56+
}
57+
} else if section_name.starts_with("component-type") {
58+
match decode_bindgen_custom_section(section_name, c.data()) {
59+
Ok(bindgen) => {
60+
tracing::debug!("Parsed bindgen section {section_name:?}");
61+
info.bindgen = Some(bindgen);
62+
}
63+
Err(err) => tracing::warn!(
64+
"Error parsing bindgen section {section_name:?}: {err}"
65+
),
66+
}
67+
}
68+
}
69+
_ => (),
70+
}
71+
}
72+
Ok(info)
73+
}
74+
75+
/// Returns true if the given module was heuristically probably compiled
76+
/// with wit-bindgen.
77+
pub fn probably_uses_wit_bindgen(&self) -> bool {
78+
if self.bindgen.is_some() {
79+
// Presence of bindgen metadata is a strong signal
80+
true
81+
} else if self.realloc_export.is_some() {
82+
// A canonical ABI realloc export is a decent signal
83+
true
84+
} else {
85+
false
86+
}
87+
}
88+
89+
/// Returns the wit-bindgen metadata producers processed-by field, if
90+
/// present.
91+
pub fn bindgen_processors(&self) -> Option<wasm_metadata::ProducersField> {
92+
self.bindgen
93+
.as_ref()?
94+
.producers
95+
.as_ref()?
96+
.get("processed-by")
97+
}
98+
}
99+
100+
/// This is a silly workaround for the limited public interface available in
101+
/// [`wit_component::metadata`].
102+
// TODO: Make Bindgen::decode_custom_section public?
103+
fn decode_bindgen_custom_section(name: &str, data: &[u8]) -> anyhow::Result<Bindgen> {
104+
let mut module = wasm_encoder::Module::new();
105+
module.section(&wasm_encoder::CustomSection {
106+
name: name.into(),
107+
data: data.into(),
108+
});
109+
let (_, bindgen) = wit_component::metadata::decode(module.as_slice())?;
110+
Ok(bindgen)
111+
}

0 commit comments

Comments
 (0)