Skip to content

Commit d2c36f4

Browse files
authored
Support componentizing modules that have duplicate imports. (#2145)
* Support componentizing modules that have duplicate imports. When the `--deduplicate-imports` option is passed to `wasm-tools component new`, we rewrite the input module to compact out the duplicates. This allows TinyGo's output to be wrapped in a component. Closes #2063. Using `Reencode` takes the maintenance pressure I feared off the deduplication engine; it won't have to chase future changes to the wasm format. Instead, `Reencode` should adapt to them and continue calling `function_index()` in all the right places. * Typos * Leave a slot in `dupes` even for non-function imports. * Move integration test to wit-component, where there's infra for testing against adapters.
1 parent 2dbe8b9 commit d2c36f4

File tree

11 files changed

+499
-8
lines changed

11 files changed

+499
-8
lines changed

crates/wasm-encoder/src/reencode.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -554,9 +554,9 @@ pub enum Error<E = Infallible> {
554554
InvalidConstExpr,
555555
/// The code section size listed was not valid for the wasm binary provided.
556556
InvalidCodeSectionSize,
557-
/// There was a section that does not belong into a core wasm module.
557+
/// There was a section that does not belong in a core wasm module.
558558
UnexpectedNonCoreModuleSection,
559-
/// There was a section that does not belong into a component module.
559+
/// There was a section that does not belong in a component module.
560560
UnexpectedNonComponentSection,
561561
/// A core type definition was found in a component that's not supported.
562562
UnsupportedCoreTypeInComponent,
@@ -582,11 +582,11 @@ impl<E: core::fmt::Display> core::fmt::Display for Error<E> {
582582
Self::InvalidConstExpr => write!(fmt, "The const expression was invalid"),
583583
Self::UnexpectedNonCoreModuleSection => write!(
584584
fmt,
585-
"There was a section that does not belong into a core wasm module"
585+
"There was a section that does not belong in a core wasm module"
586586
),
587587
Self::UnexpectedNonComponentSection => write!(
588588
fmt,
589-
"There was a section that does not belong into a component"
589+
"There was a section that does not belong in a component"
590590
),
591591
Self::CanonicalizedHeapTypeReference => write!(
592592
fmt,

crates/wit-component/src/encoding.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,9 @@ use types::{InstanceTypeEncoder, RootTypeEncoder, ValtypeEncoder};
9898
mod world;
9999
use world::{ComponentWorld, ImportedInterface, Lowering};
100100

101+
mod dedupe;
102+
use dedupe::dedupe_imports;
103+
101104
fn to_val_type(ty: &WasmType) -> ValType {
102105
match ty {
103106
WasmType::I32 => ValType::I32,
@@ -2677,6 +2680,7 @@ pub struct ComponentEncoder {
26772680
realloc_via_memory_grow: bool,
26782681
merge_imports_based_on_semver: Option<bool>,
26792682
pub(super) reject_legacy_names: bool,
2683+
deduplicate_imports: bool,
26802684
}
26812685

26822686
impl ComponentEncoder {
@@ -2686,7 +2690,12 @@ impl ComponentEncoder {
26862690
/// It will also add any producers information inside the component type information to the
26872691
/// core module.
26882692
pub fn module(mut self, module: &[u8]) -> Result<Self> {
2689-
let (wasm, metadata) = self.decode(module)?;
2693+
let (wasm, metadata) = self.decode(module.as_ref())?;
2694+
let wasm = if self.deduplicate_imports {
2695+
dedupe_imports(wasm.as_ref())?
2696+
} else {
2697+
wasm
2698+
};
26902699
let exports = self
26912700
.merge_metadata(metadata)
26922701
.context("failed merge WIT metadata for module with previous metadata")?;
@@ -2742,6 +2751,19 @@ impl ComponentEncoder {
27422751
self
27432752
}
27442753

2754+
/// Sets whether to remove duplicate function imports to support turning
2755+
/// otherwise-illegal core modules into components.
2756+
///
2757+
/// While multiple imports of the same module/name pair are legal in core
2758+
/// modules, they are illegal in components.
2759+
///
2760+
/// This is disabled by default because deduplicating rewrites the module and,
2761+
/// in doing so, quietly strips custom sections the rewrite may have invalidated.
///
/// The flag is consulted when a module is handed to `module()`, so set it
/// before supplying the module it should apply to.
2762+
pub fn deduplicate_imports(mut self, dedupe: bool) -> Self {
2763+
self.deduplicate_imports = dedupe;
2764+
self
2765+
}
2766+
27452767
/// Specifies a new adapter which is used to translate from a historical
27462768
/// wasm ABI to the canonical ABI and the `interface` provided.
27472769
///
Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
//! Routines to rewrite a wasm core module to remove duplicate imports, which
2+
//! are illegal in components.
3+
//!
4+
//! Thus far, this supports only duplicatively named *functions*, because our
5+
//! goal is to make `wasm-tools component new` work, and the only imports that
6+
//! supports are functions from adapters.
7+
8+
use anyhow::{anyhow, Error};
9+
use std::{
10+
borrow::Cow,
11+
collections::hash_map::{Entry, HashMap},
12+
fmt::{self, Display, Formatter},
13+
};
14+
use wasm_encoder::{
15+
self,
16+
reencode::{utils::parse_custom_section, Reencode},
17+
};
18+
use wasmparser::{self, BinaryReaderError, Import, KnownCustom, Parser, TypeRef};
19+
20+
/// Identifies a wasm import by its two-level name: the module it is imported
/// from and the field (item name) within that module.
///
/// Two imports are considered the same import exactly when both halves match,
/// which is what the `Eq`/`Hash` derives provide for use as a map key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct ImportPath {
    /// Module half of the import name
    module: String,
    /// Field half of the import name
    field: String,
}
26+
27+
/// Info which lets us quickly replace references to duplicate-imported
28+
/// functions with refs to the canonical ones once the duplicates have been
29+
/// removed
30+
///
31+
/// Default represents a state as if the import section were entirely absent
32+
/// from the module.
33+
#[derive(Default)]
34+
struct Remappings {
35+
/// Map of old func indices to new
36+
indices: Vec<u32>,
37+
/// Number of duplicate imports removed
38+
num_dupes: u32,
39+
/// Which imports indices were duplicates
40+
dupes: Vec<bool>,
41+
}
42+
43+
impl Remappings {
44+
/// Works out the mappings of a core module's func idxs to new ones that
45+
/// take into account the compacting-out of duplicate imports. The indices
46+
/// can then be looked up using `new_index_for()`.
47+
///
48+
/// Calling this twice replaces any previous state iff it returns `Ok`.
49+
fn imports<'a, T>(&'a mut self, import_section: T) -> Result<(), BinaryReaderError>
50+
where
51+
T: IntoIterator<Item = Result<Import<'a>, BinaryReaderError>>,
52+
{
53+
let mut num_dupes = 0;
54+
// A map of module/field pairs to the first import idx at which each
55+
// occurs
56+
let mut canonicals = HashMap::new();
57+
let mut indices = Vec::new();
58+
let mut dupes = Vec::new();
59+
60+
// import_section.clone() lets us iterate over the section a 2nd time
61+
// without consuming the original iterator or copying the bytes.
62+
let mut func_idx = 0u32; // Imports are a "vec" of imports and thus bounded to u32.
63+
for import in import_section.into_iter() {
64+
let import = import?;
65+
let TypeRef::Func(_) = import.ty else {
66+
dupes.push(false);
67+
continue;
68+
};
69+
let path = ImportPath {
70+
module: import.module.to_string(),
71+
field: import.name.to_string(),
72+
};
73+
match canonicals.entry(path) {
74+
Entry::Occupied(slot) => {
75+
indices.push(*slot.get());
76+
num_dupes += 1;
77+
dupes.push(true);
78+
}
79+
Entry::Vacant(slot) => {
80+
indices.push(*slot.insert(func_idx - num_dupes));
81+
dupes.push(false);
82+
}
83+
}
84+
func_idx += 1;
85+
}
86+
self.indices = indices;
87+
self.num_dupes = num_dupes;
88+
self.dupes = dupes;
89+
Ok(())
90+
}
91+
92+
/// Given an original-to-the-module func index, returns the one that should
93+
/// be used once duplicate imports are removed.
94+
fn new_index_for(&self, idx: u32) -> u32 {
95+
match self.indices.get(idx as usize) {
96+
// Return a remapping using the index vector, if the idx is within
97+
// bounds of the imports.
98+
Some(new_idx) => *new_idx,
99+
// Otherwise, it's a local idx (as from the local function section),
100+
// and we need only subtract the number of duplicates removed. (The
101+
// spec says "The index space for functions, tables, memories and
102+
// globals includes respective imports declared in the same module.
103+
// The indices of these imports precede the indices of other
104+
// definitions in the same index space.")
105+
None => idx - self.num_dupes,
106+
}
107+
}
108+
109+
/// Returns whether we contain no remappings to apply.
110+
fn is_empty(&self) -> bool {
111+
self.num_dupes == 0
112+
}
113+
114+
/// Returns whether the import with the given index has been eliminated and
115+
/// references to it redirected to a preceding one.
116+
fn is_duplicate(&self, import_idx: u32) -> bool {
117+
match self.dupes.get(import_idx as usize) {
118+
Some(b) => *b,
119+
None => false,
120+
}
121+
}
122+
}
123+
124+
/// No duplicate imports were found, so I have nothing to do.
///
/// This is a marker error rather than a real failure: it is raised to abandon
/// reencoding early when the import section contains no duplicates.
///
/// `Debug` is derived so that `Result`s carrying this error can be
/// `unwrap`ped, `expect`ed, or debug-printed.
#[derive(Debug)]
struct NoDuplicatesFound;

impl Display for NoDuplicatesFound {
    /// Writes a fixed, human-readable explanation of the condition.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("no duplicate functions were found, so there was nothing to do")
    }
}
135+
136+
#[derive(Default)]
137+
struct DedupingReencoder {
138+
remappings: Remappings,
139+
}
140+
141+
type DeduperError = wasm_encoder::reencode::Error<NoDuplicatesFound>;
142+
143+
impl Reencode for DedupingReencoder {
144+
type Error = NoDuplicatesFound;
145+
146+
// Fortunately, the import section comes before any section we need to
147+
// modify. Thus, we don't have to decode the wasm twice.
148+
fn parse_import_section(
149+
&mut self,
150+
imports: &mut wasm_encoder::ImportSection,
151+
section: wasmparser::ImportSectionReader<'_>,
152+
) -> Result<(), wasm_encoder::reencode::Error<Self::Error>> {
153+
self.remappings.imports(section.clone())?;
154+
155+
// If no duplicates, take the fast path out.
156+
if self.remappings.is_empty() {
157+
return Err(DeduperError::UserError(NoDuplicatesFound));
158+
}
159+
160+
for (idx, import) in section.into_iter().enumerate() {
161+
if !self.remappings.is_duplicate(idx as u32) {
162+
self.parse_import(imports, import?)?;
163+
}
164+
}
165+
Ok(())
166+
}
167+
168+
fn function_index(&mut self, func_idx: u32) -> u32 {
169+
self.remappings.new_index_for(func_idx)
170+
}
171+
172+
// Strip all but known-safe custom sections.
173+
//
174+
// Because custom sections are opaque to us, they may contain information
175+
// (like debugging info) which depends on byte offsets we've invalidated by
176+
// removing imports or on function indices that we didn't patch up. To be
177+
// conservative, we strip all such sections except those we know are okay.
178+
// At the moment, we retain producers and target_features sections, as well
179+
// as name sections (which Reencode does patch up).
180+
fn parse_custom_section(
181+
&mut self,
182+
module: &mut wasm_encoder::Module,
183+
section: wasmparser::CustomSectionReader<'_>,
184+
) -> Result<(), wasm_encoder::reencode::Error<Self::Error>> {
185+
match section.as_known() {
186+
KnownCustom::Name(_) | KnownCustom::Producers(_) => {
187+
// Keep this section verbatim:
188+
parse_custom_section(self, module, section)
189+
}
190+
KnownCustom::Unknown if section.name() == "target_features" => {
191+
parse_custom_section(self, module, section)
192+
}
193+
// Strip others:
194+
_ => Ok(()),
195+
}
196+
}
197+
}
198+
199+
/// Given a core wasm module that may contain duplicate function imports
200+
/// (repeats of module/name pairs), returns an equivalent one without the
201+
/// duplicates, rewriting references to those functions elsewhere in the module
202+
/// to compensate.
203+
pub fn dedupe_imports(module: &[u8]) -> Result<Cow<[u8]>, Error> {
204+
let mut new_module = wasm_encoder::Module::new();
205+
let result =
206+
DedupingReencoder::default().parse_core_module(&mut new_module, Parser::new(0), module);
207+
match result {
208+
// Fast path: return module verbatim
209+
Err(DeduperError::UserError(NoDuplicatesFound)) => Ok(Cow::Borrowed(module)),
210+
// Rewrite module
211+
Ok(_) => Ok(Cow::Owned(new_module.finish())),
212+
Err(err) => Err(anyhow!("reencoding failed: {}", err)),
213+
}
214+
}
215+
216+
#[cfg(test)]
mod test {
    use super::*;
    use wasmparser::{GlobalType, ValType};

    /// Convenience to construct function imports for test data
    fn func<'a>(module: &'a str, name: &'a str) -> Result<Import<'a>, BinaryReaderError> {
        let ty = TypeRef::Func(0); // We don't care about this.
        Ok(Import { module, name, ty })
    }

    /// Convenience to construct a non-function (global) import for test data
    fn global() -> Result<Import<'static>, BinaryReaderError> {
        let ty = TypeRef::Global(GlobalType {
            content_type: ValType::I32,
            mutable: false,
            shared: false,
        });
        Ok(Import {
            module: "non",
            name: "function",
            ty,
        })
    }

    /// A default `Remappings`, and one fed only unique imports, must both
    /// report that there is nothing to remap.
    #[test]
    fn remappings_empty_state() -> Result<(), BinaryReaderError> {
        let mut remappings = Remappings::default();
        assert!(remappings.is_empty());
        assert!(
            !remappings.is_duplicate(0),
            "func idxs should default to not being duplicates."
        );
        assert_eq!(
            remappings.new_index_for(7),
            7,
            "new_index_for() should return unchanged indices unless it knows a better mapping."
        );

        let unique = vec![func("A", "a"), func("B", "b"), func("C", "c")];
        remappings.imports(unique)?;
        assert!(
            remappings.is_empty(),
            "remappings should show as empty when there are function imports but no duplicates."
        );
        Ok(())
    }

    /// Exercises contiguous and discontiguous duplicates, singletons, and
    /// indices outside the mapping table (which represent local, unimported
    /// functions).
    #[test]
    fn remappings_populated_state() -> Result<(), BinaryReaderError> {
        let imports = vec![
            func("A", "a"),
            func("A", "a"),
            // Include a non-function import to make sure those are skipped and
            // don't take up a slot in our newly constructed func index space.
            global(),
            func("C", "c"),
            func("D", "d"),
            func("D", "d"),
            func("D", "d"),
            func("G", "g"),
            func("D", "d"),
            func("I", "i"),
        ];
        let mut remappings = Remappings::default();
        remappings.imports(imports)?;

        assert!(!remappings.is_empty());
        assert!(!remappings.is_duplicate(0));
        assert!(remappings.is_duplicate(1));

        // There are 9 function imports (func idxs 0 through 8), so idx 9 goes
        // one beyond them and exercises the local-function path.
        let remapped: Vec<u32> = (0..=9)
            .map(|old_idx| remappings.new_index_for(old_idx))
            .collect();
        assert_eq!(remapped, vec![0, 0, 1, 2, 2, 2, 3, 2, 4, 5]);
        Ok(())
    }

    /// Show that non-function imports don't throw off the `is_duplicate()`
    /// positions of later imports.
    #[test]
    fn remappings_non_function_imports() -> Result<(), BinaryReaderError> {
        let imports = vec![func("A", "a"), global(), func("A", "a")];
        let mut remappings = Remappings::default();
        remappings.imports(imports)?;

        assert!(!remappings.is_duplicate(1));
        assert!(remappings.is_duplicate(2));
        Ok(())
    }
}

0 commit comments

Comments
 (0)