Skip to content

Commit c4c9125

Browse files
authored
Fix metadata/strip on components with core modules (#932)
This fixes the `metadata add` and `strip` loops in the `wasm-tools` CLI for components to correctly handle nested components and nested modules. Previously the output contents would get corrupted due to modules showing up multiple times by accident.
1 parent a45ae28 commit c4c9125

18 files changed

+170
-83
lines changed

crates/wasm-encoder/src/component.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ const INSTANCE_SORT: u8 = 0x05;
4242
pub trait ComponentSection: Encode {
4343
/// Gets the section identifier for this section.
4444
fn id(&self) -> u8;
45+
46+
/// Appends this section to the specified destination list of bytes.
47+
fn append_to_component(&self, dst: &mut Vec<u8>) {
48+
dst.push(self.id());
49+
self.encode(dst);
50+
}
4551
}
4652

4753
/// Known section identifiers of WebAssembly components.
@@ -101,13 +107,19 @@ pub struct Component {
101107
}
102108

103109
impl Component {
110+
/// The 8-byte header at the beginning of all components.
111+
#[rustfmt::skip]
112+
pub const HEADER: [u8; 8] = [
113+
// Magic
114+
0x00, 0x61, 0x73, 0x6D,
115+
// Version
116+
0x0c, 0x00, 0x01, 0x00,
117+
];
118+
104119
/// Begin writing a new `Component`.
105120
pub fn new() -> Self {
106121
Self {
107-
bytes: vec![
108-
0x00, 0x61, 0x73, 0x6D, // magic (`\0asm`)
109-
0x0c, 0x00, 0x01, 0x00, // version
110-
],
122+
bytes: Self::HEADER.to_vec(),
111123
}
112124
}
113125

crates/wasm-encoder/src/core.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ pub(crate) const CORE_TAG_SORT: u8 = 0x04;
4848
pub trait Section: Encode {
4949
/// Gets the section identifier for this section.
5050
fn id(&self) -> u8;
51+
52+
/// Appends this section to the specified destination list of bytes.
53+
fn append_to(&self, dst: &mut Vec<u8>) {
54+
dst.push(self.id());
55+
self.encode(dst);
56+
}
5157
}
5258

5359
/// Known section identifiers of WebAssembly modules.
@@ -110,16 +116,20 @@ pub struct Module {
110116
}
111117

112118
impl Module {
119+
/// The 8-byte header at the beginning of all core wasm modules.
120+
#[rustfmt::skip]
121+
pub const HEADER: [u8; 8] = [
122+
// Magic
123+
0x00, 0x61, 0x73, 0x6D,
124+
// Version
125+
0x01, 0x00, 0x00, 0x00,
126+
];
127+
113128
/// Begin writing a new `Module`.
114129
#[rustfmt::skip]
115130
pub fn new() -> Self {
116131
Module {
117-
bytes: vec![
118-
// Magic
119-
0x00, 0x61, 0x73, 0x6D,
120-
// Version
121-
0x01, 0x00, 0x00, 0x00,
122-
],
132+
bytes: Self::HEADER.to_vec(),
123133
}
124134
}
125135

crates/wasm-metadata/src/lib.rs

Lines changed: 46 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use anyhow::Result;
22
use indexmap::{map::Entry, IndexMap};
33
use serde::Serialize;
44
use std::fmt;
5+
use std::mem;
6+
use wasm_encoder::{ComponentSection as _, ComponentSectionId, Encode, Section};
57
use wasmparser::{
68
ComponentNameSectionReader, NameSectionReader, Parser, Payload::*, ProducersSectionReader,
79
};
@@ -222,125 +224,101 @@ fn rewrite_wasm(
222224
add_producers: &Producers,
223225
input: &[u8],
224226
) -> Result<Vec<u8>> {
225-
let mut parser = Parser::new(0).parse_all(&input);
226-
227-
enum Output {
228-
Component(wasm_encoder::Component),
229-
Module(wasm_encoder::Module),
230-
}
231-
impl Output {
232-
fn section(
233-
&mut self,
234-
section: &(impl wasm_encoder::Section + wasm_encoder::ComponentSection),
235-
) {
236-
match self {
237-
Output::Component(c) => {
238-
c.section(section);
239-
}
240-
Output::Module(m) => {
241-
m.section(section);
242-
}
243-
}
244-
}
245-
fn finish(self) -> Vec<u8> {
246-
match self {
247-
Output::Component(c) => c.finish(),
248-
Output::Module(m) => m.finish(),
249-
}
250-
}
251-
}
252-
253-
let mut output = match parser
254-
.next()
255-
.ok_or_else(|| anyhow::anyhow!("at least a version tag on binary"))??
256-
{
257-
Version {
258-
encoding: wasmparser::Encoding::Component,
259-
..
260-
} => Output::Component(wasm_encoder::Component::new()),
261-
Version {
262-
encoding: wasmparser::Encoding::Module,
263-
..
264-
} => Output::Module(wasm_encoder::Module::new()),
265-
_ => {
266-
panic!("first item from parser must be a Version tag")
267-
}
268-
};
269-
270227
let mut producers_found = false;
271228
let mut names_found = false;
272-
let mut depth = 0;
273-
for payload in parser {
229+
let mut stack = Vec::new();
230+
let mut output = Vec::new();
231+
for payload in Parser::new(0).parse_all(&input) {
274232
let payload = payload?;
275233

276234
// Track nesting depth, so that we don't mess with inner producer sections:
277235
match payload {
278-
ModuleSection { .. } | ComponentSection { .. } => depth += 1,
279-
End { .. } => depth -= 1,
236+
Version { encoding, .. } => {
237+
output.extend_from_slice(match encoding {
238+
wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
239+
wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
240+
});
241+
}
242+
ModuleSection { .. } | ComponentSection { .. } => {
243+
stack.push(mem::take(&mut output));
244+
continue;
245+
}
246+
End { .. } => {
247+
let mut parent = match stack.pop() {
248+
Some(c) => c,
249+
None => break,
250+
};
251+
if output.starts_with(&wasm_encoder::Component::HEADER) {
252+
parent.push(ComponentSectionId::Component as u8);
253+
output.encode(&mut parent);
254+
} else {
255+
parent.push(ComponentSectionId::CoreModule as u8);
256+
output.encode(&mut parent);
257+
}
258+
output = parent;
259+
}
280260
_ => {}
281261
}
282262

283263
// Process the wasm sections:
284264
match payload {
285265
// Only rewrite the outermost producers section:
286-
CustomSection(c) if c.name() == "producers" && depth == 0 => {
266+
CustomSection(c) if c.name() == "producers" && stack.len() == 0 => {
287267
producers_found = true;
288268
let section = ProducersSectionReader::new(c.data(), c.data_offset())?;
289269
let mut producers = Producers::from_reader(section)?;
290270
// Add to the section according to the command line flags:
291271
producers.merge(&add_producers);
292272
// Encode into output:
293-
output.section(&producers.section());
273+
producers.section().append_to(&mut output);
294274
}
295275

296-
CustomSection(c) if c.name() == "name" && depth == 0 => {
276+
CustomSection(c) if c.name() == "name" && stack.len() == 0 => {
297277
names_found = true;
298278
let section = NameSectionReader::new(c.data(), c.data_offset());
299279
let mut names = ModuleNames::from_reader(section)?;
300280
names.merge(&ModuleNames::from_name(add_name));
301281

302-
output.section(&names.section()?.as_custom());
282+
names.section()?.as_custom().append_to(&mut output);
303283
}
304284

305-
CustomSection(c) if c.name() == "component-name" && depth == 0 => {
285+
CustomSection(c) if c.name() == "component-name" && stack.len() == 0 => {
306286
names_found = true;
307287
let section = ComponentNameSectionReader::new(c.data(), c.data_offset());
308288
let mut names = ComponentNames::from_reader(section)?;
309289
names.merge(&ComponentNames::from_name(add_name));
310-
output.section(&names.section()?.as_custom());
290+
names.section()?.as_custom().append_to(&mut output);
311291
}
312292

313293
// All other sections get passed through unmodified:
314294
_ => {
315295
if let Some((id, range)) = payload.as_section() {
316-
output.section(&wasm_encoder::RawSection {
296+
wasm_encoder::RawSection {
317297
id,
318298
data: &input[range],
319-
});
299+
}
300+
.append_to(&mut output);
320301
}
321302
}
322303
}
323304
}
324305
if !names_found && add_name.is_some() {
325-
match &mut output {
326-
Output::Component(c) => {
327-
let names = ComponentNames::from_name(add_name);
328-
c.section(&names.section()?);
329-
}
330-
Output::Module(m) => {
331-
let names = ModuleNames::from_name(add_name);
332-
m.section(&names.section()?);
333-
}
306+
if output.starts_with(&wasm_encoder::Component::HEADER) {
307+
let names = ComponentNames::from_name(add_name);
308+
names.section()?.append_to_component(&mut output);
309+
} else {
310+
let names = ModuleNames::from_name(add_name);
311+
names.section()?.append_to(&mut output)
334312
}
335313
}
336314
if !producers_found && !add_producers.is_empty() {
337315
let mut producers = Producers::empty();
338316
// Add to the section according to the command line flags:
339317
producers.merge(add_producers);
340318
// Encode into output:
341-
output.section(&producers.section());
319+
producers.section().append_to(&mut output);
342320
}
343-
Ok(output.finish())
321+
Ok(output)
344322
}
345323

346324
/// A tree of the metadata found in a WebAssembly binary.

src/bin/wasm-tools/strip.rs

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use anyhow::Result;
2-
use wasm_encoder::RawSection;
2+
use std::mem;
3+
use wasm_encoder::{ComponentSectionId, Encode, RawSection, Section};
34
use wasmparser::{Parser, Payload::*};
45

56
/// Removes custom sections from an input WebAssembly file.
@@ -45,10 +46,41 @@ impl Opts {
4546
name != "name"
4647
};
4748

48-
let mut module = wasm_encoder::Module::new();
49+
let mut output = Vec::new();
50+
let mut stack = Vec::new();
4951

5052
for payload in Parser::new(0).parse_all(&input) {
5153
let payload = payload?;
54+
55+
// Track nesting: each nested module/component is rebuilt in its own buffer and re-encoded into its parent at `End`:
56+
match payload {
57+
Version { encoding, .. } => {
58+
output.extend_from_slice(match encoding {
59+
wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
60+
wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
61+
});
62+
}
63+
ModuleSection { .. } | ComponentSection { .. } => {
64+
stack.push(mem::take(&mut output));
65+
continue;
66+
}
67+
End { .. } => {
68+
let mut parent = match stack.pop() {
69+
Some(c) => c,
70+
None => break,
71+
};
72+
if output.starts_with(&wasm_encoder::Component::HEADER) {
73+
parent.push(ComponentSectionId::Component as u8);
74+
output.encode(&mut parent);
75+
} else {
76+
parent.push(ComponentSectionId::CoreModule as u8);
77+
output.encode(&mut parent);
78+
}
79+
output = parent;
80+
}
81+
_ => {}
82+
}
83+
5284
match &payload {
5385
CustomSection(c) => {
5486
if strip_custom_section(c.name()) {
@@ -59,15 +91,16 @@ impl Opts {
5991
_ => {}
6092
}
6193
if let Some((id, range)) = payload.as_section() {
62-
module.section(&RawSection {
94+
RawSection {
6395
id,
6496
data: &input[range],
65-
});
97+
}
98+
.append_to(&mut output);
6699
}
67100
}
68101

69102
self.io.output(wasm_tools::Output::Wasm {
70-
bytes: module.as_slice(),
103+
bytes: &output,
71104
wat: self.wat,
72105
})?;
73106
Ok(())
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
;; RUN: metadata add --language foo % | metadata add --language bar | metadata add --sdk foo=2 | metadata show
2+
(module)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
module:
2+
language:
3+
foo
4+
bar
5+
sdk:
6+
foo: 2
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
;; RUN: metadata add % --name foo | metadata show
2+
(module $bar)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
module foo:

tests/cli/add-metadata.wat

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
;; RUN: metadata add % --name foo --language bar --processed-by baz=1 --sdk my-sdk=2 | metadata show
2+
(module)

tests/cli/add-metadata.wat.stdout

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
module foo:
2+
language:
3+
bar
4+
processed-by:
5+
baz: 1
6+
sdk:
7+
my-sdk: 2

0 commit comments

Comments
 (0)