|
//! Routines to rewrite a wasm core module to remove duplicate imports, which
//! are illegal in components.
//!
//! Thus far, only duplicately named *functions* are supported, because our
//! goal is to make `wasm-tools component new` work, and the only imports that
//! command supports are functions from adapters.
| 7 | +
|
| 8 | +use anyhow::{anyhow, Error}; |
| 9 | +use std::{ |
| 10 | + borrow::Cow, |
| 11 | + collections::hash_map::{Entry, HashMap}, |
| 12 | + fmt::{self, Display, Formatter}, |
| 13 | +}; |
| 14 | +use wasm_encoder::{ |
| 15 | + self, |
| 16 | + reencode::{utils::parse_custom_section, Reencode}, |
| 17 | +}; |
| 18 | +use wasmparser::{self, BinaryReaderError, Import, KnownCustom, Parser, TypeRef}; |
| 19 | + |
/// Identifies a wasm import by the two-level name it is imported under: the
/// module name plus the field (item) name within that module.
///
/// Two imports with equal `ImportPath`s are considered duplicates of each
/// other, so this type is hashable and comparable for use as a map key.
#[derive(Debug, PartialEq, Eq, Hash)]
struct ImportPath {
    /// The module half of the import name
    module: String,
    /// The field half of the import name
    field: String,
}
| 26 | + |
/// Bookkeeping that lets references to duplicate-imported functions be
/// redirected to the surviving (canonical) import once the duplicates have
/// been dropped from the module.
///
/// The `Default` value describes a module with no import section at all:
/// nothing is a duplicate and no index needs to change.
#[derive(Default)]
struct Remappings {
    /// New function index for each imported function, indexed by its original
    /// function index. Non-function imports occupy no slot here.
    indices: Vec<u32>,
    /// Whether each import was a duplicate, indexed by position in the import
    /// section (so non-function imports *do* occupy a slot, always `false`).
    dupes: Vec<bool>,
    /// How many duplicate function imports were removed
    num_dupes: u32,
}
| 42 | + |
| 43 | +impl Remappings { |
| 44 | + /// Works out the mappings of a core module's func idxs to new ones that |
| 45 | + /// take into account the compacting-out of duplicate imports. The indices |
| 46 | + /// can then be looked up using `new_index_for()`. |
| 47 | + /// |
| 48 | + /// Calling this twice replaces any previous state iff it returns `Ok`. |
| 49 | + fn imports<'a, T>(&'a mut self, import_section: T) -> Result<(), BinaryReaderError> |
| 50 | + where |
| 51 | + T: IntoIterator<Item = Result<Import<'a>, BinaryReaderError>>, |
| 52 | + { |
| 53 | + let mut num_dupes = 0; |
| 54 | + // A map of module/field pairs to the first import idx at which each |
| 55 | + // occurs |
| 56 | + let mut canonicals = HashMap::new(); |
| 57 | + let mut indices = Vec::new(); |
| 58 | + let mut dupes = Vec::new(); |
| 59 | + |
| 60 | + // import_section.clone() lets us iterate over the section a 2nd time |
| 61 | + // without consuming the original iterator or copying the bytes. |
| 62 | + let mut func_idx = 0u32; // Imports are a "vec" of imports and thus bounded to u32. |
| 63 | + for import in import_section.into_iter() { |
| 64 | + let import = import?; |
| 65 | + let TypeRef::Func(_) = import.ty else { |
| 66 | + dupes.push(false); |
| 67 | + continue; |
| 68 | + }; |
| 69 | + let path = ImportPath { |
| 70 | + module: import.module.to_string(), |
| 71 | + field: import.name.to_string(), |
| 72 | + }; |
| 73 | + match canonicals.entry(path) { |
| 74 | + Entry::Occupied(slot) => { |
| 75 | + indices.push(*slot.get()); |
| 76 | + num_dupes += 1; |
| 77 | + dupes.push(true); |
| 78 | + } |
| 79 | + Entry::Vacant(slot) => { |
| 80 | + indices.push(*slot.insert(func_idx - num_dupes)); |
| 81 | + dupes.push(false); |
| 82 | + } |
| 83 | + } |
| 84 | + func_idx += 1; |
| 85 | + } |
| 86 | + self.indices = indices; |
| 87 | + self.num_dupes = num_dupes; |
| 88 | + self.dupes = dupes; |
| 89 | + Ok(()) |
| 90 | + } |
| 91 | + |
| 92 | + /// Given an original-to-the-module func index, returns the one that should |
| 93 | + /// be used once duplicate imports are removed. |
| 94 | + fn new_index_for(&self, idx: u32) -> u32 { |
| 95 | + match self.indices.get(idx as usize) { |
| 96 | + // Return a remapping using the index vector, if the idx is within |
| 97 | + // bounds of the imports. |
| 98 | + Some(new_idx) => *new_idx, |
| 99 | + // Otherwise, it's a local idx (as from the local function section), |
| 100 | + // and we need only subtract the number of duplicates removed. (The |
| 101 | + // spec says "The index space for functions, tables, memories and |
| 102 | + // globals includes respective imports declared in the same module. |
| 103 | + // The indices of these imports precede the indices of other |
| 104 | + // definitions in the same index space.") |
| 105 | + None => idx - self.num_dupes, |
| 106 | + } |
| 107 | + } |
| 108 | + |
| 109 | + /// Returns whether we contain no remappings to apply. |
| 110 | + fn is_empty(&self) -> bool { |
| 111 | + self.num_dupes == 0 |
| 112 | + } |
| 113 | + |
| 114 | + /// Returns whether the import with the given index has been eliminated and |
| 115 | + /// references to it redirected to a preceding one. |
| 116 | + fn is_duplicate(&self, import_idx: u32) -> bool { |
| 117 | + match self.dupes.get(import_idx as usize) { |
| 118 | + Some(b) => *b, |
| 119 | + None => false, |
| 120 | + } |
| 121 | + } |
| 122 | +} |
| 123 | + |
/// Marker error reporting that the module contained no duplicate function
/// imports, so there is nothing to rewrite.
struct NoDuplicatesFound;

impl Display for NoDuplicatesFound {
    /// Writes a fixed, human-readable explanation.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str("no duplicate functions were found, so there was nothing to do")
    }
}
| 135 | + |
| 136 | +#[derive(Default)] |
| 137 | +struct DedupingReencoder { |
| 138 | + remappings: Remappings, |
| 139 | +} |
| 140 | + |
| 141 | +type DeduperError = wasm_encoder::reencode::Error<NoDuplicatesFound>; |
| 142 | + |
| 143 | +impl Reencode for DedupingReencoder { |
| 144 | + type Error = NoDuplicatesFound; |
| 145 | + |
| 146 | + // Fortunately, the import section comes before any section we need to |
| 147 | + // modify. Thus, we don't have to decode the wasm twice. |
| 148 | + fn parse_import_section( |
| 149 | + &mut self, |
| 150 | + imports: &mut wasm_encoder::ImportSection, |
| 151 | + section: wasmparser::ImportSectionReader<'_>, |
| 152 | + ) -> Result<(), wasm_encoder::reencode::Error<Self::Error>> { |
| 153 | + self.remappings.imports(section.clone())?; |
| 154 | + |
| 155 | + // If no duplicates, take the fast path out. |
| 156 | + if self.remappings.is_empty() { |
| 157 | + return Err(DeduperError::UserError(NoDuplicatesFound)); |
| 158 | + } |
| 159 | + |
| 160 | + for (idx, import) in section.into_iter().enumerate() { |
| 161 | + if !self.remappings.is_duplicate(idx as u32) { |
| 162 | + self.parse_import(imports, import?)?; |
| 163 | + } |
| 164 | + } |
| 165 | + Ok(()) |
| 166 | + } |
| 167 | + |
| 168 | + fn function_index(&mut self, func_idx: u32) -> u32 { |
| 169 | + self.remappings.new_index_for(func_idx) |
| 170 | + } |
| 171 | + |
| 172 | + // Strip all but known-safe custom sections. |
| 173 | + // |
| 174 | + // Because custom sections are opaque to us, they may contain information |
| 175 | + // (like debugging info) which depends on byte offsets we've invalidated by |
| 176 | + // removing imports or on function indices that we didn't patch up. To be |
| 177 | + // conservative, we strip all such sections except those we know are okay. |
| 178 | + // At the moment, we retain producers and target_features sections, as well |
| 179 | + // as name sections (which Reencode does patch up). |
| 180 | + fn parse_custom_section( |
| 181 | + &mut self, |
| 182 | + module: &mut wasm_encoder::Module, |
| 183 | + section: wasmparser::CustomSectionReader<'_>, |
| 184 | + ) -> Result<(), wasm_encoder::reencode::Error<Self::Error>> { |
| 185 | + match section.as_known() { |
| 186 | + KnownCustom::Name(_) | KnownCustom::Producers(_) => { |
| 187 | + // Keep this section verbatim: |
| 188 | + parse_custom_section(self, module, section) |
| 189 | + } |
| 190 | + KnownCustom::Unknown if section.name() == "target_features" => { |
| 191 | + parse_custom_section(self, module, section) |
| 192 | + } |
| 193 | + // Strip others: |
| 194 | + _ => Ok(()), |
| 195 | + } |
| 196 | + } |
| 197 | +} |
| 198 | + |
| 199 | +/// Given a core wasm module that may contain duplicate function imports |
| 200 | +/// (repeats of module/name pairs), returns an equivalent one without the |
| 201 | +/// duplicates, rewriting references to those functions elsewhere in the module |
| 202 | +/// to compensate. |
| 203 | +pub fn dedupe_imports(module: &[u8]) -> Result<Cow<[u8]>, Error> { |
| 204 | + let mut new_module = wasm_encoder::Module::new(); |
| 205 | + let result = |
| 206 | + DedupingReencoder::default().parse_core_module(&mut new_module, Parser::new(0), module); |
| 207 | + match result { |
| 208 | + // Fast path: return module verbatim |
| 209 | + Err(DeduperError::UserError(NoDuplicatesFound)) => Ok(Cow::Borrowed(module)), |
| 210 | + // Rewrite module |
| 211 | + Ok(_) => Ok(Cow::Owned(new_module.finish())), |
| 212 | + Err(err) => Err(anyhow!("reencoding failed: {}", err)), |
| 213 | + } |
| 214 | +} |
| 215 | + |
#[cfg(test)]
mod test {
    use super::*;
    use wasmparser::{GlobalType, ValType};

    /// Convenience to construct function imports for test data
    fn func<'a>(module: &'a str, name: &'a str) -> Result<Import<'a>, BinaryReaderError> {
        Ok(Import {
            module,
            name,
            ty: TypeRef::Func(0), // We don't care about this.
        })
    }

    /// Convenience to construct a non-function (immutable i32 global) import
    /// for test data
    fn global<'a>(module: &'a str, name: &'a str) -> Result<Import<'a>, BinaryReaderError> {
        Ok(Import {
            module,
            name,
            ty: TypeRef::Global(GlobalType {
                content_type: ValType::I32,
                mutable: false,
                shared: false,
            }),
        })
    }

    #[test]
    fn remappings_empty_state() -> Result<(), BinaryReaderError> {
        let mut remappings = Remappings::default();
        assert!(remappings.is_empty());
        assert!(
            !remappings.is_duplicate(0),
            "func idxs should default to not being duplicates."
        );
        assert_eq!(
            remappings.new_index_for(7),
            7,
            "new_index_for() should return unchanged indices unless it knows a better mapping."
        );

        let distinct = vec![func("A", "a"), func("B", "b"), func("C", "c")];
        remappings.imports(distinct)?;
        assert!(
            remappings.is_empty(),
            "remappings should show as empty when there are function imports but no duplicates."
        );
        Ok(())
    }

    #[test]
    fn remappings_populated_state() -> Result<(), BinaryReaderError> {
        // We test contiguous and discontiguous duplicates, singletons, and
        // indices outside the mapping table (which represent local, unimported
        // functions).
        let imports = vec![
            func("A", "a"),
            func("A", "a"),
            // Include a non-function import to make sure those are skipped and
            // don't take up a slot in our newly constructed func index space.
            global("non", "function"),
            func("C", "c"),
            func("D", "d"),
            func("D", "d"),
            func("D", "d"),
            func("G", "g"),
            func("D", "d"),
            func("I", "i"),
        ];
        let mut remappings = Remappings::default();
        remappings.imports(imports)?;

        assert!(!remappings.is_empty());
        assert!(!remappings.is_duplicate(0));
        assert!(remappings.is_duplicate(1));

        // Go one beyond the function indices covered by the input.
        let remapped: Vec<u32> = (0..=9)
            .map(|idx| remappings.new_index_for(idx))
            .collect();
        assert_eq!(remapped, vec![0, 0, 1, 2, 2, 2, 3, 2, 4, 5]);
        Ok(())
    }

    /// Show that non-function imports don't throw off the `is_duplicate()`
    /// positions of later imports.
    #[test]
    fn remappings_non_function_imports() -> Result<(), BinaryReaderError> {
        let imports = vec![
            func("A", "a"),
            global("non", "function"),
            func("A", "a"),
        ];
        let mut remappings = Remappings::default();
        remappings.imports(imports)?;

        assert!(!remappings.is_duplicate(1));
        assert!(remappings.is_duplicate(2));
        Ok(())
    }
}
0 commit comments