|
| 1 | +//! Filesystem operations for [`Resolve`]. |
| 2 | +
|
| 3 | +use alloc::format; |
| 4 | +use alloc::string::ToString; |
| 5 | +use std::path::Path; |
| 6 | +use std::vec::Vec; |
| 7 | + |
| 8 | +use anyhow::{Context, Result, bail}; |
| 9 | + |
| 10 | +use super::{PackageSources, Resolve}; |
| 11 | +use crate::{IndexSet, UnresolvedPackageGroup}; |
| 12 | + |
| 13 | +/// All the sources used during resolving a directory or path. |
| 14 | +#[derive(Clone, Debug)] |
| 15 | +pub struct PackageSourceMap { |
| 16 | + inner: PackageSources, |
| 17 | +} |
| 18 | + |
| 19 | +impl PackageSourceMap { |
| 20 | + pub(super) fn from_single_source(package_id: super::PackageId, source: &Path) -> Self { |
| 21 | + use alloc::collections::BTreeMap; |
| 22 | + use alloc::vec; |
| 23 | + |
| 24 | + Self { |
| 25 | + inner: PackageSources { |
| 26 | + sources: vec![vec![source.display().to_string()]], |
| 27 | + package_id_to_source_map_idx: BTreeMap::from([(package_id, 0)]), |
| 28 | + }, |
| 29 | + } |
| 30 | + } |
| 31 | + |
| 32 | + pub(super) fn from_inner(inner: PackageSources) -> Self { |
| 33 | + Self { inner } |
| 34 | + } |
| 35 | + |
| 36 | + /// All unique source paths. |
| 37 | + pub fn paths(&self) -> impl Iterator<Item = &Path> { |
| 38 | + // Usually any two source map should not have duplicated source paths, |
| 39 | + // but it can happen, e.g. with using [`Resolve::push_str`] directly. |
| 40 | + // To be sure we use a set for deduplication here. |
| 41 | + self.inner |
| 42 | + .sources |
| 43 | + .iter() |
| 44 | + .flatten() |
| 45 | + .map(|s| Path::new(s)) |
| 46 | + .collect::<IndexSet<&Path>>() |
| 47 | + .into_iter() |
| 48 | + } |
| 49 | + |
| 50 | + /// Source paths for package |
| 51 | + pub fn package_paths(&self, id: super::PackageId) -> Option<impl Iterator<Item = &Path>> { |
| 52 | + self.inner |
| 53 | + .package_id_to_source_map_idx |
| 54 | + .get(&id) |
| 55 | + .map(|&idx| self.inner.sources[idx].iter().map(|s| Path::new(s))) |
| 56 | + } |
| 57 | +} |
| 58 | + |
/// Result of reading a single file with `Resolve::_push_file`.
enum ParsedFile {
    /// The file was a wasm-encoded WIT package. By the time this variant is
    /// produced the decoded package has already been merged into the
    /// `Resolve`, and this is the id it received there.
    #[cfg(feature = "decoding")]
    Package(super::PackageId),
    /// The file was parsed as WIT text; the resulting group has not been
    /// pushed into the `Resolve` yet.
    Unresolved(UnresolvedPackageGroup),
}
| 64 | + |
| 65 | +impl Resolve { |
| 66 | + /// Parse WIT packages from the input `path`. |
| 67 | + /// |
| 68 | + /// The input `path` can be one of: |
| 69 | + /// |
| 70 | + /// * A directory containing a WIT package with an optional `deps` directory |
| 71 | + /// for local dependencies. In this case `deps` is parsed first and then |
| 72 | + /// the parent `path` is parsed and returned. |
| 73 | + /// * A single standalone WIT file. |
| 74 | + /// * A wasm-encoded WIT package as a single file in either the text or |
| 75 | + /// binary format. |
| 76 | + /// |
| 77 | + /// More information can also be found at [`Resolve::push_dir`] and |
| 78 | + /// [`Resolve::push_file`]. |
| 79 | + pub fn push_path( |
| 80 | + &mut self, |
| 81 | + path: impl AsRef<Path>, |
| 82 | + ) -> Result<(super::PackageId, PackageSourceMap)> { |
| 83 | + self._push_path(path.as_ref()) |
| 84 | + } |
| 85 | + |
| 86 | + fn _push_path(&mut self, path: &Path) -> Result<(super::PackageId, PackageSourceMap)> { |
| 87 | + if path.is_dir() { |
| 88 | + self.push_dir(path).with_context(|| { |
| 89 | + format!( |
| 90 | + "failed to resolve directory while parsing WIT for path [{}]", |
| 91 | + path.display() |
| 92 | + ) |
| 93 | + }) |
| 94 | + } else { |
| 95 | + let id = self.push_file(path)?; |
| 96 | + Ok((id, PackageSourceMap::from_single_source(id, path))) |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + /// Parses the filesystem directory at `path` as a WIT package and returns |
| 101 | + /// a fully resolved [`PackageId`] list as a result. |
| 102 | + /// |
| 103 | + /// The directory itself is parsed with [`UnresolvedPackageGroup::parse_dir`] |
| 104 | + /// and then all packages found are inserted into this `Resolve`. The `path` |
| 105 | + /// specified may have a `deps` subdirectory which is probed automatically |
| 106 | + /// for any other WIT dependencies. |
| 107 | + /// |
| 108 | + /// The `deps` folder may contain: |
| 109 | + /// |
| 110 | + /// * `$path/deps/my-package/*.wit` - a directory that may contain multiple |
| 111 | + /// WIT files. This is parsed with [`UnresolvedPackageGroup::parse_dir`] |
| 112 | + /// and then inserted into this [`Resolve`]. Note that cannot recursively |
| 113 | + /// contain a `deps` directory. |
| 114 | + /// * `$path/deps/my-package.wit` - a single-file WIT package. This is |
| 115 | + /// parsed with [`Resolve::push_file`] and then added to `self` for |
| 116 | + /// name resolution. |
| 117 | + /// * `$path/deps/my-package.{wasm,wat}` - a wasm-encoded WIT package either |
| 118 | + /// in the text for binary format. |
| 119 | + /// |
| 120 | + /// In all cases entries in the `deps` folder are added to `self` first |
| 121 | + /// before adding files found in `path` itself. All WIT packages found are |
| 122 | + /// candidates for name-based resolution that other packages may use. |
| 123 | + /// |
| 124 | + /// This function returns a tuple of two values. The first value is a |
| 125 | + /// [`PackageId`], which represents the main WIT package found within |
| 126 | + /// `path`. This argument is useful for passing to [`Resolve::select_world`] |
| 127 | + /// for choosing something to bindgen with. |
| 128 | + /// |
| 129 | + /// The second value returned is a [`PackageSourceMap`], which contains all the sources |
| 130 | + /// that were parsed during resolving. This can be useful for: |
| 131 | + /// * build systems that want to rebuild bindings whenever one of the files changed |
| 132 | + /// * or other tools, which want to identify the sources for the resolved packages |
| 133 | + pub fn push_dir( |
| 134 | + &mut self, |
| 135 | + path: impl AsRef<Path>, |
| 136 | + ) -> Result<(super::PackageId, PackageSourceMap)> { |
| 137 | + self._push_dir(path.as_ref()) |
| 138 | + } |
| 139 | + |
| 140 | + fn _push_dir(&mut self, path: &Path) -> Result<(super::PackageId, PackageSourceMap)> { |
| 141 | + let top_pkg = UnresolvedPackageGroup::parse_dir(path) |
| 142 | + .with_context(|| format!("failed to parse package: {}", path.display()))?; |
| 143 | + let deps = path.join("deps"); |
| 144 | + let deps = self |
| 145 | + .parse_deps_dir(&deps) |
| 146 | + .with_context(|| format!("failed to parse dependency directory: {}", deps.display()))?; |
| 147 | + |
| 148 | + let (pkg_id, inner) = self.sort_unresolved_packages(top_pkg, deps)?; |
| 149 | + Ok((pkg_id, PackageSourceMap::from_inner(inner))) |
| 150 | + } |
| 151 | + |
| 152 | + fn parse_deps_dir(&mut self, path: &Path) -> Result<Vec<UnresolvedPackageGroup>> { |
| 153 | + let mut ret = Vec::new(); |
| 154 | + if !path.exists() { |
| 155 | + return Ok(ret); |
| 156 | + } |
| 157 | + let mut entries = path |
| 158 | + .read_dir() |
| 159 | + .and_then(|i| i.collect::<std::io::Result<Vec<_>>>()) |
| 160 | + .context("failed to read directory")?; |
| 161 | + entries.sort_by_key(|e| e.file_name()); |
| 162 | + for dep in entries { |
| 163 | + let path = dep.path(); |
| 164 | + let pkg = if dep.file_type()?.is_dir() || path.metadata()?.is_dir() { |
| 165 | + // If this entry is a directory or a symlink point to a |
| 166 | + // directory then always parse it as an `UnresolvedPackage` |
| 167 | + // since it's intentional to not support recursive `deps` |
| 168 | + // directories. |
| 169 | + UnresolvedPackageGroup::parse_dir(&path) |
| 170 | + .with_context(|| format!("failed to parse package: {}", path.display()))? |
| 171 | + } else { |
| 172 | + // If this entry is a file then we may want to ignore it but |
| 173 | + // this may also be a standalone WIT file or a `*.wasm` or |
| 174 | + // `*.wat` encoded package. |
| 175 | + let filename = dep.file_name(); |
| 176 | + match Path::new(&filename).extension().and_then(|s| s.to_str()) { |
| 177 | + Some("wit") | Some("wat") | Some("wasm") => match self._push_file(&path)? { |
| 178 | + #[cfg(feature = "decoding")] |
| 179 | + ParsedFile::Package(_) => continue, |
| 180 | + ParsedFile::Unresolved(pkg) => pkg, |
| 181 | + }, |
| 182 | + |
| 183 | + // Other files in deps dir are ignored for now to avoid |
| 184 | + // accidentally including things like `.DS_Store` files in |
| 185 | + // the call below to `parse_dir`. |
| 186 | + _ => continue, |
| 187 | + } |
| 188 | + }; |
| 189 | + ret.push(pkg); |
| 190 | + } |
| 191 | + Ok(ret) |
| 192 | + } |
| 193 | + |
| 194 | + /// Parses the contents of `path` from the filesystem and pushes the result |
| 195 | + /// into this `Resolve`. |
| 196 | + /// |
| 197 | + /// The `path` referenced here can be one of: |
| 198 | + /// |
| 199 | + /// * A WIT file. Note that in this case this single WIT file will be the |
| 200 | + /// entire package and any dependencies it has must already be in `self`. |
| 201 | + /// * A WIT package encoded as WebAssembly, either in text or binary form. |
| 202 | + /// In this the package and all of its dependencies are automatically |
| 203 | + /// inserted into `self`. |
| 204 | + /// |
| 205 | + /// In both situations the `PackageId`s of the resulting resolved packages |
| 206 | + /// are returned from this method. The return value is mostly useful in |
| 207 | + /// conjunction with [`Resolve::select_world`]. |
| 208 | + pub fn push_file(&mut self, path: impl AsRef<Path>) -> Result<super::PackageId> { |
| 209 | + match self._push_file(path.as_ref())? { |
| 210 | + #[cfg(feature = "decoding")] |
| 211 | + ParsedFile::Package(id) => Ok(id), |
| 212 | + ParsedFile::Unresolved(pkg) => self.push_group(pkg), |
| 213 | + } |
| 214 | + } |
| 215 | + |
| 216 | + fn _push_file(&mut self, path: &Path) -> Result<ParsedFile> { |
| 217 | + let contents = std::fs::read(path) |
| 218 | + .with_context(|| format!("failed to read path for WIT [{}]", path.display()))?; |
| 219 | + |
| 220 | + // If decoding is enabled at compile time then try to see if this is a |
| 221 | + // wasm file. |
| 222 | + #[cfg(feature = "decoding")] |
| 223 | + { |
| 224 | + use crate::decoding::{DecodedWasm, decode}; |
| 225 | + |
| 226 | + #[cfg(feature = "wat")] |
| 227 | + let is_wasm = wat::Detect::from_bytes(&contents).is_wasm(); |
| 228 | + #[cfg(not(feature = "wat"))] |
| 229 | + let is_wasm = wasmparser::Parser::is_component(&contents); |
| 230 | + |
| 231 | + if is_wasm { |
| 232 | + #[cfg(feature = "wat")] |
| 233 | + let contents = wat::parse_bytes(&contents).map_err(|mut e| { |
| 234 | + e.set_path(path); |
| 235 | + e |
| 236 | + })?; |
| 237 | + |
| 238 | + match decode(&contents)? { |
| 239 | + DecodedWasm::Component(..) => { |
| 240 | + bail!("found an actual component instead of an encoded WIT package in wasm") |
| 241 | + } |
| 242 | + DecodedWasm::WitPackage(resolve, pkg) => { |
| 243 | + let remap = self.merge(resolve)?; |
| 244 | + return Ok(ParsedFile::Package(remap.packages[pkg.index()])); |
| 245 | + } |
| 246 | + } |
| 247 | + } |
| 248 | + } |
| 249 | + |
| 250 | + // If this wasn't a wasm file then assume it's a WIT file. |
| 251 | + let text = match core::str::from_utf8(&contents) { |
| 252 | + Ok(s) => s, |
| 253 | + Err(_) => bail!("input file is not valid utf-8 [{}]", path.display()), |
| 254 | + }; |
| 255 | + let pkgs = UnresolvedPackageGroup::parse(path, text)?; |
| 256 | + Ok(ParsedFile::Unresolved(pkgs)) |
| 257 | + } |
| 258 | + |
| 259 | + /// Convenience method for combining [`UnresolvedPackageGroup::parse`] and |
| 260 | + /// [`Resolve::push_group`]. |
| 261 | + /// |
| 262 | + /// The `path` provided is used for error messages but otherwise is not |
| 263 | + /// read. This method does not touch the filesystem. The `contents` provided |
| 264 | + /// are the contents of a WIT package. |
| 265 | + pub fn push_str(&mut self, path: impl AsRef<Path>, contents: &str) -> Result<super::PackageId> { |
| 266 | + let path = path |
| 267 | + .as_ref() |
| 268 | + .to_str() |
| 269 | + .ok_or_else(|| anyhow::anyhow!("path is not valid utf-8: {:?}", path.as_ref()))?; |
| 270 | + self.push_source(path, contents) |
| 271 | + } |
| 272 | +} |
0 commit comments