|
| 1 | +use std::collections::HashMap; |
| 2 | + |
| 3 | +use auditable_serde::{Package, Source, VersionInfo}; |
| 4 | +use cargo_metadata::{ |
| 5 | + semver::{self, Version}, |
| 6 | + DependencyKind, |
| 7 | +}; |
| 8 | +use serde::{Deserialize, Serialize}; |
| 9 | + |
| 10 | +/// Cargo SBOM precursor format. |
| 11 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 12 | +pub struct SbomPrecursor { |
| 13 | + /// Schema version |
| 14 | + pub version: u32, |
| 15 | + /// Index into the crates array for the root crate |
| 16 | + pub root: usize, |
| 17 | + /// Array of all crates |
| 18 | + pub crates: Vec<Crate>, |
| 19 | + /// Information about rustc used to perform the compilation |
| 20 | + pub rustc: RustcInfo, |
| 21 | +} |
| 22 | + |
| 23 | +impl From<SbomPrecursor> for VersionInfo { |
| 24 | + fn from(sbom: SbomPrecursor) -> Self { |
| 25 | + // cargo sbom data format has more nodes than the auditable info format - if a crate is both a build |
| 26 | + // and runtime dependency it will appear twice in the `crates` array. |
| 27 | + // The `VersionInfo` format lists each package only once, with a single `kind` field |
| 28 | + // (Runtime having precedence over other kinds). |
| 29 | + |
| 30 | + // Firstly, we deduplicate the (name, version) pairs and create a mapping from the |
| 31 | + // original indices in the cargo sbom array to the new index in the auditable info package array. |
| 32 | + let (_, mut packages, indices) = sbom.crates.iter().enumerate().fold( |
| 33 | + (HashMap::new(), Vec::new(), Vec::new()), |
| 34 | + |(mut id_to_index_map, mut packages, mut indices), (index, crate_)| { |
| 35 | + match id_to_index_map.entry(crate_.id.clone()) { |
| 36 | + std::collections::hash_map::Entry::Occupied(entry) => { |
| 37 | + // Just store the new index in the indices array |
| 38 | + indices.push(*entry.get()); |
| 39 | + } |
| 40 | + std::collections::hash_map::Entry::Vacant(entry) => { |
| 41 | + let (name, version, source) = parse_fully_qualified_package_id(&crate_.id); |
| 42 | + // If the entry does not exist, we create it |
| 43 | + packages.push(Package { |
| 44 | + name, |
| 45 | + version, |
| 46 | + source, |
| 47 | + // Assume build, if we determine this is a runtime dependency we'll update later |
| 48 | + kind: auditable_serde::DependencyKind::Build, |
| 49 | + // We will fill this in later |
| 50 | + dependencies: Vec::new(), |
| 51 | + root: index == sbom.root, |
| 52 | + }); |
| 53 | + entry.insert(packages.len() - 1); |
| 54 | + indices.push(packages.len() - 1); |
| 55 | + } |
| 56 | + } |
| 57 | + (id_to_index_map, packages, indices) |
| 58 | + }, |
| 59 | + ); |
| 60 | + |
| 61 | + // Traverse the graph as given by the sbom to fill in the dependencies with the new indices. |
| 62 | + // |
| 63 | + // Keep track of whether the dependency is a runtime dependency. |
| 64 | + // If we ever encounter a non-runtime dependency, all deps in the remaining subtree |
| 65 | + // are not runtime dependencies, i.e a runtime dep of a build dep is not recognized as a runtime dep. |
| 66 | + let mut stack = Vec::new(); |
| 67 | + stack.push((sbom.root, true)); |
| 68 | + while let Some((old_index, is_runtime)) = stack.pop() { |
| 69 | + let crate_ = &sbom.crates[old_index]; |
| 70 | + for dep in &crate_.dependencies { |
| 71 | + stack.push((dep.index, dep.kind == DependencyKind::Normal && is_runtime)); |
| 72 | + } |
| 73 | + |
| 74 | + let package = &mut packages[indices[old_index]]; |
| 75 | + if is_runtime { |
| 76 | + package.kind = auditable_serde::DependencyKind::Runtime |
| 77 | + }; |
| 78 | + |
| 79 | + for dep in &crate_.dependencies { |
| 80 | + let new_dep_index = indices[dep.index]; |
| 81 | + if package.dependencies.contains(&new_dep_index) { |
| 82 | + continue; // Already added this dependency |
| 83 | + } else if new_dep_index == indices[old_index] { |
| 84 | + // If the dependency is the same as the package itself, skip it |
| 85 | + continue; |
| 86 | + } else { |
| 87 | + package.dependencies.push(new_dep_index); |
| 88 | + } |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + VersionInfo { packages } |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 97 | +pub struct Crate { |
| 98 | + /// Package ID specification |
| 99 | + pub id: String, |
| 100 | + /// List of target kinds |
| 101 | + pub kind: Vec<String>, |
| 102 | + /// Enabled feature flags |
| 103 | + pub features: Vec<String>, |
| 104 | + /// Dependencies for this crate |
| 105 | + pub dependencies: Vec<Dependency>, |
| 106 | +} |
| 107 | + |
| 108 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 109 | +pub struct Dependency { |
| 110 | + /// Index into the crates array |
| 111 | + pub index: usize, |
| 112 | + /// Dependency kind: "normal", "build", or "dev" |
| 113 | + pub kind: DependencyKind, |
| 114 | +} |
| 115 | + |
| 116 | +#[derive(Debug, Clone, Serialize, Deserialize)] |
| 117 | +pub struct RustcInfo { |
| 118 | + /// Compiler version |
| 119 | + pub version: String, |
| 120 | + /// Compiler wrapper |
| 121 | + pub wrapper: Option<String>, |
| 122 | + /// Compiler workspace wrapper |
| 123 | + pub workspace_wrapper: Option<String>, |
| 124 | + /// Commit hash for rustc |
| 125 | + pub commit_hash: String, |
| 126 | + /// Host target triple |
| 127 | + pub host: String, |
| 128 | + /// Verbose version string: `rustc -vV` |
| 129 | + pub verbose_version: String, |
| 130 | +} |
| 131 | + |
/// URL that identifies the crates.io index in `registry+…` package IDs; a registry
/// package whose URL equals this value is classified as coming from crates.io.
const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index";
| 133 | + |
| 134 | +/// Parses a fully qualified package ID spec string into a tuple of (name, version, source). |
| 135 | +/// The package ID spec format is defined at https://doc.rust-lang.org/cargo/reference/pkgid-spec.html#package-id-specifications-1 |
| 136 | +/// |
| 137 | +/// The fully qualified form of a package ID spec is mentioned in the Cargo documentation, |
| 138 | +/// figuring it out is left as an exercise to the reader. |
| 139 | +/// |
| 140 | +/// Adapting the grammar in the cargo doc, the format appears to be : |
| 141 | +/// ```norust |
| 142 | +/// fully_qualified_spec := kind "+" proto "://" hostname-and-path [ "?" query] "#" [ name "@" ] semver |
| 143 | +/// query := ( "branch" | "tag" | "rev" ) "=" ref |
| 144 | +/// semver := digits "." digits "." digits [ "-" prerelease ] [ "+" build ] |
| 145 | +/// kind := "registry" | "git" | "path" |
| 146 | +/// proto := "http" | "git" | "file" | ... |
| 147 | +/// ``` |
| 148 | +/// where: |
| 149 | +/// - the name is always present except when the kind is `path` and the last segment of the path doesn't match the name |
| 150 | +/// - the query string is only present for git dependencies (which we can ignore since we don't record git information) |
| 151 | +fn parse_fully_qualified_package_id(id: &str) -> (String, Version, Source) { |
| 152 | + let (kind, rest) = id.split_once('+').expect("Package ID to have a kind"); |
| 153 | + let (url, rest) = rest |
| 154 | + .split_once('#') |
| 155 | + .expect("Package ID to have version information"); |
| 156 | + let source = match (kind, url) { |
| 157 | + ("registry", CRATES_IO_INDEX) => Source::CratesIo, |
| 158 | + ("registry", _) => Source::Registry, |
| 159 | + ("git", _) => Source::Git, |
| 160 | + ("path", _) => Source::Local, |
| 161 | + _ => Source::Other(kind.to_string()), |
| 162 | + }; |
| 163 | + |
| 164 | + if source == Source::Local { |
| 165 | + // For local packages, the name might be in the suffix after '#' if it has |
| 166 | + // a diferent name than the last segment of the path. |
| 167 | + if let Some((name, version)) = rest.split_once('@') { |
| 168 | + ( |
| 169 | + name.to_string(), |
| 170 | + semver::Version::parse(version).expect("Version to be valid SemVer"), |
| 171 | + source, |
| 172 | + ) |
| 173 | + } else { |
| 174 | + // If no name is specified, use the last segment of the path as the name |
| 175 | + let name = url |
| 176 | + .split('/') |
| 177 | + .next_back() |
| 178 | + .unwrap() |
| 179 | + .split('\\') |
| 180 | + .next_back() |
| 181 | + .unwrap(); |
| 182 | + ( |
| 183 | + name.to_string(), |
| 184 | + semver::Version::parse(rest).expect("Version to be valid SemVer"), |
| 185 | + source, |
| 186 | + ) |
| 187 | + } |
| 188 | + } else { |
| 189 | + // For other sources, the name and version are after the '#', separated by '@' |
| 190 | + let (name, version) = rest |
| 191 | + .split_once('@') |
| 192 | + .expect("Package ID to have a name and version"); |
| 193 | + ( |
| 194 | + name.to_string(), |
| 195 | + semver::Version::parse(version).expect("Version to be valid SemVer"), |
| 196 | + source, |
| 197 | + ) |
| 198 | + } |
| 199 | +} |
0 commit comments