Skip to content

Commit 7f9ff85

Browse files
authored
Merge pull request #213 from tofay/cargo-sbom
Use cargo SBOM precursor files, if available
2 parents 4edf5c7 + 37cd4fb commit 7f9ff85

File tree

16 files changed

+397
-31
lines changed

16 files changed

+397
-31
lines changed

Cargo.lock

Lines changed: 12 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,4 +104,4 @@ Do not rely on SBOMs when dealing with supply chain attacks!
104104

105105
### What is blocking uplifting this into Cargo?
106106

107-
The [RFC for this functionality in Cargo itself](https://github.com/rust-lang/rfcs/pull/2801) has been [postponed](https://github.com/rust-lang/rfcs/pull/2801#issuecomment-2122880841) by the Cargo team until the [more foundational SBOM RFC](https://github.com/rust-lang/rfcs/pull/3553) is implemented.
107+
The [RFC for this functionality in Cargo itself](https://github.com/rust-lang/rfcs/pull/2801) has been [postponed](https://github.com/rust-lang/rfcs/pull/2801#issuecomment-2122880841) by the Cargo team until the [more foundational SBOM RFC](https://github.com/rust-lang/rfcs/pull/3553) is implemented. That RFC has now been implemented and is available via an [unstable feature](https://doc.rust-lang.org/cargo/reference/unstable.html#sbom). cargo-auditable integrates with this: if you enable that feature and build with `cargo auditable`, e.g. with `CARGO_BUILD_SBOM=true cargo auditable -Z sbom build` and a nightly Rust toolchain, then `cargo auditable` will use the SBOM precursor files generated by Cargo.

cargo-auditable/src/collect_audit_data.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,30 @@ use std::str::from_utf8;
44

55
use crate::{
66
auditable_from_metadata::encode_audit_data, cargo_arguments::CargoArgs,
7-
rustc_arguments::RustcArgs,
7+
rustc_arguments::RustcArgs, sbom_precursor,
88
};
99

1010
/// Calls `cargo metadata` to obtain the dependency tree, serializes it to JSON and compresses it
1111
pub fn compressed_dependency_list(rustc_args: &RustcArgs, target_triple: &str) -> Vec<u8> {
12-
let metadata = get_metadata(rustc_args, target_triple);
13-
let version_info = encode_audit_data(&metadata).unwrap();
12+
let sbom_path = std::env::var_os("CARGO_SBOM_PATH");
13+
14+
// If cargo has created precursor SBOM files, use them instead of `cargo metadata`.
15+
let version_info = if sbom_path.as_ref().map(|p| !p.is_empty()).unwrap_or(false) {
16+
// Cargo creates an SBOM file for each output file (rlib, bin, cdylib, etc),
17+
// but the SBOM file is identical for each output file in a given rustc crate compilation,
18+
// so we can just use the first SBOM we find.
19+
let sbom_path = std::env::split_paths(&sbom_path.unwrap()).next().unwrap();
20+
let sbom_data: Vec<u8> = std::fs::read(&sbom_path)
21+
.unwrap_or_else(|_| panic!("Failed to read SBOM file at {}", sbom_path.display()));
22+
let sbom_precursor: sbom_precursor::SbomPrecursor = serde_json::from_slice(&sbom_data)
23+
.unwrap_or_else(|_| panic!("Failed to parse SBOM file at {}", sbom_path.display()));
24+
sbom_precursor.into()
25+
} else {
26+
// If no SBOM files are available, fall back to `cargo metadata`
27+
let metadata = get_metadata(rustc_args, target_triple);
28+
encode_audit_data(&metadata).unwrap()
29+
};
30+
1431
let json = serde_json::to_string(&version_info).unwrap();
1532
// compression level 7 makes this complete in a few milliseconds, so no need to drop to a lower level in debug mode
1633
let compressed_json = compress_to_vec_zlib(json.as_bytes(), 7);

cargo-auditable/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ mod object_file;
99
mod platform_detection;
1010
mod rustc_arguments;
1111
mod rustc_wrapper;
12+
mod sbom_precursor;
1213
mod target_info;
1314

1415
use std::process::exit;

cargo-auditable/src/sbom_precursor.rs

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
use std::collections::HashMap;
2+
3+
use auditable_serde::{Package, Source, VersionInfo};
4+
use cargo_metadata::{
5+
semver::{self, Version},
6+
DependencyKind,
7+
};
8+
use serde::{Deserialize, Serialize};
9+
10+
/// Cargo SBOM precursor format.
11+
#[derive(Debug, Clone, Serialize, Deserialize)]
12+
pub struct SbomPrecursor {
13+
/// Schema version
14+
pub version: u32,
15+
/// Index into the crates array for the root crate
16+
pub root: usize,
17+
/// Array of all crates
18+
pub crates: Vec<Crate>,
19+
/// Information about rustc used to perform the compilation
20+
pub rustc: RustcInfo,
21+
}
22+
23+
impl From<SbomPrecursor> for VersionInfo {
24+
fn from(sbom: SbomPrecursor) -> Self {
25+
// cargo sbom data format has more nodes than the auditable info format - if a crate is both a build
26+
// and runtime dependency it will appear twice in the `crates` array.
27+
// The `VersionInfo` format lists each package only once, with a single `kind` field
28+
// (Runtime having precedence over other kinds).
29+
30+
// Firstly, we deduplicate the (name, version) pairs and create a mapping from the
31+
// original indices in the cargo sbom array to the new index in the auditable info package array.
32+
let (_, mut packages, indices) = sbom.crates.iter().enumerate().fold(
33+
(HashMap::new(), Vec::new(), Vec::new()),
34+
|(mut id_to_index_map, mut packages, mut indices), (index, crate_)| {
35+
match id_to_index_map.entry(crate_.id.clone()) {
36+
std::collections::hash_map::Entry::Occupied(entry) => {
37+
// Just store the new index in the indices array
38+
indices.push(*entry.get());
39+
}
40+
std::collections::hash_map::Entry::Vacant(entry) => {
41+
let (name, version, source) = parse_fully_qualified_package_id(&crate_.id);
42+
// If the entry does not exist, we create it
43+
packages.push(Package {
44+
name,
45+
version,
46+
source,
47+
// Assume build, if we determine this is a runtime dependency we'll update later
48+
kind: auditable_serde::DependencyKind::Build,
49+
// We will fill this in later
50+
dependencies: Vec::new(),
51+
root: index == sbom.root,
52+
});
53+
entry.insert(packages.len() - 1);
54+
indices.push(packages.len() - 1);
55+
}
56+
}
57+
(id_to_index_map, packages, indices)
58+
},
59+
);
60+
61+
// Traverse the graph as given by the sbom to fill in the dependencies with the new indices.
62+
//
63+
// Keep track of whether the dependency is a runtime dependency.
64+
// If we ever encounter a non-runtime dependency, all deps in the remaining subtree
65+
// are not runtime dependencies, i.e. a runtime dep of a build dep is not recognized as a runtime dep.
66+
let mut stack = Vec::new();
67+
stack.push((sbom.root, true));
68+
while let Some((old_index, is_runtime)) = stack.pop() {
69+
let crate_ = &sbom.crates[old_index];
70+
for dep in &crate_.dependencies {
71+
stack.push((dep.index, dep.kind == DependencyKind::Normal && is_runtime));
72+
}
73+
74+
let package = &mut packages[indices[old_index]];
75+
if is_runtime {
76+
package.kind = auditable_serde::DependencyKind::Runtime
77+
};
78+
79+
for dep in &crate_.dependencies {
80+
let new_dep_index = indices[dep.index];
81+
if package.dependencies.contains(&new_dep_index) {
82+
continue; // Already added this dependency
83+
} else if new_dep_index == indices[old_index] {
84+
// If the dependency is the same as the package itself, skip it
85+
continue;
86+
} else {
87+
package.dependencies.push(new_dep_index);
88+
}
89+
}
90+
}
91+
92+
VersionInfo { packages }
93+
}
94+
}
95+
96+
#[derive(Debug, Clone, Serialize, Deserialize)]
97+
pub struct Crate {
98+
/// Package ID specification
99+
pub id: String,
100+
/// List of target kinds
101+
pub kind: Vec<String>,
102+
/// Enabled feature flags
103+
pub features: Vec<String>,
104+
/// Dependencies for this crate
105+
pub dependencies: Vec<Dependency>,
106+
}
107+
108+
#[derive(Debug, Clone, Serialize, Deserialize)]
109+
pub struct Dependency {
110+
/// Index into the crates array
111+
pub index: usize,
112+
/// Dependency kind: "normal", "build", or "dev"
113+
pub kind: DependencyKind,
114+
}
115+
116+
#[derive(Debug, Clone, Serialize, Deserialize)]
117+
pub struct RustcInfo {
118+
/// Compiler version
119+
pub version: String,
120+
/// Compiler wrapper
121+
pub wrapper: Option<String>,
122+
/// Compiler workspace wrapper
123+
pub workspace_wrapper: Option<String>,
124+
/// Commit hash for rustc
125+
pub commit_hash: String,
126+
/// Host target triple
127+
pub host: String,
128+
/// Verbose version string: `rustc -vV`
129+
pub verbose_version: String,
130+
}
131+
132+
const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index";
133+
134+
/// Parses a fully qualified package ID spec string into a tuple of (name, version, source).
135+
/// The package ID spec format is defined at https://doc.rust-lang.org/cargo/reference/pkgid-spec.html#package-id-specifications-1
136+
///
137+
/// The fully qualified form of a package ID spec is mentioned in the Cargo documentation,
138+
/// figuring it out is left as an exercise to the reader.
139+
///
140+
/// Adapting the grammar in the cargo doc, the format appears to be:
141+
/// ```norust
142+
/// fully_qualified_spec := kind "+" proto "://" hostname-and-path [ "?" query] "#" [ name "@" ] semver
143+
/// query := ( "branch" | "tag" | "rev" ) "=" ref
144+
/// semver := digits "." digits "." digits [ "-" prerelease ] [ "+" build ]
145+
/// kind := "registry" | "git" | "path"
146+
/// proto := "http" | "git" | "file" | ...
147+
/// ```
148+
/// where:
149+
/// - the name is always present except when the kind is `path` and the last segment of the path doesn't match the name
150+
/// - the query string is only present for git dependencies (which we can ignore since we don't record git information)
151+
fn parse_fully_qualified_package_id(id: &str) -> (String, Version, Source) {
152+
let (kind, rest) = id.split_once('+').expect("Package ID to have a kind");
153+
let (url, rest) = rest
154+
.split_once('#')
155+
.expect("Package ID to have version information");
156+
let source = match (kind, url) {
157+
("registry", CRATES_IO_INDEX) => Source::CratesIo,
158+
("registry", _) => Source::Registry,
159+
("git", _) => Source::Git,
160+
("path", _) => Source::Local,
161+
_ => Source::Other(kind.to_string()),
162+
};
163+
164+
if source == Source::Local {
165+
// For local packages, the name might be in the suffix after '#' if it has
166+
// a different name than the last segment of the path.
167+
if let Some((name, version)) = rest.split_once('@') {
168+
(
169+
name.to_string(),
170+
semver::Version::parse(version).expect("Version to be valid SemVer"),
171+
source,
172+
)
173+
} else {
174+
// If no name is specified, use the last segment of the path as the name
175+
let name = url
176+
.split('/')
177+
.next_back()
178+
.unwrap()
179+
.split('\\')
180+
.next_back()
181+
.unwrap();
182+
(
183+
name.to_string(),
184+
semver::Version::parse(rest).expect("Version to be valid SemVer"),
185+
source,
186+
)
187+
}
188+
} else {
189+
// For other sources, the name and version are after the '#', separated by '@'
190+
let (name, version) = rest
191+
.split_once('@')
192+
.expect("Package ID to have a name and version");
193+
(
194+
name.to_string(),
195+
semver::Version::parse(version).expect("Version to be valid SemVer"),
196+
source,
197+
)
198+
}
199+
}

cargo-auditable/tests/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cargo.lock
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
fn main() {}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
fn main() {}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[package]
2+
name = "bar"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
8+
[workspace]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
pub fn add(left: u64, right: u64) -> u64 {
2+
left + right
3+
}
4+
5+
#[cfg(test)]
6+
mod tests {
7+
use super::*;
8+
9+
#[test]
10+
fn it_works() {
11+
let result = add(2, 2);
12+
assert_eq!(result, 4);
13+
}
14+
}

0 commit comments

Comments
 (0)