Skip to content
Merged
6 changes: 6 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1406,8 +1406,11 @@ name = "generate-copyright"
version = "0.1.0"
dependencies = [
"anyhow",
"cargo_metadata 0.18.1",
"rinja",
"serde",
"serde_json",
"thiserror",
]

[[package]]
Expand Down Expand Up @@ -3103,7 +3106,10 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd"
dependencies = [
"humansize",
"itoa",
"num-traits",
"percent-encoding",
"rinja_derive",
]

Expand Down
2 changes: 1 addition & 1 deletion REUSE.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ SPDX-License-Identifier = "MIT OR Apache-2.0"
path = "src/llvm-project/**"
precedence = "override"
SPDX-FileCopyrightText = [
"2003-2019 by the contributors listed in [CREDITS.TXT](https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
"2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
"2010 Apple Inc",
"2003-2019 University of Illinois at Urbana-Champaign.",
]
Expand Down
4 changes: 3 additions & 1 deletion src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,13 @@ impl Step for GenerateCopyright {
let license_metadata = builder.ensure(CollectLicenseMetadata);

// Temporary location, it will be moved to the proper one once it's accurate.
let dest = builder.out.join("COPYRIGHT.md");
let dest = builder.out.join("COPYRIGHT.html");

let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);
cmd.env("DEST", &dest);
cmd.env("OUT_DIR", &builder.out);
cmd.env("CARGO", &builder.initial_cargo);
cmd.run(builder);

dest
Expand Down
2 changes: 2 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
license = "MIT OR Apache-2.0"

[dependencies]
anyhow = "1.0.65"
Expand Down
5 changes: 5 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ use anyhow::Error;

use crate::licenses::LicensesInterner;

/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
///
/// Run `x.py run collect-license-metadata`
fn main() -> Result<(), Error> {
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
Expand Down
4 changes: 4 additions & 0 deletions src/tools/generate-copyright/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
name = "generate-copyright"
version = "0.1.0"
edition = "2021"
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.65"
cargo_metadata = "0.18.1"
rinja = "0.3.0"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
thiserror = "1"
191 changes: 191 additions & 0 deletions src/tools/generate-copyright/src/cargo_metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//! Gets metadata about a workspace from Cargo

use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};

/// Describes how this module can fail
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("I/O Error: {0:?}")]
Io(#[from] std::io::Error),
#[error("Failed get output from cargo-metadata: {0:?}")]
GettingMetadata(#[from] cargo_metadata::Error),
#[error("Failed to run cargo vendor: {0:?}")]
LaunchingVendor(std::io::Error),
#[error("Failed to complete cargo vendor")]
RunningVendor,
#[error("Bad path {0:?} whilst scraping files")]
Scraping(PathBuf),
}

/// Uniquely describes a package on crates.io
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Package {
/// The name of the package
pub name: String,
/// The version number
pub version: String,
}

/// Extra data about a package
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PackageMetadata {
/// The license it is under
pub license: String,
/// The list of authors from the package metadata
pub authors: Vec<String>,
/// A list of important files from the package, with their contents.
///
/// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
pub notices: BTreeMap<String, String>,
/// If this is true, this dep is in the Rust Standard Library
pub is_in_libstd: Option<bool>,
}

/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get_metadata_and_notices(
cargo: &Path,
dest: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
let mut output = get_metadata(cargo, root_path, manifest_paths)?;

// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;

// Now for each dependency we found, go and grab any important looking files
for (package, metadata) in output.iter_mut() {
load_important_files(package, metadata, &vendor_path)?;
}

Ok(output)
}

/// Use `cargo metadata` to get a list of dependencies and their license data.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get_metadata(
cargo: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
let mut output = BTreeMap::new();
// Look at the metadata for each manifest
for manifest_path in manifest_paths {
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
}
let metadata = cargo_metadata::MetadataCommand::new()
.cargo_path(cargo)
.env("RUSTC_BOOTSTRAP", "1")
.manifest_path(manifest_path)
.exec()?;
for package in metadata.packages {
let manifest_path = package.manifest_path.as_path();
if manifest_path.starts_with(root_path) {
// it's an in-tree dependency and reuse covers it
continue;
}
// otherwise it's an out-of-tree dependency
let package_id = Package { name: package.name, version: package.version.to_string() };
output.insert(
package_id,
PackageMetadata {
license: package.license.unwrap_or_else(|| String::from("Unspecified")),
authors: package.authors,
notices: BTreeMap::new(),
is_in_libstd: None,
},
);
}
}

Ok(output)
}

/// Run cargo-vendor, fetching into the given dir
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
let mut vendor_command = std::process::Command::new(cargo);
vendor_command.env("RUSTC_BOOTSTRAP", "1");
vendor_command.arg("vendor");
vendor_command.arg("--quiet");
vendor_command.arg("--versioned-dirs");
for manifest_path in manifest_paths {
vendor_command.arg("-s");
vendor_command.arg(manifest_path);
}
vendor_command.arg(dest);

let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;

if !vendor_status.success() {
return Err(Error::RunningVendor);
}

Ok(())
}

/// Add important files off disk into this dependency.
///
/// Maybe one-day Cargo.toml will contain enough information that we don't need
/// to do this manual scraping.
fn load_important_files(
package: &Package,
dep: &mut PackageMetadata,
vendor_root: &Path,
) -> Result<(), Error> {
let name_version = format!("{}-{}", package.name, package.version);
println!("Scraping notices for {}...", name_version);
let dep_vendor_path = vendor_root.join(name_version);
for entry in std::fs::read_dir(dep_vendor_path)? {
let entry = entry?;
let metadata = entry.metadata()?;
let path = entry.path();
let Some(filename) = path.file_name() else {
return Err(Error::Scraping(path));
};
let lc_filename = filename.to_ascii_lowercase();
let lc_filename_str = lc_filename.to_string_lossy();
let mut keep = false;
for m in ["copyright", "licence", "license", "author", "notice"] {
if lc_filename_str.contains(m) {
keep = true;
break;
}
}
if keep {
if metadata.is_dir() {
for inner_entry in std::fs::read_dir(entry.path())? {
let inner_entry = inner_entry?;
if inner_entry.metadata()?.is_file() {
let inner_filename = inner_entry.file_name();
let inner_filename_str = inner_filename.to_string_lossy();
let qualified_filename =
format!("{}/{}", lc_filename_str, inner_filename_str);
println!("Scraping {}", qualified_filename);
dep.notices.insert(
qualified_filename.to_string(),
std::fs::read_to_string(inner_entry.path())?,
);
}
}
} else if metadata.is_file() {
let filename = filename.to_string_lossy();
println!("Scraping {}", filename);
dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?);
}
}
}
Ok(())
}
Loading