diff --git a/Cargo.lock b/Cargo.lock index 37051bc1..95b2fcba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,6 +241,28 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-extra" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45bf463831f5131b7d3c756525b305d40f1185b688565648a92e1392ca35713d" +dependencies = [ + "axum", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "serde", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "backtrace" version = "0.3.64" @@ -839,6 +861,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1077,6 +1105,9 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "foldhash", +] [[package]] name = "heck" @@ -1302,6 +1333,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "imara-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" +dependencies = [ + "hashbrown 0.15.2", + "memchr", +] + [[package]] name = "indexmap" version = "2.9.0" @@ -1486,9 +1527,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = 
"32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "mime" @@ -3045,6 +3086,7 @@ dependencies = [ "anyhow", "async-trait", "axum", + "axum-extra", "bon", "bytes", "chrono", @@ -3060,12 +3102,14 @@ dependencies = [ "hmac", "hyper", "ignore", + "imara-diff", "itertools", "native-tls", "octocrab", "parser", "postgres-native-tls", "postgres-types", + "pulldown-cmark-escape", "rand", "regex", "reqwest", diff --git a/Cargo.toml b/Cargo.toml index 174b2a1c..3b96ee67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,9 @@ clap = { version = "4", features = ["derive"] } hmac = "0.12.1" subtle = "2.6.1" sha2 = "0.10.9" +imara-diff = "0.2.0" +pulldown-cmark-escape = "0.11.0" +axum-extra = { version = "0.10.1", default-features = false } [dependencies.serde] version = "1" diff --git a/src/gh_range_diff.rs b/src/gh_range_diff.rs new file mode 100644 index 00000000..62be6d11 --- /dev/null +++ b/src/gh_range_diff.rs @@ -0,0 +1,371 @@ +use std::collections::HashSet; +use std::fmt::{self, Write}; +use std::sync::{Arc, LazyLock}; + +use anyhow::Context as _; +use axum::{ + extract::{Path, State}, + http::HeaderValue, + response::IntoResponse, +}; +use axum_extra::extract::Host; +use hyper::header::CACHE_CONTROL; +use hyper::{ + HeaderMap, StatusCode, + header::{CONTENT_SECURITY_POLICY, CONTENT_TYPE}, +}; +use imara_diff::{ + Algorithm, Diff, InternedInput, Interner, Token, UnifiedDiffConfig, UnifiedDiffPrinter, +}; +use pulldown_cmark_escape::FmtWriter; +use regex::Regex; + +use crate::{github, handlers::Context, utils::AppError}; + +static MARKER_RE: LazyLock = + LazyLock::new(|| Regex::new(r"@@ -[\d]+,[\d]+ [+][\d]+,[\d]+ @@").unwrap()); + +/// Compute and renders an emulated `git range-diff` between two pushes (old and new). +/// +/// `basehead` is `OLDHEAD..NEWHEAD`, both `OLDHEAD` and `NEWHEAD` must be SHAs or branch names. 
+pub async fn gh_range_diff( + Path((owner, repo, basehead)): Path<(String, String, String)>, + State(ctx): State>, + Host(host): Host, +) -> axum::response::Result { + let Some((oldhead, newhead)) = basehead.split_once("..") else { + return Ok(( + StatusCode::BAD_REQUEST, + HeaderMap::new(), + format!("`{basehead}` is not in the form `base..head`"), + )); + }; + + // Configure unified diff + let config = UnifiedDiffConfig::default(); + + let repos = ctx + .team + .repos() + .await + .context("unable to retrieve team repos")?; + + // Verify that the request org is part of the Rust project + let Some(repos) = repos.repos.get(&owner) else { + return Ok(( + StatusCode::BAD_REQUEST, + HeaderMap::new(), + format!("organization `{owner}` is not part of the Rust Project team repos"), + )); + }; + + // Verify that the request repo is part of the Rust project + if !repos.iter().any(|r| r.name == repo) { + return Ok(( + StatusCode::BAD_REQUEST, + HeaderMap::new(), + format!("repository `{repo}` is not part of the Rust Project team repos"), + )); + } + + let issue_repo = github::IssueRepository { + organization: owner.to_string(), + repository: repo.to_string(), + }; + + // Determine the oldbase and get the comparison for the old diff + let old = async { + // We need to determine the oldbase (i.e. the parent sha of all the commits of old). + // Fortunately the GitHub compare API returns the merge base commit when comparing + // two different sha. + // + // Unfortunately for us we don't know in which tree the parent is (could be master, beta, stable, ...) + // so for now we assume that the parent is in the default branch (that we hardcode for now to "master"). + // + // We therefore compare master and oldhead to get a guess of the oldbase. + // + // As an optimization we compare them in reverse to speed up things. The resulting + // patches won't be correct, but we only care about the merge base commit which + // is always correct no matter the order. 
+ let oldbase = ctx + .github + .compare(&issue_repo, "master", oldhead) + .await + .context("failed to retrive the comparison between master and oldhead")? + .merge_base_commit + .sha; + + // Get the comparison between the oldbase..oldhead + let mut old = ctx + .github + .compare(&issue_repo, &oldbase, oldhead) + .await + .with_context(|| { + format!("failed to retrive the comparison between {oldbase} and {oldhead}") + })?; + + // Sort by filename, so it's consistent with GitHub UI + old.files + .sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename)); + + anyhow::Result::<_>::Ok((oldbase, old)) + }; + + // Determine the newbase and get the comparison for the new diff + let new = async { + // Get the newbase from comparing master and newhead. + // + // See the comment above on old for more details. + let newbase = ctx + .github + .compare(&issue_repo, "master", newhead) + .await + .context("failed to retrive the comparison between master and newhead")? + .merge_base_commit + .sha; + + // Get the comparison between the newbase..newhead + let mut new = ctx + .github + .compare(&issue_repo, &newbase, newhead) + .await + .with_context(|| { + format!("failed to retrive the comparison between {newbase} and {newhead}") + })?; + + // Sort by filename, so it's consistent with GitHub UI + new.files + .sort_unstable_by(|f1, f2| f1.filename.cmp(&f2.filename)); + + anyhow::Result::<_>::Ok((newbase, new)) + }; + + // Wait for both futures and early exit if there is an error + let ((oldbase, old), (newbase, new)) = futures::try_join!(old, new)?; + + // Create the HTML buffer with a very rough approximation for the capacity + let mut html: String = String::with_capacity(800 + old.files.len() * 100); + + // Compute the bookmarklet for the current host + let bookmarklet = bookmarklet(&host); + + // Write HTML header, style, ... + writeln!( + &mut html, + r#" + + + + + + range-diff of {oldbase}...{oldhead} {newbase}...{newhead} + + + +

range-diff of {oldbase}...{oldhead} {newbase}...{newhead}

+

Bookmarklet: range-diff 🛈 | {ADDED_BLOCK_SIGN} added {REMOVED_BLOCK_SIGN} removed

+"# + )?; + + let mut process_diffs = |filename, old_patch, new_patch| -> anyhow::Result<()> { + // Removes diff markers to avoid false-positives + let new_marker = format!("@@ {filename}:"); + let old_patch = MARKER_RE.replace_all(old_patch, &*new_marker); + let new_patch = MARKER_RE.replace_all(new_patch, &*new_marker); + + // Prepare input + let input: InternedInput<&str> = InternedInput::new(&*old_patch, &*new_patch); + + // Compute the diff + let mut diff = Diff::compute(Algorithm::Histogram, &input); + + // Run postprocessing to improve hunk boundaries + diff.postprocess_lines(&input); + + // Determine if there are any differences + let has_hunks = diff.hunks().next().is_some(); + + if has_hunks { + let printer = HtmlDiffPrinter(&input.interner); + let diff = diff.unified_diff(&printer, config.clone(), &input); + + let before_href = + format_args!("https://github.com/{owner}/{repo}/blob/{oldhead}/{filename}"); + let after_href = + format_args!("https://github.com/{owner}/{repo}/blob/{newhead}/{filename}"); + + writeln!( + html, + r#"
{filename} before after
{diff}
"# + )?; + } + Ok(()) + }; + + let mut seen_files = HashSet::<&str>::new(); + + // Process the old files + for old_file in &old.files { + let filename = &*old_file.filename; + + let new_file_patch = new + .files + .iter() + .find(|f| f.filename == filename) + .map(|f| &*f.patch) + .unwrap_or_default(); + + seen_files.insert(filename); + + process_diffs(filename, &*old_file.patch, new_file_patch)?; + } + + // Process the not yet seen new files + for new_file in &new.files { + let filename = &*new_file.filename; + + if seen_files.contains(filename) { + continue; + } + + process_diffs(filename, "", &*new_file.patch)?; + } + + writeln!( + html, + r#" + + + "# + )?; + + let mut headers = HeaderMap::new(); + headers.insert( + CONTENT_TYPE, + HeaderValue::from_static("text/html; charset=utf-8"), + ); + headers.insert( + CACHE_CONTROL, + HeaderValue::from_static("public, max-age=15552000, immutable"), + ); + headers.insert( + CONTENT_SECURITY_POLICY, + HeaderValue::from_static( + "default-src 'none'; style-src 'unsafe-inline'; img-src www.rust-lang.org", + ), + ); + + Ok((StatusCode::OK, headers, html)) +} + +const REMOVED_BLOCK_SIGN: &str = r#"-"#; +const ADDED_BLOCK_SIGN: &str = r#"+"#; + +struct HtmlDiffPrinter<'a>(pub &'a Interner<&'a str>); + +impl HtmlDiffPrinter<'_> { + fn handle_hunk_token(&self, mut f: impl fmt::Write, color: &str, token: &str) -> fmt::Result { + // Highlight the whole line only if it has changes itself, otherwise + // only highlight the `+`, `-` to avoid distracting users with context + // changes. 
+ if token.starts_with('+') || token.starts_with('-') { + write!(f, r#""#)?; + pulldown_cmark_escape::escape_html(FmtWriter(&mut f), token)?; + write!(f, "")?; + } else { + pulldown_cmark_escape::escape_html(FmtWriter(&mut f), token)?; + } + Ok(()) + } +} + +impl UnifiedDiffPrinter for HtmlDiffPrinter<'_> { + fn display_header( + &self, + _f: impl fmt::Write, + _start_before: u32, + _start_after: u32, + _len_before: u32, + _len_after: u32, + ) -> fmt::Result { + // ignore the header as it does not represent anything meaningful for the range-diff + Ok(()) + } + + fn display_context_token(&self, mut f: impl fmt::Write, token: Token) -> fmt::Result { + let token = self.0[token]; + write!(f, " ")?; + pulldown_cmark_escape::escape_html(FmtWriter(&mut f), token)?; + if !token.ends_with('\n') { + writeln!(f)?; + } + Ok(()) + } + + fn display_hunk( + &self, + mut f: impl fmt::Write, + before: &[Token], + after: &[Token], + ) -> fmt::Result { + if let Some(&last) = before.last() { + for &token in before { + let token = self.0[token]; + write!(f, "{REMOVED_BLOCK_SIGN}")?; + self.handle_hunk_token(&mut f, "red", token)?; + } + if !self.0[last].ends_with('\n') { + writeln!(f)?; + } + } + + if let Some(&last) = after.last() { + for &token in after { + let token = self.0[token]; + write!(f, "{ADDED_BLOCK_SIGN}")?; + self.handle_hunk_token(&mut f, "green", token)?; + } + if !self.0[last].ends_with('\n') { + writeln!(f)?; + } + } + Ok(()) + } +} + +// Create the javascript bookmarklet based on the host +fn bookmarklet(host: &str) -> String { + let protocol = if host.starts_with("localhost:") { + "http" + } else { + "https" + }; + + format!( + r"javascript:(() => {{ + const githubUrlPattern = /^https:\/\/github\.com\/([^\/]+)\/([^\/]+)\/compare\/([^\/]+[.]{{2}}[^\/]+)$/; + const match = window.location.href.match(githubUrlPattern); + if (!match) {{alert('Invalid GitHub Compare URL format.\nExpected: https://github.com/ORG_NAME/REPO_NAME/compare/BASESHA..HEADSHA'); return;}} + const [, 
orgName, repoName, basehead] = match; window.location = `{protocol}://{host}/gh-range-diff/${{orgName}}/${{repoName}}/${{basehead}}`; +}})();" + ) +} diff --git a/src/github.rs b/src/github.rs index 26480fe9..e64f9127 100644 --- a/src/github.rs +++ b/src/github.rs @@ -258,6 +258,18 @@ impl GithubClient { .await .context("failed to retrive git trees") } + + pub async fn compare( + &self, + repo: &IssueRepository, + before: &str, + after: &str, + ) -> anyhow::Result { + let url = format!("{}/compare/{before}...{after}", repo.url(&self)); + self.json(self.get(&url)) + .await + .context("failed to retrive the compare") + } } #[derive(Debug, serde::Serialize)] diff --git a/src/lib.rs b/src/lib.rs index aba95801..9583dcbb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ pub mod bors; mod changelogs; mod config; pub mod db; +pub mod gh_range_diff; pub mod gha_logs; pub mod github; pub mod handlers; diff --git a/src/main.rs b/src/main.rs index 47ad8257..232b4575 100644 --- a/src/main.rs +++ b/src/main.rs @@ -181,6 +181,10 @@ async fn run_server(addr: SocketAddr) -> anyhow::Result<()> { "/gha-logs/{owner}/{repo}/{log-id}", get(triagebot::gha_logs::gha_logs), ) + .route( + "/gh-range-diff/{owner}/{repo}/{basehead}", + get(triagebot::gh_range_diff::gh_range_diff), + ) .nest("/agenda", agenda) .route("/bors-commit-list", get(triagebot::bors::bors_commit_list)) .route(