Skip to content

Commit 2f00fe7

Browse files
authored
Merge pull request #2124 from Urgau/gha_logs-misc
Misc changes around our GHA logs viewers
2 parents d2e2064 + c9d45b5 commit 2f00fe7

File tree

2 files changed

+66
-15
lines changed

2 files changed

+66
-15
lines changed

src/gha_logs.rs

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ use crate::handlers::Context;
33
use anyhow::Context as _;
44
use hyper::header::{CACHE_CONTROL, CONTENT_SECURITY_POLICY, CONTENT_TYPE};
55
use hyper::{Body, Response, StatusCode};
6-
use itertools::Itertools;
76
use std::collections::VecDeque;
87
use std::str::FromStr;
98
use std::sync::Arc;
@@ -100,11 +99,15 @@ async fn process_logs(
10099
.context("unable to retrieve team repos")?;
101100

102101
let Some(repos) = repos.repos.get(owner) else {
103-
anyhow::bail!("Organization `{owner}` is not part of team repos")
102+
return Ok(bad_request(format!(
103+
"organization `{owner}` is not part of the Rust Project team repos"
104+
)));
104105
};
105106

106107
if !repos.iter().any(|r| r.name == repo) {
107-
anyhow::bail!("Repository `{repo}` is not part of team repos");
108+
return Ok(bad_request(format!(
109+
"repository `{repo}` is not part of the Rust Project team repos"
110+
)));
108111
}
109112

110113
let log_uuid = format!("{owner}/{repo}${log_id}");
@@ -132,22 +135,57 @@ async fn process_logs(
132135
.workflow_run_job(&repo, log_id)
133136
.await
134137
.context("unable to fetch job details")?;
135-
let trees = ctx
138+
139+
// To minimize false positives in paths linked to the GitHub repositories, we
140+
// restrict matching to only the second-level entries (directories and files) of the repository.
141+
//
142+
// We achieve this by retrieving the contents of the root repository and then
143+
// retrieving the contents of each top-level directory, which we then serialize for
144+
// the JS so they can be escaped and concatenated into a regex OR pattern
145+
// (e.g., `compiler/rustc_ast|tests/ui|src/version`) which is used in the JS regex.
146+
let mut root_trees = ctx
136147
.github
137148
.repo_git_trees(&repo, &job.head_sha)
138149
.await
139150
.context("unable to fetch git tree for the repository")?;
140151

141-
// To minimize false positives in paths linked to the GitHub repositories,
142-
// we restrict matching to only the top-level directories of the repository.
143-
// We achieve this by retrieving all "tree" objects and concatenating them
144-
// into a regex OR pattern (e.g., `compiler|tests|src`) which is used in the
145-
// JS regex.
146-
let tree_roots = trees
152+
// Prune every entry that isn't a tree (aka directory)
153+
root_trees.tree.retain(|t| t.object_type == "tree");
154+
155+
// Retrieve all the sub-directory trees (for rust-lang/rust it's 6 API calls)
156+
let roots_trees: Vec<_> = root_trees
157+
.tree
158+
.iter()
159+
.map(|t| async { ctx.github.repo_git_trees(&repo, &t.sha).await })
160+
.collect();
161+
162+
// Join all futures and fail fast if one of them returns an error
163+
let roots_trees = futures::future::try_join_all(roots_trees)
164+
.await
165+
.context("unable to fetch content details")?;
166+
167+
// Collect and fix-up all the paths to directories and files (avoid submodules)
168+
let mut tree_roots: Vec<_> = root_trees
147169
.tree
148170
.iter()
149-
.filter_map(|t| (t.object_type == "tree").then_some(&t.path))
150-
.join("|");
171+
.zip(&roots_trees)
172+
.map(|(root, childs)| {
173+
childs
174+
.tree
175+
.iter()
176+
.filter(|t| t.object_type == "tree" || t.object_type == "blob")
177+
.map(|t| format!("{}/{}", root.path, t.path))
178+
})
179+
.flatten()
180+
.collect();
181+
182+
// We need to sort the tree roots in descending order, otherwise `library/std` will
183+
// be matched before `library/stdarch`
184+
tree_roots.sort_by(|a, b| b.cmp(a));
185+
186+
// Serialize to a JS(ON) array so we can escape them in the browser
187+
let tree_roots =
188+
serde_json::to_string(&tree_roots).context("unable to serialize the tree roots")?;
151189

152190
anyhow::Result::<_>::Ok((job, tree_roots))
153191
};
@@ -233,6 +271,7 @@ async fn process_logs(
233271
import {{ AnsiUp }} from '{ANSI_UP_URL}'
234272
235273
var logs = {logs};
274+
var tree_roots = {tree_roots};
236275
var ansi_up = new AnsiUp();
237276
238277
// 1. Transform the ANSI escape codes to HTML
@@ -264,7 +303,7 @@ async fn process_logs(
264303
// Detailed examples of what the regex does is at https://regex101.com/r/vCnx9Y/2
265304
//
266305
// But simply speaking the regex tries to find absolute (with `/checkout` prefix) and
267-
// relative paths, the path must start with one of the repository top-level directory.
306+
// relative paths, the path must start with one of the repository level-2 directories or files.
268307
// We also try to retrieve the lines and cols if given (`<path>:line:col`).
269308
//
270309
// Some examples of paths we want to find:
@@ -273,7 +312,12 @@ async fn process_logs(
273312
// - /checkout/src/doc/rustdoc/src/advanced-features.md
274313
//
275314
// Any other paths, in particular if prefixed by `./` or `obj/` should not be taken.
276-
const pathRegex = /(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:{tree_roots})[\\\/][a-zA-Z0-9_$\-.\\\/]+))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)/g;
315+
const pathRegex = new RegExp(
316+
"(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:"
317+
+ tree_roots.map(p => RegExp.escape(p)).join("|") +
318+
")(?:[\\\/][a-zA-Z0-9_$\\\-.\\\/]+)?))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)",
319+
"g"
320+
);
277321
html = html.replace(pathRegex, (match, boundary, inner, path, line, col) => {{
278322
const pos = (line !== undefined) ? `#L${{line}}` : "";
279323
return `${{boundary}}<a href="https://github.com/{owner}/{repo}/blob/{sha}/${{path}}${{pos}}" class="path-marker">${{inner}}</a>`;
@@ -345,3 +389,10 @@ pub fn failure_svg() -> anyhow::Result<Response<Body>, hyper::Error> {
345389
.body(Body::from(FAILURE_SVG))
346390
.unwrap())
347391
}
392+
393+
/// Builds a `400 Bad Request` response whose body is the given message.
///
/// Only the status and the body are set here; no headers (such as a content
/// type) are added. The final `unwrap` panics if the response builder reports
/// an error.
///
/// NOTE(review): callers in this file interpolate `owner`/`repo` into `body`;
/// presumably those values come from the request URL. Consider an explicit
/// `text/plain` content type so the message is never rendered as HTML
/// (TODO confirm).
fn bad_request(body: String) -> Response<Body> {
394+
Response::builder()
395+
.status(StatusCode::BAD_REQUEST)
396+
.body(Body::from(body))
397+
.unwrap()
398+
}

src/gha_logs/failure.svg

Lines changed: 1 addition & 1 deletion
Loading

0 commit comments

Comments
 (0)