Skip to content

Commit 456030e

Browse files
authored
Merge pull request #2120 from Urgau/gha_logs-paths
Add anchor around repository paths in our GHA logs viewer
2 parents 51616b1 + e29fa01 commit 456030e

File tree

2 files changed

+135
-29
lines changed

2 files changed

+135
-29
lines changed

src/gha_logs.rs

Lines changed: 102 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
use crate::github;
1+
use crate::github::{self, WorkflowRunJob};
22
use crate::handlers::Context;
33
use anyhow::Context as _;
44
use hyper::header::{CACHE_CONTROL, CONTENT_SECURITY_POLICY, CONTENT_TYPE};
55
use hyper::{Body, Response, StatusCode};
6+
use itertools::Itertools;
67
use std::collections::VecDeque;
78
use std::str::FromStr;
89
use std::sync::Arc;
@@ -14,11 +15,17 @@ const MAX_CACHE_CAPACITY_BYTES: u64 = 50 * 1024 * 1024; // 50 Mb
1415
#[derive(Default)]
1516
pub struct GitHubActionLogsCache {
1617
capacity: u64,
17-
entries: VecDeque<(String, Arc<String>)>,
18+
entries: VecDeque<(String, Arc<CachedLog>)>,
19+
}
20+
21+
pub struct CachedLog {
22+
job: WorkflowRunJob,
23+
tree_roots: String,
24+
logs: String,
1825
}
1926

2027
impl GitHubActionLogsCache {
21-
pub fn get(&mut self, key: &String) -> Option<Arc<String>> {
28+
pub fn get(&mut self, key: &String) -> Option<Arc<CachedLog>> {
2229
if let Some(pos) = self.entries.iter().position(|(k, _)| k == key) {
2330
// Move previously cached entry to the front
2431
let entry = self.entries.remove(pos).unwrap();
@@ -29,26 +36,26 @@ impl GitHubActionLogsCache {
2936
}
3037
}
3138

32-
pub fn put(&mut self, key: String, value: Arc<String>) -> Arc<String> {
33-
if value.len() as u64 > MAX_CACHE_CAPACITY_BYTES {
39+
pub fn put(&mut self, key: String, value: Arc<CachedLog>) -> Arc<CachedLog> {
40+
if value.logs.len() as u64 > MAX_CACHE_CAPACITY_BYTES {
3441
// Entry is too large, don't cache, return as is
3542
return value;
3643
}
3744

3845
// Remove duplicate or last entry when necessary
3946
let removed = if let Some(pos) = self.entries.iter().position(|(k, _)| k == &key) {
4047
self.entries.remove(pos)
41-
} else if self.capacity + value.len() as u64 >= MAX_CACHE_CAPACITY_BYTES {
48+
} else if self.capacity + value.logs.len() as u64 >= MAX_CACHE_CAPACITY_BYTES {
4249
self.entries.pop_back()
4350
} else {
4451
None
4552
};
4653
if let Some(removed) = removed {
47-
self.capacity -= removed.1.len() as u64;
54+
self.capacity -= removed.1.logs.len() as u64;
4855
}
4956

5057
        // Add entry to the front of the list and return it
51-
self.capacity += value.len() as u64;
58+
self.capacity += value.logs.len() as u64;
5259
self.entries.push_front((key, value.clone()));
5360
value
5461
}
@@ -99,34 +106,78 @@ async fn process_logs(
99106

100107
let log_uuid = format!("{owner}/{repo}${log_id}");
101108

102-
let logs = 'logs: {
109+
let CachedLog {
110+
job,
111+
tree_roots,
112+
logs,
113+
} = &*'logs: {
103114
if let Some(logs) = ctx.gha_logs.write().await.get(&log_uuid) {
104115
tracing::info!("gha_logs: cache hit for {log_uuid}");
105116
break 'logs logs;
106117
}
107118

108119
tracing::info!("gha_logs: cache miss for {log_uuid}");
109-
let logs = ctx
110-
.github
111-
.raw_job_logs(
112-
&github::IssueRepository {
113-
organization: owner.to_string(),
114-
repository: repo.to_string(),
115-
},
116-
log_id,
117-
)
118-
.await
119-
.context("unable to get the raw logs")?;
120-
121-
let json_logs = serde_json::to_string(&*logs).context("unable to JSON-ify the raw logs")?;
122-
123-
ctx.gha_logs
124-
.write()
125-
.await
126-
.put(log_uuid.clone(), json_logs.into())
120+
121+
let repo = github::IssueRepository {
122+
organization: owner.to_string(),
123+
repository: repo.to_string(),
124+
};
125+
126+
let job_and_tree_roots = async {
127+
let job = ctx
128+
.github
129+
.workflow_run_job(&repo, log_id)
130+
.await
131+
.context("unable to fetch job details")?;
132+
let trees = ctx
133+
.github
134+
.repo_git_trees(&repo, &job.head_sha)
135+
.await
136+
.context("unable to fetch git tree for the repository")?;
137+
138+
// To minimize false positives in paths linked to the GitHub repositories,
139+
// we restrict matching to only the top-level directories of the repository.
140+
// We achieve this by retrieving all "tree" objects and concatenating them
141+
// into a regex OR pattern (e.g., `compiler|tests|src`) which is used in the
142+
// JS regex.
143+
let tree_roots = trees
144+
.tree
145+
.iter()
146+
.filter_map(|t| (t.object_type == "tree").then_some(&t.path))
147+
.join("|");
148+
149+
anyhow::Result::<_>::Ok((job, tree_roots))
150+
};
151+
152+
let logs = async {
153+
let logs = ctx
154+
.github
155+
.raw_job_logs(&repo, log_id)
156+
.await
157+
.context("unable to get the raw logs")?;
158+
159+
let json_logs =
160+
serde_json::to_string(&*logs).context("unable to JSON-ify the raw logs")?;
161+
162+
anyhow::Result::<_>::Ok(json_logs)
163+
};
164+
165+
let (job_and_tree_roots, logs) = futures::join!(job_and_tree_roots, logs);
166+
let ((job, tree_roots), logs) = (job_and_tree_roots?, logs?);
167+
168+
ctx.gha_logs.write().await.put(
169+
log_uuid.clone(),
170+
CachedLog {
171+
job,
172+
tree_roots,
173+
logs,
174+
}
175+
.into(),
176+
)
127177
};
128178

129179
let nonce = Uuid::new_v4().to_hyphenated().to_string();
180+
let sha = &*job.head_sha;
130181

131182
let html = format!(
132183
r###"<!DOCTYPE html>
@@ -157,6 +208,9 @@ async fn process_logs(
157208
.warning-marker {{
158209
color: #c69026;
159210
}}
211+
.path-marker {{
212+
color: #26c6a8;
213+
}}
160214
</style>
161215
<script type="module" nonce="{nonce}">
162216
import {{ AnsiUp }} from '{ANSI_UP_URL}'
@@ -189,11 +243,30 @@ async fn process_logs(
189243
`<span class="warning-marker">##[warning]</span>`
190244
);
191245
192-
// 5. Add the html to the DOM
246+
// 5. Add anchors around some paths
247+
            // Detailed examples of what the regex does are at https://regex101.com/r/vCnx9Y/2
248+
//
249+
// But simply speaking the regex tries to find absolute (with `/checkout` prefix) and
250+
            // relative paths; the path must start with one of the repository's top-level directories.
251+
// We also try to retrieve the lines and cols if given (`<path>:line:col`).
252+
//
253+
// Some examples of paths we want to find:
254+
// - src/tools/test-float-parse/src/traits.rs:173:11
255+
// - /checkout/compiler/rustc_macros
256+
// - /checkout/src/doc/rustdoc/src/advanced-features.md
257+
//
258+
            // Any other paths, in particular those prefixed by `./` or `obj/`, should not be taken.
259+
const pathRegex = /(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:{tree_roots})[\\\/][a-zA-Z0-9_$\-.\\\/]+))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)/g;
260+
html = html.replace(pathRegex, (match, boundary, inner, path, line, col) => {{
261+
const pos = (line !== undefined) ? `#L${{line}}` : "";
262+
return `${{boundary}}<a href="https://github.com/{owner}/{repo}/blob/{sha}/${{path}}${{pos}}" class="path-marker">${{inner}}</a>`;
263+
}});
264+
265+
// 6. Add the html to the DOM
193266
var cdiv = document.getElementById("console");
194267
cdiv.innerHTML = html;
195268
196-
// 6. If no anchor is given, scroll to the last error
269+
// 7. If no anchor is given, scroll to the last error
197270
if (location.hash === "" && errorCounter >= 0) {{
198271
const hasSmallViewport = window.innerWidth <= 750;
199272
document.getElementById(`error-${{errorCounter}}`).scrollIntoView({{

src/github.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,28 @@ impl GithubClient {
232232
.context("failed to retrieve job logs")?;
233233
Ok(String::from_utf8_lossy(&body).to_string())
234234
}
235+
236+
pub async fn workflow_run_job(
237+
&self,
238+
repo: &IssueRepository,
239+
job_id: u128,
240+
) -> anyhow::Result<WorkflowRunJob> {
241+
let url = format!("{}/actions/jobs/{job_id}", repo.url(&self));
242+
self.json(self.get(&url))
243+
.await
244+
            .context("failed to retrieve workflow job run details")
245+
}
246+
247+
pub async fn repo_git_trees(
248+
&self,
249+
repo: &IssueRepository,
250+
sha: &str,
251+
) -> anyhow::Result<GitTrees> {
252+
let url = format!("{}/git/trees/{sha}", repo.url(&self));
253+
self.json(self.get(&url))
254+
.await
255+
            .context("failed to retrieve git trees")
256+
}
235257
}
236258

237259
#[derive(Debug, serde::Serialize)]
@@ -1240,6 +1262,11 @@ impl IssuesEvent {
12401262
#[derive(Debug, serde::Deserialize)]
12411263
struct PullRequestEventFields {}
12421264

1265+
#[derive(Debug, serde::Deserialize)]
1266+
pub struct WorkflowRunJob {
1267+
pub head_sha: String,
1268+
}
1269+
12431270
#[derive(Clone, Debug, serde::Deserialize)]
12441271
pub struct CommitBase {
12451272
pub sha: String,
@@ -2829,6 +2856,12 @@ pub struct GitTreeObject {
28292856
pub sha: String,
28302857
}
28312858

2859+
#[derive(Debug, serde::Deserialize)]
2860+
pub struct GitTrees {
2861+
pub sha: String,
2862+
pub tree: Vec<GitTreeEntry>,
2863+
}
2864+
28322865
#[derive(Debug, serde::Serialize, serde::Deserialize)]
28332866
pub struct GitTreeEntry {
28342867
pub path: String,

0 commit comments

Comments
 (0)