-use crate::github;
+use crate::github::{self, WorkflowRunJob};
 use crate::handlers::Context;
 use anyhow::Context as _;
 use hyper::header::{CACHE_CONTROL, CONTENT_SECURITY_POLICY, CONTENT_TYPE};
 use hyper::{Body, Response, StatusCode};
+use itertools::Itertools;
 use std::collections::VecDeque;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -14,11 +15,17 @@ const MAX_CACHE_CAPACITY_BYTES: u64 = 50 * 1024 * 1024; // 50 Mb
 #[derive(Default)]
 pub struct GitHubActionLogsCache {
     capacity: u64,
-    entries: VecDeque<(String, Arc<String>)>,
+    entries: VecDeque<(String, Arc<CachedLog>)>,
+}
+
+pub struct CachedLog {
+    job: WorkflowRunJob,
+    tree_roots: String,
+    logs: String,
 }

 impl GitHubActionLogsCache {
-    pub fn get(&mut self, key: &String) -> Option<Arc<String>> {
+    pub fn get(&mut self, key: &String) -> Option<Arc<CachedLog>> {
         if let Some(pos) = self.entries.iter().position(|(k, _)| k == key) {
             // Move previously cached entry to the front
             let entry = self.entries.remove(pos).unwrap();
@@ -29,26 +36,26 @@ impl GitHubActionLogsCache {
         }
     }

-    pub fn put(&mut self, key: String, value: Arc<String>) -> Arc<String> {
-        if value.len() as u64 > MAX_CACHE_CAPACITY_BYTES {
+    pub fn put(&mut self, key: String, value: Arc<CachedLog>) -> Arc<CachedLog> {
+        if value.logs.len() as u64 > MAX_CACHE_CAPACITY_BYTES {
             // Entry is too large, don't cache, return as is
             return value;
         }

         // Remove duplicate or last entry when necessary
         let removed = if let Some(pos) = self.entries.iter().position(|(k, _)| k == &key) {
             self.entries.remove(pos)
-        } else if self.capacity + value.len() as u64 >= MAX_CACHE_CAPACITY_BYTES {
+        } else if self.capacity + value.logs.len() as u64 >= MAX_CACHE_CAPACITY_BYTES {
             self.entries.pop_back()
         } else {
             None
         };
         if let Some(removed) = removed {
-            self.capacity -= removed.1.len() as u64;
+            self.capacity -= removed.1.logs.len() as u64;
         }

         // Add the entry to the front of the list and return it
-        self.capacity += value.len() as u64;
+        self.capacity += value.logs.len() as u64;
         self.entries.push_front((key, value.clone()));
         value
     }
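
Taken together, `get` and `put` implement a byte-bounded LRU over the `logs` payloads: a hit is promoted to the front of the deque, a duplicate key replaces its old entry, and the least recently used entry is popped from the back once the summed sizes would cross `MAX_CACHE_CAPACITY_BYTES`. Below is a minimal standalone sketch of the same eviction policy, using plain `String` payloads and a deliberately tiny capacity so the eviction is visible; all names and sizes are illustrative, not part of the patch.

```rust
use std::collections::VecDeque;
use std::sync::Arc;

const MAX_CAPACITY_BYTES: u64 = 16; // tiny, so eviction triggers quickly

#[derive(Default)]
struct ByteLru {
    capacity: u64, // bytes currently held
    entries: VecDeque<(String, Arc<String>)>,
}

impl ByteLru {
    fn put(&mut self, key: String, value: Arc<String>) -> Arc<String> {
        if value.len() as u64 > MAX_CAPACITY_BYTES {
            return value; // too large: hand back uncached, as in the patch
        }
        // Drop a duplicate key, or the least recently used entry if full
        let removed = if let Some(pos) = self.entries.iter().position(|(k, _)| k == &key) {
            self.entries.remove(pos)
        } else if self.capacity + value.len() as u64 >= MAX_CAPACITY_BYTES {
            self.entries.pop_back()
        } else {
            None
        };
        if let Some((_, v)) = removed {
            self.capacity -= v.len() as u64;
        }
        self.capacity += value.len() as u64;
        self.entries.push_front((key, value.clone()));
        value
    }
}

fn main() {
    let mut cache = ByteLru::default();
    cache.put("a".into(), Arc::new("0123456789".into()));
    cache.put("b".into(), Arc::new("0123456789".into())); // 10 + 10 >= 16: evicts "a"
    assert_eq!(cache.entries.len(), 1);
    assert_eq!(cache.entries.front().map(|(k, _)| k.as_str()), Some("b"));
}
```

Keeping the running byte count in `capacity` avoids re-walking the deque on every insertion; the trade-off is that it must be decremented on every removal path.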
@@ -99,34 +106,78 @@ async fn process_logs(
     let log_uuid = format!("{owner}/{repo}${log_id}");

-    let logs = 'logs: {
+    let CachedLog {
+        job,
+        tree_roots,
+        logs,
+    } = &*'logs: {
         if let Some(logs) = ctx.gha_logs.write().await.get(&log_uuid) {
             tracing::info!("gha_logs: cache hit for {log_uuid}");
             break 'logs logs;
         }

         tracing::info!("gha_logs: cache miss for {log_uuid}");
-        let logs = ctx
-            .github
-            .raw_job_logs(
-                &github::IssueRepository {
-                    organization: owner.to_string(),
-                    repository: repo.to_string(),
-                },
-                log_id,
-            )
-            .await
-            .context("unable to get the raw logs")?;
-
-        let json_logs = serde_json::to_string(&*logs).context("unable to JSON-ify the raw logs")?;
-
-        ctx.gha_logs
-            .write()
-            .await
-            .put(log_uuid.clone(), json_logs.into())
+
+        let repo = github::IssueRepository {
+            organization: owner.to_string(),
+            repository: repo.to_string(),
+        };
+
+        let job_and_tree_roots = async {
+            let job = ctx
+                .github
+                .workflow_run_job(&repo, log_id)
+                .await
+                .context("unable to fetch job details")?;
+            let trees = ctx
+                .github
+                .repo_git_trees(&repo, &job.head_sha)
+                .await
+                .context("unable to fetch the git tree for the repository")?;
+
+            // To minimize false positives in paths linked to the GitHub repository,
+            // we restrict matching to the repository's top-level directories only.
+            // We achieve this by retrieving all "tree" objects and concatenating them
+            // into a regex OR pattern (e.g., `compiler|tests|src`) which is used in the
+            // JS regex.
+            let tree_roots = trees
+                .tree
+                .iter()
+                .filter_map(|t| (t.object_type == "tree").then_some(&t.path))
+                .join("|");
+
+            anyhow::Result::<_>::Ok((job, tree_roots))
+        };
+
+        let logs = async {
+            let logs = ctx
+                .github
+                .raw_job_logs(&repo, log_id)
+                .await
+                .context("unable to get the raw logs")?;
+
+            let json_logs =
+                serde_json::to_string(&*logs).context("unable to JSON-ify the raw logs")?;
+
+            anyhow::Result::<_>::Ok(json_logs)
+        };
+
+        let (job_and_tree_roots, logs) = futures::join!(job_and_tree_roots, logs);
+        let ((job, tree_roots), logs) = (job_and_tree_roots?, logs?);
+
+        ctx.gha_logs.write().await.put(
+            log_uuid.clone(),
+            CachedLog {
+                job,
+                tree_roots,
+                logs,
+            }
+            .into(),
+        )
     };

     let nonce = Uuid::new_v4().to_hyphenated().to_string();
+    let sha = &*job.head_sha;

     let html = format!(
         r###"<!DOCTYPE html>
@@ -157,6 +208,9 @@ async fn process_logs(
         .warning-marker {{
             color: #c69026;
         }}
+        .path-marker {{
+            color: #26c6a8;
+        }}
     </style>
     <script type="module" nonce="{nonce}">
         import {{ AnsiUp }} from '{ANSI_UP_URL}'
@@ -189,11 +243,30 @@ async fn process_logs(
             `<span class="warning-marker">##[warning]</span>`
         );

-        // 5. Add the html to the DOM
+        // 5. Add anchors around some paths
+        // Detailed examples of what the regex does are at https://regex101.com/r/vCnx9Y/2
+        //
+        // Simply speaking, the regex tries to find absolute (with the `/checkout` prefix) and
+        // relative paths; a path must start with one of the repository's top-level directories.
+        // We also try to retrieve the line and column if given (`<path>:line:col`).
+        //
+        // Some examples of paths we want to find:
+        //  - src/tools/test-float-parse/src/traits.rs:173:11
+        //  - /checkout/compiler/rustc_macros
+        //  - /checkout/src/doc/rustdoc/src/advanced-features.md
+        //
+        // Any other paths, in particular those prefixed by `./` or `obj/`, should not be taken.
+        const pathRegex = /(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:{tree_roots})[\\\/][a-zA-Z0-9_$\-.\\\/]+))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)/g;
+        html = html.replace(pathRegex, (match, boundary, inner, path, line, col) => {{
+            const pos = (line !== undefined) ? `#L${{line}}` : "";
+            return `${{boundary}}<a href="https://github.com/{owner}/{repo}/blob/{sha}/${{path}}${{pos}}" class="path-marker">${{inner}}</a>`;
+        }});
+
+        // 6. Add the html to the DOM
         var cdiv = document.getElementById("console");
         cdiv.innerHTML = html;

-        // 6. If no anchor is given, scroll to the last error
+        // 7. If no anchor is given, scroll to the last error
         if (location.hash === "" && errorCounter >= 0) {{
             const hasSmallViewport = window.innerWidth <= 750;
             document.getElementById(`error-${{errorCounter}}`).scrollIntoView({{
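
The path-linking behaviour is easiest to check outside the browser. Below is a rough Rust translation of the JS pattern using the `regex` crate, with the `boundary` group and JS-specific escaping dropped and `compiler|src|tests` assumed as the tree roots; it is a sketch for experimentation, not the exact pattern shipped in the patch:

```rust
use regex::Regex;

fn main() {
    // Assumed top-level directories; in the patch this comes from `tree_roots`.
    let tree_roots = "compiler|src|tests";
    let re = Regex::new(&format!(
        r"(?:/?(?:checkout/)?(?P<path>(?:{tree_roots})/[A-Za-z0-9_$\-./]+))(?::(?P<line>[0-9]+):(?P<col>[0-9]+))?"
    ))
    .unwrap();

    for candidate in [
        "src/tools/test-float-parse/src/traits.rs:173:11", // relative, with line:col
        "/checkout/compiler/rustc_macros",                 // absolute, `/checkout` prefix
        "./obj/cores/core.1234",                           // not under a tree root: no match
    ] {
        match re.captures(candidate) {
            Some(c) => println!(
                "path = {}, line = {:?}",
                &c["path"],
                c.name("line").map(|m| m.as_str())
            ),
            None => println!("no match: {candidate}"),
        }
    }
}
```

The optional `(?::(?P<line>...):(?P<col>...))?` tail is what lets the generated anchor carry a `#L{line}` fragment when the log line includes a position.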