@@ -3,7 +3,6 @@ use crate::handlers::Context;
3
3
use anyhow:: Context as _;
4
4
use hyper:: header:: { CACHE_CONTROL , CONTENT_SECURITY_POLICY , CONTENT_TYPE } ;
5
5
use hyper:: { Body , Response , StatusCode } ;
6
- use itertools:: Itertools ;
7
6
use std:: collections:: VecDeque ;
8
7
use std:: str:: FromStr ;
9
8
use std:: sync:: Arc ;
@@ -100,11 +99,15 @@ async fn process_logs(
100
99
. context ( "unable to retrieve team repos" ) ?;
101
100
102
101
let Some ( repos) = repos. repos . get ( owner) else {
103
- anyhow:: bail!( "Organization `{owner}` is not part of team repos" )
102
+ return Ok ( bad_request ( format ! (
103
+ "organization `{owner}` is not part of the Rust Project team repos"
104
+ ) ) ) ;
104
105
} ;
105
106
106
107
if !repos. iter ( ) . any ( |r| r. name == repo) {
107
- anyhow:: bail!( "Repository `{repo}` is not part of team repos" ) ;
108
+ return Ok ( bad_request ( format ! (
109
+ "repository `{owner}` is not part of the Rust Project team repos"
110
+ ) ) ) ;
108
111
}
109
112
110
113
let log_uuid = format ! ( "{owner}/{repo}${log_id}" ) ;
@@ -132,22 +135,57 @@ async fn process_logs(
132
135
. workflow_run_job ( & repo, log_id)
133
136
. await
134
137
. context ( "unable to fetch job details" ) ?;
135
- let trees = ctx
138
+
139
+ // To minimize false positives in paths linked to the GitHub repositories, we
140
+ // restrict matching to only the second-level directories of the repository.
141
+ //
142
+ // We achieve this by retrieving the contents of the root repository and then
143
+ // retrieve the content of each top-level directory, which we then serialize for
144
+ // the JS so they can be escaped and concatenated into a regex OR pattern
145
+ // (e.g., `compiler/rustc_ast|tests/ui|src/version`) which is used in the JS regex.
146
+ let mut root_trees = ctx
136
147
. github
137
148
. repo_git_trees ( & repo, & job. head_sha )
138
149
. await
139
150
. context ( "unable to fetch git tree for the repository" ) ?;
140
151
141
- // To minimize false positives in paths linked to the GitHub repositories,
142
- // we restrict matching to only the top-level directories of the repository.
143
- // We achieve this by retrieving all "tree" objects and concatenating them
144
- // into a regex OR pattern (e.g., `compiler|tests|src`) which is used in the
145
- // JS regex.
146
- let tree_roots = trees
152
+ // Prune every entry that isn't a tree (aka directory)
153
+ root_trees. tree . retain ( |t| t. object_type == "tree" ) ;
154
+
155
+ // Retrieve the trees of all the sub-directories (for rust-lang/rust it's 6 API calls)
156
+ let roots_trees: Vec < _ > = root_trees
157
+ . tree
158
+ . iter ( )
159
+ . map ( |t| async { ctx. github . repo_git_trees ( & repo, & t. sha ) . await } )
160
+ . collect ( ) ;
161
+
162
+ // Join all futures and fail fast if one of them returns an error
163
+ let roots_trees = futures:: future:: try_join_all ( roots_trees)
164
+ . await
165
+ . context ( "unable to fetch content details" ) ?;
166
+
167
+ // Collect and fix-up all the paths to directories and files (avoid submodules)
168
+ let mut tree_roots: Vec < _ > = root_trees
147
169
. tree
148
170
. iter ( )
149
- . filter_map ( |t| ( t. object_type == "tree" ) . then_some ( & t. path ) )
150
- . join ( "|" ) ;
171
+ . zip ( & roots_trees)
172
+ . map ( |( root, childs) | {
173
+ childs
174
+ . tree
175
+ . iter ( )
176
+ . filter ( |t| t. object_type == "tree" || t. object_type == "blob" )
177
+ . map ( |t| format ! ( "{}/{}" , root. path, t. path) )
178
+ } )
179
+ . flatten ( )
180
+ . collect ( ) ;
181
+
182
+ // We need to sort the tree roots by descending order, otherwise `library/std` will
183
+ // be matched before `library/stdarch`
184
+ tree_roots. sort_by ( |a, b| b. cmp ( a) ) ;
185
+
186
+ // Serialize to a JS(ON) array so we can escape them in the browser
187
+ let tree_roots =
188
+ serde_json:: to_string ( & tree_roots) . context ( "unable to serialize the tree roots" ) ?;
151
189
152
190
anyhow:: Result :: < _ > :: Ok ( ( job, tree_roots) )
153
191
} ;
@@ -233,6 +271,7 @@ async fn process_logs(
233
271
import {{ AnsiUp }} from '{ANSI_UP_URL}'
234
272
235
273
var logs = {logs};
274
+ var tree_roots = {tree_roots};
236
275
var ansi_up = new AnsiUp();
237
276
238
277
// 1. Transform the ANSI escape codes to HTML
@@ -264,7 +303,7 @@ async fn process_logs(
264
303
// Detailed examples of what the regex does is at https://regex101.com/r/vCnx9Y/2
265
304
//
266
305
// But simply speaking the regex tries to find absolute (with `/checkout` prefix) and
267
- // relative paths, the path must start with one of the repository top- level directory .
306
+ // relative paths, the path must start with one of the repository's second-level entries (directories or files).
268
307
// We also try to retrieve the lines and cols if given (`<path>:line:col`).
269
308
//
270
309
// Some examples of paths we want to find:
@@ -273,7 +312,12 @@ async fn process_logs(
273
312
// - /checkout/src/doc/rustdoc/src/advanced-features.md
274
313
//
275
314
// Any other paths, in particular if prefixed by `./` or `obj/`, should not be taken.
276
- const pathRegex = /(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:{tree_roots})[\\\/][a-zA-Z0-9_$\-.\\\/]+))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)/g;
315
+ const pathRegex = new RegExp(
316
+ "(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:"
317
+ + tree_roots.map(p => RegExp.escape(p)).join("|") +
318
+ ")(?:[\\\/][a-zA-Z0-9_$\\\-.\\\/]+)?))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)",
319
+ "g"
320
+ );
277
321
html = html.replace(pathRegex, (match, boundary, inner, path, line, col) => {{
278
322
const pos = (line !== undefined) ? `#L${{line}}` : "";
279
323
return `${{boundary}}<a href="https://github.com/{owner}/{repo}/blob/{sha}/${{path}}${{pos}}" class="path-marker">${{inner}}</a>`;
@@ -345,3 +389,10 @@ pub fn failure_svg() -> anyhow::Result<Response<Body>, hyper::Error> {
345
389
. body ( Body :: from ( FAILURE_SVG ) )
346
390
. unwrap ( ) )
347
391
}
392
+
393
+ fn bad_request ( body : String ) -> Response < Body > {
394
+ Response :: builder ( )
395
+ . status ( StatusCode :: BAD_REQUEST )
396
+ . body ( Body :: from ( body) )
397
+ . unwrap ( )
398
+ }
0 commit comments