@@ -3,7 +3,6 @@ use crate::handlers::Context;
3
3
use anyhow:: Context as _;
4
4
use hyper:: header:: { CACHE_CONTROL , CONTENT_SECURITY_POLICY , CONTENT_TYPE } ;
5
5
use hyper:: { Body , Response , StatusCode } ;
6
- use itertools:: Itertools ;
7
6
use std:: collections:: VecDeque ;
8
7
use std:: str:: FromStr ;
9
8
use std:: sync:: Arc ;
@@ -136,22 +135,57 @@ async fn process_logs(
136
135
. workflow_run_job ( & repo, log_id)
137
136
. await
138
137
. context ( "unable to fetch job details" ) ?;
139
- let trees = ctx
138
+
139
+ // To minimize false positives in paths linked to the GitHub repositories, we
140
+ // restrict matching to only the second-level entries (directories and files) of the repository.
141
+ //
142
+ // We achieve this by retrieving the contents of the root repository and then
143
+ // retrieving the contents of each top-level directory, which we then serialize for
144
+ // the JS so they can be escaped and concatenated into a regex OR pattern
145
+ // (e.g., `compiler/rustc_ast|tests/ui|src/version`) which is used in the JS regex.
146
+ let mut root_trees = ctx
140
147
. github
141
148
. repo_git_trees ( & repo, & job. head_sha )
142
149
. await
143
150
. context ( "unable to fetch git tree for the repository" ) ?;
144
151
145
- // To minimize false positives in paths linked to the GitHub repositories,
146
- // we restrict matching to only the top-level directories of the repository.
147
- // We achieve this by retrieving all "tree" objects and concatenating them
148
- // into a regex OR pattern (e.g., `compiler|tests|src`) which is used in the
149
- // JS regex.
150
- let tree_roots = trees
152
+ // Prune every entry that isn't a tree (aka directory)
153
+ root_trees. tree . retain ( |t| t. object_type == "tree" ) ;
154
+
155
+ // Retrieve all the sub-directory trees (for rust-lang/rust it's 6 API calls)
156
+ let roots_trees: Vec < _ > = root_trees
157
+ . tree
158
+ . iter ( )
159
+ . map ( |t| async { ctx. github . repo_git_trees ( & repo, & t. sha ) . await } )
160
+ . collect ( ) ;
161
+
162
+ // Join all futures and fail fast if one of them returns an error
163
+ let roots_trees = futures:: future:: try_join_all ( roots_trees)
164
+ . await
165
+ . context ( "unable to fetch content details" ) ?;
166
+
167
+ // Collect and fix-up all the paths to directories and files (avoid submodules)
168
+ let mut tree_roots: Vec < _ > = root_trees
151
169
. tree
152
170
. iter ( )
153
- . filter_map ( |t| ( t. object_type == "tree" ) . then_some ( & t. path ) )
154
- . join ( "|" ) ;
171
+ . zip ( & roots_trees)
172
+ . map ( |( root, childs) | {
173
+ childs
174
+ . tree
175
+ . iter ( )
176
+ . filter ( |t| t. object_type == "tree" || t. object_type == "blob" )
177
+ . map ( |t| format ! ( "{}/{}" , root. path, t. path) )
178
+ } )
179
+ . flatten ( )
180
+ . collect ( ) ;
181
+
182
+ // We need to sort the tree roots by descending order, otherwise `library/std` will
183
+ // be matched before `library/stdarch`
184
+ tree_roots. sort_by ( |a, b| b. cmp ( a) ) ;
185
+
186
+ // Serialize to a JS(ON) array so we can escape them in the browser
187
+ let tree_roots =
188
+ serde_json:: to_string ( & tree_roots) . context ( "unable to serialize the tree roots" ) ?;
155
189
156
190
anyhow:: Result :: < _ > :: Ok ( ( job, tree_roots) )
157
191
} ;
@@ -237,6 +271,7 @@ async fn process_logs(
237
271
import {{ AnsiUp }} from '{ANSI_UP_URL}'
238
272
239
273
var logs = {logs};
274
+ var tree_roots = {tree_roots};
240
275
var ansi_up = new AnsiUp();
241
276
242
277
// 1. Tranform the ANSI escape codes to HTML
@@ -268,7 +303,7 @@ async fn process_logs(
268
303
// Detailed examples of what the regex does is at https://regex101.com/r/vCnx9Y/2
269
304
//
270
305
// But simply speaking the regex tries to find absolute (with `/checkout` prefix) and
271
- // relative paths, the path must start with one of the repository top- level directory .
306
+ // relative paths, the path must start with one of the repository level-2 directories .
272
307
// We also try to retrieve the lines and cols if given (`<path>:line:col`).
273
308
//
274
309
// Some examples of paths we want to find:
@@ -277,7 +312,12 @@ async fn process_logs(
277
312
// - /checkout/src/doc/rustdoc/src/advanced-features.md
278
313
//
279
314
// Any other paths, in particular if prefixed by `./` or `obj/` should not taken.
280
- const pathRegex = /(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:{tree_roots})[\\\/][a-zA-Z0-9_$\-.\\\/]+))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)/g;
315
+ const pathRegex = new RegExp(
316
+ "(?<boundary>[^a-zA-Z0-9.\\/])(?<inner>(?:[\\\/]?(?:checkout[\\\/])?(?<path>(?:"
317
+ + tree_roots.map(p => RegExp.escape(p)).join("|") +
318
+ ")(?:[\\\/][a-zA-Z0-9_$\\\-.\\\/]+)?))(?::(?<line>[0-9]+):(?<col>[0-9]+))?)",
319
+ "g"
320
+ );
281
321
html = html.replace(pathRegex, (match, boundary, inner, path, line, col) => {{
282
322
const pos = (line !== undefined) ? `#L${{line}}` : "";
283
323
return `${{boundary}}<a href="https://github.com/{owner}/{repo}/blob/{sha}/${{path}}${{pos}}" class="path-marker">${{inner}}</a>`;
0 commit comments