Skip to content

Commit b76e8bd

Browse files
committed
first stab at optimizing for-file
1 parent 49c7430 commit b76e8bd

File tree

2 files changed

+327
-1
lines changed

2 files changed

+327
-1
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
## Re-architect `codeowners-rs`
2+
3+
The CLI behaviors live in `src/cli.rs`.
4+
5+
### What this tool does
6+
- **Generate CODEOWNERS**: Builds `.github/CODEOWNERS` from multiple ownership sources. See mappers in `src/ownership/mapper/`.
7+
- **Answer per-file ownership**: The `for-file` command returns the owner of a given file even if the checked-in `CODEOWNERS` is not up to date.
8+
9+
### Current implementations
10+
- **Fast but potentially inaccurate**: `Runner::for_file_from_codeowners` (in `src/runner.rs`) parses the existing `CODEOWNERS` file to find a file’s owner. It’s very fast, but can be wrong if `CODEOWNERS` is stale.
11+
- **Accurate but slow**: `Runner::for_file` is correct but can take up to ~4 seconds on large repositories because it effectively determines ownership for many files and then returns the single result needed.
12+
13+
Ownership resolution is complex because definitions often live in other files (packages, team configs, globs, etc.).
14+
15+
## Assignment
16+
Implement a faster `for_file` that remains accurate.
17+
18+
### Requirements
19+
- **Performance**: Under 1 second on large codebases (e.g., `$HOME/workspace/large`).
20+
- **Correctness**: All existing tests must continue to pass.
21+
- **Compatibility**: No changes to the external behavior of the `for-file` CLI command.

src/runner.rs

Lines changed: 306 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pub fn for_file(run_config: &RunConfig, file_path: &str, fast: bool) -> RunResul
3939
if fast {
4040
for_file_from_codeowners(run_config, file_path)
4141
} else {
42-
run_with_runner(run_config, |runner| runner.for_file(file_path))
42+
for_file_optimized(run_config, file_path)
4343
}
4444
}
4545

@@ -83,6 +83,311 @@ pub fn team_for_file_from_codeowners(run_config: &RunConfig, file_path: &str) ->
8383
.map_err(|e| Error::Io(e.to_string()))?)
8484
}
8585

86+
use std::collections::{HashMap, HashSet};
87+
use std::fs;
88+
use fast_glob::glob_match;
89+
use glob::glob;
90+
use lazy_static::lazy_static;
91+
use regex::Regex;
92+
93+
fn for_file_optimized(run_config: &RunConfig, file_path: &str) -> RunResult {
94+
let config = match config_from_path(&run_config.config_path) {
95+
Ok(c) => c,
96+
Err(err) => {
97+
return RunResult {
98+
io_errors: vec![err.to_string()],
99+
..Default::default()
100+
}
101+
}
102+
};
103+
104+
let absolute_file_path = std::path::Path::new(file_path);
105+
let relative_file_path = absolute_file_path
106+
.strip_prefix(&run_config.project_root)
107+
.unwrap_or(absolute_file_path)
108+
.to_path_buf();
109+
110+
let teams = match load_teams(&run_config.project_root, &config.team_file_glob) {
111+
Ok(t) => t,
112+
Err(err) => {
113+
return RunResult {
114+
io_errors: vec![err.to_string()],
115+
..Default::default()
116+
}
117+
}
118+
};
119+
let teams_by_name = build_teams_by_name_map(&teams);
120+
121+
let mut sources_by_team: HashMap<String, Vec<crate::ownership::mapper::Source>> = HashMap::new();
122+
123+
if let Some(team_name) = read_top_of_file_team(&absolute_file_path.to_path_buf()) {
124+
if let Some(team) = teams_by_name.get(&team_name) {
125+
sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamFile);
126+
}
127+
}
128+
129+
if let Some((owner_team_name, dir_source)) = most_specific_directory_owner(
130+
&run_config.project_root,
131+
&relative_file_path,
132+
&teams_by_name,
133+
) {
134+
sources_by_team.entry(owner_team_name).or_default().push(dir_source);
135+
}
136+
137+
if let Some((owner_team_name, package_source)) = nearest_package_owner(
138+
&run_config.project_root,
139+
&relative_file_path,
140+
&config,
141+
&teams_by_name,
142+
) {
143+
sources_by_team.entry(owner_team_name).or_default().push(package_source);
144+
}
145+
146+
if let Some((owner_team_name, gem_source)) = vendored_gem_owner(&relative_file_path, &config, &teams) {
147+
sources_by_team.entry(owner_team_name).or_default().push(gem_source);
148+
}
149+
150+
if let Some(rel_str) = relative_file_path.to_str() {
151+
for team in &teams {
152+
let subtracts: HashSet<&str> = team.subtracted_globs.iter().map(|s| s.as_str()).collect();
153+
for owned_glob in &team.owned_globs {
154+
if glob_match(owned_glob, rel_str) && !subtracts.iter().any(|sub| glob_match(sub, rel_str)) {
155+
sources_by_team
156+
.entry(team.name.clone())
157+
.or_default()
158+
.push(crate::ownership::mapper::Source::TeamGlob(owned_glob.clone()));
159+
}
160+
}
161+
}
162+
}
163+
164+
for team in &teams {
165+
let team_rel = team
166+
.path
167+
.strip_prefix(&run_config.project_root)
168+
.unwrap_or(&team.path)
169+
.to_path_buf();
170+
if team_rel == relative_file_path {
171+
sources_by_team.entry(team.name.clone()).or_default().push(crate::ownership::mapper::Source::TeamYml);
172+
}
173+
}
174+
175+
let mut file_owners: Vec<FileOwner> = Vec::new();
176+
for (team_name, sources) in sources_by_team.into_iter() {
177+
if let Some(team) = teams_by_name.get(&team_name) {
178+
let relative_team_yml_path = team
179+
.path
180+
.strip_prefix(&run_config.project_root)
181+
.unwrap_or(&team.path)
182+
.to_string_lossy()
183+
.to_string();
184+
file_owners.push(FileOwner {
185+
team: team.clone(),
186+
team_config_file_path: relative_team_yml_path,
187+
sources,
188+
});
189+
}
190+
}
191+
192+
let info_messages: Vec<String> = match file_owners.len() {
193+
0 => vec![format!("{}", FileOwner::default())],
194+
1 => vec![format!("{}", file_owners[0])],
195+
_ => {
196+
let mut error_messages = vec!["Error: file is owned by multiple teams!".to_string()];
197+
for file_owner in file_owners {
198+
error_messages.push(format!("\n{}", file_owner));
199+
}
200+
return RunResult {
201+
validation_errors: error_messages,
202+
..Default::default()
203+
};
204+
}
205+
};
206+
RunResult { info_messages, ..Default::default() }
207+
}
208+
209+
fn build_teams_by_name_map(teams: &[Team]) -> HashMap<String, Team> {
210+
let mut map = HashMap::new();
211+
for team in teams {
212+
map.insert(team.name.clone(), team.clone());
213+
map.insert(team.github_team.clone(), team.clone());
214+
}
215+
map
216+
}
217+
218+
fn load_teams(project_root: &std::path::Path, team_file_globs: &[String]) -> std::result::Result<Vec<Team>, String> {
219+
let mut teams: Vec<Team> = Vec::new();
220+
for glob_str in team_file_globs {
221+
let absolute_glob = format!("{}/{}", project_root.display(), glob_str);
222+
let paths = glob(&absolute_glob).map_err(|e| e.to_string())?;
223+
for path in paths.flatten() {
224+
match Team::from_team_file_path(path.clone()) {
225+
Ok(team) => teams.push(team),
226+
Err(e) => {
227+
eprintln!("Error parsing team file: {}", e);
228+
continue;
229+
}
230+
}
231+
}
232+
}
233+
Ok(teams)
234+
}
235+
236+
lazy_static! {
237+
static ref TOP_OF_FILE_TEAM_REGEX: Regex = Regex::new(r#"^(?:#|//) @team (.*)$"#).expect("error compiling regular expression");
238+
}
239+
240+
fn read_top_of_file_team(path: &std::path::PathBuf) -> Option<String> {
241+
let content = fs::read_to_string(path).ok()?;
242+
let first_line = content.lines().next()?;
243+
TOP_OF_FILE_TEAM_REGEX
244+
.captures(first_line)
245+
.and_then(|cap| cap.get(1))
246+
.map(|m| m.as_str().to_string())
247+
}
248+
249+
fn most_specific_directory_owner(
250+
project_root: &std::path::Path,
251+
relative_file_path: &std::path::Path,
252+
teams_by_name: &HashMap<String, Team>,
253+
) -> Option<(String, crate::ownership::mapper::Source)> {
254+
let mut current = project_root.join(relative_file_path);
255+
let mut best: Option<(String, crate::ownership::mapper::Source)> = None;
256+
loop {
257+
let parent_opt = current.parent().map(|p| p.to_path_buf());
258+
let Some(parent) = parent_opt else { break };
259+
let codeowner_path = parent.join(".codeowner");
260+
if let Ok(owner_str) = fs::read_to_string(&codeowner_path) {
261+
let owner = owner_str.trim();
262+
if let Some(team) = teams_by_name.get(owner) {
263+
let relative_dir = parent
264+
.strip_prefix(project_root)
265+
.unwrap_or(parent.as_path())
266+
.to_string_lossy()
267+
.to_string();
268+
let candidate = (
269+
team.name.clone(),
270+
crate::ownership::mapper::Source::Directory(relative_dir),
271+
);
272+
match &best {
273+
None => best = Some(candidate),
274+
Some((_, existing_source)) => {
275+
let existing_len = source_directory_depth(existing_source);
276+
let candidate_len = source_directory_depth(&candidate.1);
277+
if candidate_len > existing_len {
278+
best = Some(candidate);
279+
}
280+
}
281+
}
282+
}
283+
}
284+
if parent == project_root { break; }
285+
current = parent.clone();
286+
}
287+
best
288+
}
289+
290+
fn nearest_package_owner(
291+
project_root: &std::path::Path,
292+
relative_file_path: &std::path::Path,
293+
config: &Config,
294+
teams_by_name: &HashMap<String, Team>,
295+
) -> Option<(String, crate::ownership::mapper::Source)> {
296+
let mut current = project_root.join(relative_file_path);
297+
loop {
298+
let parent_opt = current.parent().map(|p| p.to_path_buf());
299+
let Some(parent) = parent_opt else { break };
300+
let parent_rel = parent.strip_prefix(project_root).unwrap_or(parent.as_path());
301+
if let Some(rel_str) = parent_rel.to_str() {
302+
if glob_list_matches(rel_str, &config.ruby_package_paths) {
303+
let pkg_yml = parent.join("package.yml");
304+
if pkg_yml.exists() {
305+
if let Ok(owner) = read_ruby_package_owner(&pkg_yml) {
306+
if let Some(team) = teams_by_name.get(&owner) {
307+
let package_path = parent_rel.join("package.yml");
308+
let package_glob = format!("{}/**/**", rel_str);
309+
return Some((
310+
team.name.clone(),
311+
crate::ownership::mapper::Source::Package(
312+
package_path.to_string_lossy().to_string(),
313+
package_glob,
314+
),
315+
));
316+
}
317+
}
318+
}
319+
}
320+
if glob_list_matches(rel_str, &config.javascript_package_paths) {
321+
let pkg_json = parent.join("package.json");
322+
if pkg_json.exists() {
323+
if let Ok(owner) = read_js_package_owner(&pkg_json) {
324+
if let Some(team) = teams_by_name.get(&owner) {
325+
let package_path = parent_rel.join("package.json");
326+
let package_glob = format!("{}/**/**", rel_str);
327+
return Some((
328+
team.name.clone(),
329+
crate::ownership::mapper::Source::Package(
330+
package_path.to_string_lossy().to_string(),
331+
package_glob,
332+
),
333+
));
334+
}
335+
}
336+
}
337+
}
338+
}
339+
if parent == project_root { break; }
340+
current = parent;
341+
}
342+
None
343+
}
344+
345+
fn source_directory_depth(source: &crate::ownership::mapper::Source) -> usize {
346+
match source {
347+
crate::ownership::mapper::Source::Directory(path) => path.matches('/').count(),
348+
_ => 0,
349+
}
350+
}
351+
352+
fn glob_list_matches(path: &str, globs: &[String]) -> bool {
353+
globs.iter().any(|g| glob_match(g, path))
354+
}
355+
356+
fn read_ruby_package_owner(path: &std::path::Path) -> std::result::Result<String, String> {
357+
let file = std::fs::File::open(path).map_err(|e| e.to_string())?;
358+
let deserializer: crate::project::deserializers::RubyPackage = serde_yaml::from_reader(file).map_err(|e| e.to_string())?;
359+
deserializer.owner.ok_or_else(|| "Missing owner".to_string())
360+
}
361+
362+
fn read_js_package_owner(path: &std::path::Path) -> std::result::Result<String, String> {
363+
let file = std::fs::File::open(path).map_err(|e| e.to_string())?;
364+
let deserializer: crate::project::deserializers::JavascriptPackage = serde_json::from_reader(file).map_err(|e| e.to_string())?;
365+
deserializer
366+
.metadata
367+
.and_then(|m| m.owner)
368+
.ok_or_else(|| "Missing owner".to_string())
369+
}
370+
371+
fn vendored_gem_owner(
372+
relative_file_path: &std::path::Path,
373+
config: &Config,
374+
teams: &[Team],
375+
) -> Option<(String, crate::ownership::mapper::Source)> {
376+
use std::path::Component;
377+
let mut comps = relative_file_path.components();
378+
let first = comps.next()?;
379+
let second = comps.next()?;
380+
let first_str = match first { Component::Normal(s) => s.to_string_lossy(), _ => return None };
381+
if first_str != config.vendored_gems_path { return None; }
382+
let gem_name = match second { Component::Normal(s) => s.to_string_lossy().to_string(), _ => return None };
383+
for team in teams {
384+
if team.owned_gems.iter().any(|g| g == &gem_name) {
385+
return Some((team.name.clone(), crate::ownership::mapper::Source::TeamGem));
386+
}
387+
}
388+
None
389+
}
390+
86391
pub fn for_team(run_config: &RunConfig, team_name: &str) -> RunResult {
87392
run_with_runner(run_config, |runner| runner.for_team(team_name))
88393
}

0 commit comments

Comments
 (0)