Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "codeowners"
version = "0.2.4"
version = "0.2.5"
edition = "2024"

[profile.release]
Expand All @@ -14,7 +14,7 @@ clap = { version = "4.5.20", features = ["derive"] }
clap_derive = "4.5.18"
error-stack = "0.5.0"
enum_dispatch = "0.3.13"
fast-glob = "0.4.0"
fast-glob = "1.0.0"
glob = "0.3.2"
ignore = "0.4.23"
itertools = "0.14.0"
Expand Down
21 changes: 21 additions & 0 deletions ai-prompts/for-file-optimization.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
## Re-architect `codeowners-rs`

The CLI behaviors live in `src/cli.rs`.

### What this tool does
- **Generate CODEOWNERS**: Builds `.github/CODEOWNERS` from multiple ownership sources. See mappers in `src/ownership/mapper/`.
- **Answer per-file ownership**: The `for-file` command returns the owner of a given file even if the checked-in `CODEOWNERS` is not up to date.

### Current implementations
- **Fast but potentially inaccurate**: `Runner::for_file_from_codeowners` (in `src/runner.rs`) parses the existing `CODEOWNERS` file to find a file’s owner. It’s very fast, but can be wrong if `CODEOWNERS` is stale.
- **Accurate but slow**: `Runner::for_file` is correct but can take up to ~4 seconds on large repositories because it effectively determines ownership for many files and then returns the single result needed.

Ownership resolution is complex because definitions often live in other files (packages, team configs, globs, etc.).

## Assignment
Implement a faster `for_file` that remains accurate.

### Requirements
- **Performance**: Under 1 second on large codebases (e.g., `$HOME/workspace/large`).
- **Correctness**: All existing tests must continue to pass.
- **Compatibility**: No changes to the external behavior of the `for-file` CLI command.
179 changes: 179 additions & 0 deletions src/bin/compare_for_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
// This is a tool to compare the output of the original codeowners CLI with the optimized version.
// It's useful for verifying that the optimized version is correct.
//
// It's not used in CI, but it's useful for debugging.
//
// To run it, use `cargo run --bin compare_for_file <absolute_project_root>`
//
// It will compare the output of the original codeowners CLI with the optimized version for all files in the project.

use std::{
fs::File,
io::{self, Write},
path::{Path, PathBuf},
process::Command,
};

use codeowners::config::Config as OwnershipConfig;
use codeowners::ownership::{FileOwner, for_file_fast};
use codeowners::runner::{RunConfig, Runner};
use ignore::WalkBuilder;

fn main() {
let project_root = std::env::args().nth(1).expect("usage: compare_for_file <absolute_project_root>");
let project_root = PathBuf::from(project_root);
if !project_root.is_absolute() {
eprintln!("Project root must be absolute");
std::process::exit(2);
}

let codeowners_file_path = project_root.join(".github/CODEOWNERS");
let config_path = project_root.join("config/code_ownership.yml");

let run_config = RunConfig {
project_root: project_root.clone(),
codeowners_file_path,
config_path: config_path.clone(),
no_cache: false,
};

// Build the original, accurate-but-slower runner once
let runner = match Runner::new(&run_config) {
Ok(r) => r,
Err(e) => {
eprintln!("Failed to initialize Runner: {}", e);
std::process::exit(1);
}
};

// Load config once for the optimized path
let config_file = match File::open(&config_path) {
Ok(f) => f,
Err(e) => {
eprintln!("Can't open config file {}: {}", config_path.display(), e);
std::process::exit(1);
}
};
let optimized_config: OwnershipConfig = match serde_yaml::from_reader(config_file) {
Ok(c) => c,
Err(e) => {
eprintln!("Can't parse config file {}: {}", config_path.display(), e);
std::process::exit(1);
}
};

let mut total_files: usize = 0;
let mut diff_count: usize = 0;

// Prefer tracked files from git; fall back to walking the FS if git is unavailable
let tracked_files_output = Command::new("git").arg("-C").arg(&project_root).arg("ls-files").arg("-z").output();

match tracked_files_output {
Ok(output) if output.status.success() => {
let bytes = output.stdout;
for rel in bytes.split(|b| *b == 0u8) {
if rel.is_empty() {
continue;
}
let rel_str = match std::str::from_utf8(rel) {
Ok(s) => s,
Err(_) => continue,
};
let abs_path = project_root.join(rel_str);
// Only process regular files that currently exist
if !abs_path.is_file() {
continue;
}

total_files += 1;
let original = run_original(&runner, &abs_path);
let optimized = run_optimized(&project_root, &optimized_config, &abs_path);

if original != optimized {
diff_count += 1;
println!("\n==== {} ====", abs_path.display());
println!("ORIGINAL:\n{}", original);
println!("OPTIMIZED:\n{}", optimized);
let _ = io::stdout().flush();
}

if total_files % 1000 == 0 {
eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count);
}
}
}
_ => {
eprintln!("git ls-files failed; falling back to filesystem walk (untracked files may be included)");
let walker = WalkBuilder::new(&project_root)
.hidden(false)
.git_ignore(true)
.git_exclude(true)
.follow_links(false)
.build();

for result in walker {
let entry = match result {
Ok(e) => e,
Err(err) => {
eprintln!("walk error: {}", err);
continue;
}
};
if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
continue;
}
let path = entry.path();
total_files += 1;

let original = run_original(&runner, path);
let optimized = run_optimized(&project_root, &optimized_config, path);

if original != optimized {
diff_count += 1;
println!("\n==== {} ====", path.display());
println!("ORIGINAL:\n{}", original);
println!("OPTIMIZED:\n{}", optimized);
let _ = io::stdout().flush();
}

if total_files % 1000 == 0 {
eprintln!("Processed {} files... diffs so far: {}", total_files, diff_count);
}
}
}
}

println!("Checked {} files. Diffs: {}", total_files, diff_count);
if diff_count > 0 {
std::process::exit(3);
}
}

fn run_original(runner: &Runner, file_path: &Path) -> String {
let result = runner.for_file(&file_path.to_string_lossy());
if !result.validation_errors.is_empty() {
return result.validation_errors.join("\n");
}
if !result.io_errors.is_empty() {
return format!("IO_ERROR: {}", result.io_errors.join(" | "));
}
result.info_messages.join("\n")
}

fn run_optimized(project_root: &Path, config: &OwnershipConfig, file_path: &Path) -> String {
let owners: Vec<FileOwner> = match for_file_fast::find_file_owners(project_root, config, file_path) {
Ok(v) => v,
Err(e) => return format!("IO_ERROR: {}", e),
};
match owners.len() {
0 => format!("{}", FileOwner::default()),
1 => format!("{}", owners[0]),
_ => {
let mut lines = vec!["Error: file is owned by multiple teams!".to_string()];
for owner in owners {
lines.push(format!("\n{}", owner));
}
lines.join("\n")
}
}
}
14 changes: 10 additions & 4 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ enum Command {
about = "Generate the CODEOWNERS file and save it to '--codeowners-file-path'.",
visible_alias = "g"
)]
Generate,
Generate {
#[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")]
skip_stage: bool,
},

#[clap(
about = "Validate the validity of the CODEOWNERS file. A validation failure will exit with a failure code and a detailed output of the validation errors.",
Expand All @@ -35,7 +38,10 @@ enum Command {
Validate,

#[clap(about = "Chains both `generate` and `validate` commands.", visible_alias = "gv")]
GenerateAndValidate,
GenerateAndValidate {
#[arg(long, short, default_value = "false", help = "Skip staging the CODEOWNERS file")]
skip_stage: bool,
},

#[clap(about = "Delete the cache file.", visible_alias = "d")]
DeleteCache,
Expand Down Expand Up @@ -101,8 +107,8 @@ pub fn cli() -> Result<RunResult, RunnerError> {

let runner_result = match args.command {
Command::Validate => runner::validate(&run_config, vec![]),
Command::Generate => runner::generate(&run_config),
Command::GenerateAndValidate => runner::generate_and_validate(&run_config, vec![]),
Command::Generate { skip_stage } => runner::generate(&run_config, !skip_stage),
Command::GenerateAndValidate { skip_stage } => runner::generate_and_validate(&run_config, vec![], !skip_stage),
Command::ForFile { name, fast } => runner::for_file(&run_config, &name, fast),
Command::ForTeam { name } => runner::for_team(&run_config, &name),
Command::DeleteCache => runner::delete_cache(&run_config),
Expand Down
3 changes: 2 additions & 1 deletion src/ownership.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use tracing::{info, instrument};

mod file_generator;
mod file_owner_finder;
pub mod for_file_fast;
pub(crate) mod mapper;
pub(crate) mod parser;
mod validator;
Expand All @@ -32,7 +33,7 @@ use self::{
pub struct Ownership {
project: Arc<Project>,
}

#[derive(Debug)]
pub struct FileOwner {
pub team: Team,
pub team_config_file_path: String,
Expand Down
Loading