diff --git a/gix-blame/src/error.rs b/gix-blame/src/error.rs index d90a05127cc..f78c02f85a4 100644 --- a/gix-blame/src/error.rs +++ b/gix-blame/src/error.rs @@ -30,7 +30,7 @@ pub enum Error { #[error(transparent)] DiffTreeWithRewrites(#[from] gix_diff::tree_with_rewrites::Error), #[error("Invalid line range was given, line range is expected to be a 1-based inclusive range in the format ','")] - InvalidLineRange, + InvalidOneBasedLineRange, #[error("Failure to decode commit during traversal")] DecodeCommit(#[from] gix_object::decode::Error), #[error("Failed to get parent from commitgraph during traversal")] diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index 8d914d3b7ee..773ac1425cb 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -24,14 +24,12 @@ use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statisti /// - The first commit to be responsible for parts of `file_path`. /// * `cache` /// - Optionally, the commitgraph cache. -/// * `file_path` -/// - A *slash-separated* worktree-relative path to the file to blame. -/// * `range` -/// - A 1-based inclusive range, in order to mirror `git`’s behaviour. `Some(20..40)` represents -/// 21 lines, spanning from line 20 up to and including line 40. This will be converted to -/// `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end. /// * `resource_cache` /// - Used for diffing trees. +/// * `file_path` +/// - A *slash-separated* worktree-relative path to the file to blame. +/// * `options` +/// - An instance of [`Options`]. /// /// ## The algorithm /// @@ -95,16 +93,11 @@ pub fn file( return Ok(Outcome::default()); } - let ranges = options.range.to_zero_based_exclusive(num_lines_in_blamed)?; - let mut hunks_to_blame = Vec::with_capacity(ranges.len()); - - for range in ranges { - hunks_to_blame.push(UnblamedHunk { - range_in_blamed_file: range.clone(), - suspects: [(suspect, range)].into(), - source_file_name: None, - }); - } + let ranges_to_blame = options.ranges.to_zero_based_exclusive_ranges(num_lines_in_blamed); + let mut hunks_to_blame = ranges_to_blame + .into_iter() + .map(|range| UnblamedHunk::new(range, suspect)) + .collect::>(); let (mut buf, mut buf2) = (Vec::new(), Vec::new()); let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?; diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 6eabafbb4c4..2e605242059 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -985,3 +985,129 @@ mod process_changes { ); } } + +mod blame_ranges { + use crate::{BlameRanges, Error}; + + #[test] + fn create_with_invalid_range() { + let ranges = BlameRanges::from_one_based_inclusive_range(0..=10); + + assert!(matches!(ranges, Err(Error::InvalidOneBasedLineRange))); + } + + #[test] + fn create_from_single_range() { + let ranges = BlameRanges::from_one_based_inclusive_range(20..=40).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![19..40]); + } + + #[test] + fn create_from_multiple_ranges() { + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![1..=4, 10..=14]).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..4, 9..14]); + } + + #[test] + fn create_with_empty_ranges() { + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![]).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..100]); + } + + #[test] + fn add_range_merges_overlapping() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=5).unwrap(); + ranges.add_one_based_inclusive_range(3..=7).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..7]); + } + + #[test] + fn add_range_merges_overlapping_both() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=3).unwrap(); + ranges.add_one_based_inclusive_range(5..=7).unwrap(); + ranges.add_one_based_inclusive_range(2..=6).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..7]); + } + + #[test] + fn add_range_non_sorted() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(5..=7).unwrap(); + ranges.add_one_based_inclusive_range(1..=3).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..3, 4..7]); + } + + #[test] + fn add_range_merges_adjacent() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=5).unwrap(); + ranges.add_one_based_inclusive_range(6..=10).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..10]); + } + + #[test] + fn non_sorted_ranges() { + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![10..=15, 1..=5]).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..5, 9..15]); + } + + #[test] + fn convert_to_zero_based_exclusive() { + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![1..=5, 10..=15]).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..5, 9..15]); + } + + #[test] + fn convert_full_file_to_zero_based() { + let ranges = BlameRanges::WholeFile; + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..100]); + } + + #[test] + fn adding_a_range_turns_whole_file_into_partial_file() { + let mut ranges = BlameRanges::default(); + + ranges.add_one_based_inclusive_range(1..=10).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(100), vec![0..10]); + } + + #[test] + fn to_zero_based_exclusive_ignores_range_past_max_lines() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=5).unwrap(); + ranges.add_one_based_inclusive_range(16..=20).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(7), vec![0..5]); + } + + #[test] + fn to_zero_based_exclusive_range_doesnt_exceed_max_lines() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=5).unwrap(); + ranges.add_one_based_inclusive_range(6..=10).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(7), vec![0..7]); + } + + #[test] + fn to_zero_based_exclusive_merged_ranges_dont_exceed_max_lines() { + let mut ranges = BlameRanges::from_one_based_inclusive_range(1..=4).unwrap(); + ranges.add_one_based_inclusive_range(6..=10).unwrap(); + + assert_eq!(ranges.to_zero_based_exclusive_ranges(7), vec![0..4, 5..7]); + } + + #[test] + fn default_is_full_file() { + let ranges = BlameRanges::default(); + + assert!(matches!(ranges, BlameRanges::WholeFile)); + } +} diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index bbd54591eb9..79c49b7a62c 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -21,12 +21,14 @@ use crate::Error; /// use gix_blame::BlameRanges; /// /// // Blame lines 20 through 40 (inclusive) -/// let range = BlameRanges::from_range(20..=40); +/// let range = BlameRanges::from_one_based_inclusive_range(20..=40); /// /// // Blame multiple ranges -/// let mut ranges = BlameRanges::new(); -/// ranges.add_range(1..=4); // Lines 1-4 -/// ranges.add_range(10..=14); // Lines 10-14 +/// let mut ranges = BlameRanges::from_one_based_inclusive_ranges(vec![ +/// 1..=4, // Lines 1-4 +/// 10..=14, // Lines 10-14 +/// ] +/// ); /// ``` /// /// # Line Number Representation @@ -36,105 +38,115 @@ use crate::Error; /// - This will be converted to `19..40` internally as the algorithm uses 0-based ranges that are exclusive at the end /// /// # Empty Ranges -/// -/// An empty `BlameRanges` (created via `BlameRanges::new()` or `BlameRanges::default()`) means -/// to blame the entire file, similar to running `git blame` without line number arguments. +/// You can blame the entire file by calling `BlameRanges::default()`, or by passing an empty vector to `from_one_based_inclusive_ranges`. #[derive(Debug, Clone, Default)] -pub struct BlameRanges { - /// The ranges to blame, stored as 1-based inclusive ranges - /// An empty Vec means blame the entire file - ranges: Vec>, +pub enum BlameRanges { + /// Blame the entire file. + #[default] + WholeFile, + /// Blame ranges in 0-based exclusive format. + PartialFile(Vec>), } /// Lifecycle impl BlameRanges { - /// Create a new empty BlameRanges instance. + /// Create from a single 0-based range. /// - /// An empty instance means to blame the entire file. - pub fn new() -> Self { - Self::default() + /// Note that the input range is 1-based inclusive, as used by git, and + /// the output is a zero-based `BlameRanges` instance. + pub fn from_one_based_inclusive_range(range: RangeInclusive) -> Result { + let zero_based_range = Self::inclusive_to_zero_based_exclusive(range)?; + Ok(Self::PartialFile(vec![zero_based_range])) } - /// Create from a single range. + /// Create from multiple 0-based ranges. + /// + /// Note that the input ranges are 1-based inclusive, as used by git, and + /// the output is a zero-based `BlameRanges` instance. /// - /// The range is 1-based, similar to git's line number format. - pub fn from_range(range: RangeInclusive) -> Self { - Self { ranges: vec![range] } + /// If the input vector is empty, the result will be `WholeFile`. + pub fn from_one_based_inclusive_ranges(ranges: Vec>) -> Result { + if ranges.is_empty() { + return Ok(Self::WholeFile); + } + + let zero_based_ranges = ranges + .into_iter() + .map(Self::inclusive_to_zero_based_exclusive) + .collect::>(); + let mut result = Self::PartialFile(vec![]); + for range in zero_based_ranges { + result.merge_zero_based_exclusive_range(range?); + } + Ok(result) } - /// Create from multiple ranges. - /// - /// All ranges are 1-based. - /// Overlapping or adjacent ranges will be merged. - pub fn from_ranges(ranges: Vec>) -> Self { - let mut result = Self::new(); - for range in ranges { - result.merge_range(range); + /// Convert a 1-based inclusive range to a 0-based exclusive range. + fn inclusive_to_zero_based_exclusive(range: RangeInclusive) -> Result, Error> { + if range.start() == &0 { + return Err(Error::InvalidOneBasedLineRange); } - result + let start = range.start() - 1; + let end = *range.end(); + Ok(start..end) } } impl BlameRanges { /// Add a single range to blame. /// - /// The range should be 1-based inclusive. - /// If the new range overlaps with or is adjacent to an existing range, - /// they will be merged into a single range. - pub fn add_range(&mut self, new_range: RangeInclusive) { - self.merge_range(new_range); - } + /// The new range will be merged with any overlapping existing ranges. + pub fn add_one_based_inclusive_range(&mut self, new_range: RangeInclusive) -> Result<(), Error> { + let zero_based_range = Self::inclusive_to_zero_based_exclusive(new_range)?; + self.merge_zero_based_exclusive_range(zero_based_range); - /// Attempts to merge the new range with any existing ranges. - /// If no merge is possible, add it as a new range. - fn merge_range(&mut self, new_range: RangeInclusive) { - // Check if this range can be merged with any existing range - for range in &mut self.ranges { - // Check if ranges overlap or are adjacent - if new_range.start() <= range.end() && range.start() <= new_range.end() { - *range = *range.start().min(new_range.start())..=*range.end().max(new_range.end()); - return; - } - } - // If no overlap found, add it as a new range - self.ranges.push(new_range); + Ok(()) } - /// Convert the 1-based inclusive ranges to 0-based exclusive ranges. - /// - /// This is used internally by the blame algorithm to convert from git's line number format - /// to the internal format used for processing. - /// - /// # Errors - /// - /// Returns `Error::InvalidLineRange` if: - /// - Any range starts at 0 (must be 1-based) - /// - Any range extends beyond the file's length - /// - Any range has the same start and end - pub fn to_zero_based_exclusive(&self, max_lines: u32) -> Result>, Error> { - if self.ranges.is_empty() { - let range = 0..max_lines; - return Ok(vec![range]); - } + /// Adds a new ranges, merging it with any existing overlapping ranges. + fn merge_zero_based_exclusive_range(&mut self, new_range: Range) { + match self { + Self::PartialFile(ref mut ranges) => { + // Partition ranges into those that don't overlap and those that do. + let (mut non_overlapping, overlapping): (Vec<_>, Vec<_>) = ranges + .drain(..) + .partition(|range| new_range.end < range.start || range.end < new_range.start); - let mut result = Vec::with_capacity(self.ranges.len()); - for range in &self.ranges { - if *range.start() == 0 { - return Err(Error::InvalidLineRange); - } - let start = range.start() - 1; - let end = *range.end(); - if start >= max_lines || end > max_lines || start == end { - return Err(Error::InvalidLineRange); + let merged_range = overlapping.into_iter().fold(new_range, |acc, range| { + acc.start.min(range.start)..acc.end.max(range.end) + }); + + non_overlapping.push(merged_range); + + *ranges = non_overlapping; + ranges.sort_by(|a, b| a.start.cmp(&b.start)); } - result.push(start..end); + Self::WholeFile => *self = Self::PartialFile(vec![new_range]), } - Ok(result) } - /// Returns true if no specific ranges are set (meaning blame entire file) - pub fn is_empty(&self) -> bool { - self.ranges.is_empty() + /// Gets zero-based exclusive ranges. + pub fn to_zero_based_exclusive_ranges(&self, max_lines: u32) -> Vec> { + match self { + Self::WholeFile => { + let full_range = 0..max_lines; + vec![full_range] + } + Self::PartialFile(ranges) => ranges + .iter() + .filter_map(|range| { + if range.end < max_lines { + return Some(range.clone()); + } + + if range.start < max_lines { + Some(range.start..max_lines) + } else { + None + } + }) + .collect(), + } } } @@ -144,7 +156,7 @@ pub struct Options { /// The algorithm to use for diffing. pub diff_algorithm: gix_diff::blob::Algorithm, /// The ranges to blame in the file. - pub range: BlameRanges, + pub ranges: BlameRanges, /// Don't consider commits before the given date. pub since: Option, /// Determine if rename tracking should be performed, and how. @@ -380,6 +392,17 @@ pub struct UnblamedHunk { } impl UnblamedHunk { + pub(crate) fn new(from_range_in_blamed_file: Range, suspect: ObjectId) -> Self { + let range_start = from_range_in_blamed_file.start; + let range_end = from_range_in_blamed_file.end; + + UnblamedHunk { + range_in_blamed_file: range_start..range_end, + suspects: [(suspect, range_start..range_end)].into(), + source_file_name: None, + } + } + pub(crate) fn has_suspect(&self, suspect: &ObjectId) -> bool { self.suspects.iter().any(|entry| entry.0 == *suspect) } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 6b1f33edfb6..bd9e1f50c35 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -229,7 +229,7 @@ macro_rules! mktest { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), + ranges: BlameRanges::default(), since: None, rewrites: Some(gix_diff::Rewrites::default()), debug_track_path: false, @@ -315,7 +315,7 @@ fn diff_disparity() { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), + ranges: BlameRanges::default(), since: None, rewrites: Some(gix_diff::Rewrites::default()), debug_track_path: false, @@ -382,7 +382,7 @@ fn since() -> gix_testtools::Result { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), + ranges: BlameRanges::default(), since: Some(gix_date::parse("2025-01-31", None)?), rewrites: Some(gix_diff::Rewrites::default()), debug_track_path: false, @@ -422,7 +422,7 @@ mod blame_ranges { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::from_range(1..=2), + ranges: BlameRanges::from_one_based_inclusive_range(1..=2).unwrap(), since: None, rewrites: Some(gix_diff::Rewrites::default()), debug_track_path: false, @@ -448,10 +448,12 @@ mod blame_ranges { suspect, } = Fixture::new()?; - let mut ranges = BlameRanges::new(); - ranges.add_range(1..=2); // Lines 1-2 - ranges.add_range(1..=1); // Duplicate range, should be ignored - ranges.add_range(4..=4); // Line 4 + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![ + 1..=2, // Lines 1-2 + 1..=1, // Duplicate range, should be ignored + 4..=4, // Line 4 + ]) + .unwrap(); let source_file_name: gix_object::bstr::BString = "simple.txt".into(); @@ -463,7 +465,7 @@ mod blame_ranges { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: ranges, + ranges, since: None, rewrites: None, debug_track_path: false, @@ -492,7 +494,7 @@ mod blame_ranges { suspect, } = Fixture::new()?; - let ranges = BlameRanges::from_ranges(vec![1..=2, 1..=1, 4..=4]); + let ranges = BlameRanges::from_one_based_inclusive_ranges(vec![1..=2, 1..=1, 4..=4]).unwrap(); let source_file_name: gix_object::bstr::BString = "simple.txt".into(); @@ -504,7 +506,7 @@ mod blame_ranges { source_file_name.as_ref(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: ranges, + ranges, since: None, rewrites: None, debug_track_path: false, @@ -550,7 +552,7 @@ mod rename_tracking { source_file_name.into(), gix_blame::Options { diff_algorithm: gix_diff::blob::Algorithm::Histogram, - range: BlameRanges::default(), + ranges: BlameRanges::default(), since: None, rewrites: Some(gix_diff::Rewrites::default()), debug_track_path: false, diff --git a/gix/tests/gix/repository/blame.rs b/gix/tests/gix/repository/blame.rs index eb3f0d1ac0c..d3a7c23c947 100644 --- a/gix/tests/gix/repository/blame.rs +++ b/gix/tests/gix/repository/blame.rs @@ -18,7 +18,7 @@ fn with_options() -> crate::Result { let repo = crate::named_repo("make_blame_repo.sh")?; let options = gix::blame::Options { - range: gix::blame::BlameRanges::from_range(1..=2), + ranges: gix::blame::BlameRanges::from_one_based_inclusive_range(1..=2)?, ..Default::default() }; diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 1d0bb96343c..3f7a743e093 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -1625,7 +1625,7 @@ pub fn main() -> Result<()> { &file, gix::blame::Options { diff_algorithm, - range: gix::blame::BlameRanges::from_ranges(ranges), + ranges: gix::blame::BlameRanges::from_one_based_inclusive_ranges(ranges)?, since, rewrites: Some(gix::diff::Rewrites::default()), debug_track_path: false, diff --git a/tests/it/src/commands/blame_copy_royal.rs b/tests/it/src/commands/blame_copy_royal.rs index 6eaa2e51e09..b1714573386 100644 --- a/tests/it/src/commands/blame_copy_royal.rs +++ b/tests/it/src/commands/blame_copy_royal.rs @@ -37,7 +37,7 @@ pub(super) mod function { let options = gix::blame::Options { diff_algorithm, - range: gix::blame::BlameRanges::default(), + ranges: gix::blame::BlameRanges::default(), since: None, rewrites: Some(gix::diff::Rewrites::default()), debug_track_path: true,