diff --git a/gix-diff/src/blob/unified_diff/impls.rs b/gix-diff/src/blob/unified_diff/impls.rs index 959560fba7f..a2afdd450ba 100644 --- a/gix-diff/src/blob/unified_diff/impls.rs +++ b/gix-diff/src/blob/unified_diff/impls.rs @@ -260,7 +260,8 @@ where } impl DiffLineKind { - const fn to_prefix(self) -> char { + /// Return the character that prefixes a line of this kind in unified diff output, e.g. `' '` for context and `'+'` for an addition. + pub const fn to_prefix(self) -> char { match self { DiffLineKind::Context => ' ', DiffLineKind::Add => '+', diff --git a/gix-diff/tests/diff/blob/mod.rs b/gix-diff/tests/diff/blob/mod.rs index 1959c4e6fdb..5742dfd4350 100644 --- a/gix-diff/tests/diff/blob/mod.rs +++ b/gix-diff/tests/diff/blob/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod pipeline; mod platform; +mod slider; mod unified_diff; diff --git a/gix-diff/tests/diff/blob/slider.rs b/gix-diff/tests/diff/blob/slider.rs new file mode 100644 index 00000000000..16315229cb3 --- /dev/null +++ b/gix-diff/tests/diff/blob/slider.rs @@ -0,0 +1,252 @@ +use std::{iter::Peekable, path::PathBuf}; + +use gix_diff::blob::{ + intern::TokenSource, + unified_diff::{ConsumeHunk, ContextSize, HunkHeader}, + Algorithm, UnifiedDiff, +}; +use gix_object::bstr::{self, BString, ByteVec}; + +#[derive(Debug, PartialEq)] +struct DiffHunk { + header: HunkHeader, + lines: BString, +} + +struct DiffHunkRecorder { + inner: Vec, +} + +impl DiffHunkRecorder { + fn new() -> Self { + Self { inner: Vec::new() } + } +} + +impl ConsumeHunk for DiffHunkRecorder { + type Out = Vec; + + fn consume_hunk( + &mut self, + header: HunkHeader, + lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])], + ) -> std::io::Result<()> { + let mut buf = Vec::new(); + + for &(kind, line) in lines { + buf.push(kind.to_prefix() as u8); + buf.extend_from_slice(line); + buf.push(b'\n'); + } + + let diff_hunk = DiffHunk { + header, + lines: buf.into(), + }; + + self.inner.push(diff_hunk); + + Ok(()) + } + + fn finish(self) -> Self::Out { + self.inner + } +} + +struct Baseline<'a> { + lines: Peekable>, +} + +mod baseline { + use std::path::Path; 
+ + use gix_diff::blob::unified_diff::HunkHeader; + use gix_object::bstr::ByteSlice; + + use super::{Baseline, DiffHunk}; + + static START_OF_HEADER: &[u8; 4] = b"@@ -"; + + impl Baseline<'_> { + pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + let content = std::fs::read(baseline_path)?; + + let mut baseline = Baseline { + lines: content.lines().peekable(), + }; + + baseline.skip_header(); + + Ok(baseline.collect()) + } + + fn skip_header(&mut self) { + // diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + // index ccccccc..ddddddd 100644 + // --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + // +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"diff --git ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"index ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"--- ")); + + let line = self.lines.next().expect("line to be present"); + assert!(line.starts_with(b"+++ ")); + } + + /// Parse diff hunk headers that conform to the unified diff hunk header format. + /// + /// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This + /// allows us to split the input on ` ` and `,` only. + /// + /// @@ -18,6 +18,7 @@ abc def ghi + /// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@ + fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result { + let Some(line) = line.strip_prefix(START_OF_HEADER) else { + todo!() + }; + + let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect(); + let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] 
else { + todo!() + }; + + Ok(HunkHeader { + before_hunk_start: self.parse_number(before_hunk_start), + before_hunk_len: self.parse_number(before_hunk_len), + after_hunk_start: self.parse_number(after_hunk_start), + after_hunk_len: self.parse_number(after_hunk_len), + }) + } + + fn parse_number(&self, bytes: &[u8]) -> u32 { + bytes + .to_str() + .expect("to be a valid UTF-8 string") + .parse::() + .expect("to be a number") + } + } + + impl Iterator for Baseline<'_> { + type Item = DiffHunk; + + fn next(&mut self) -> Option { + let mut hunk_header = None; + let mut hunk_lines = Vec::new(); + + while let Some(line) = self.lines.next() { + if line.starts_with(START_OF_HEADER) { + assert!(hunk_header.is_none(), "should not overwrite existing hunk_header"); + hunk_header = self.parse_hunk_header(line).ok(); + + continue; + } + + match line[0] { + b' ' | b'+' | b'-' => { + hunk_lines.extend_from_slice(line); + hunk_lines.push(b'\n'); + } + _ => todo!(), + } + + match self.lines.peek() { + Some(next_line) if next_line.starts_with(START_OF_HEADER) => break, + None => break, + _ => {} + } + } + + hunk_header.map(|hunk_header| DiffHunk { + header: hunk_header, + lines: hunk_lines.into(), + }) + } + } +} + +#[test] +fn sliders() -> gix_testtools::Result { + let worktree_path = fixture_path()?; + let asset_dir = worktree_path.join("assets"); + let git_dir = worktree_path.join(".git"); + + let dir = std::fs::read_dir(&git_dir)?; + + for entry in dir { + let entry = entry?; + let file_name = entry.file_name().into_string().expect("to be string"); + + if !file_name.ends_with(".baseline") { + continue; + } + + let parts: Vec<_> = file_name.split(".").collect(); + let name = parts[0]; + + let parts: Vec<_> = name.split("-").collect(); + let [old_blob_id, new_blob_id] = parts[..] 
else { + unimplemented!(); + }; + + let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.blob")))?; + let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.blob")))?; + + let interner = gix_diff::blob::intern::InternedInput::new( + tokens_for_diffing(old_data.as_slice()), + tokens_for_diffing(new_data.as_slice()), + ); + + let actual = gix_diff::blob::diff( + Algorithm::Myers, + &interner, + UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)), + )?; + + let baseline_path = git_dir.join(file_name); + let baseline = Baseline::collect(baseline_path).unwrap(); + + let actual = actual + .iter() + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + let baseline = baseline + .iter() + .fold(BString::default(), |mut acc, diff_hunk| { + acc.push_str(diff_hunk.header.to_string().as_str()); + acc.push(b'\n'); + + acc.extend_from_slice(&diff_hunk.lines); + + acc + }) + .to_string(); + + pretty_assertions::assert_eq!(actual, baseline); + } + + Ok(()) +} + +fn tokens_for_diffing(data: &[u8]) -> impl TokenSource { + gix_diff::blob::sources::byte_lines(data) +} + +fn fixture_path() -> gix_testtools::Result { + gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh") +} diff --git a/tests/it/src/args.rs b/tests/it/src/args.rs index 78f1873d64b..ea173d679ec 100644 --- a/tests/it/src/args.rs +++ b/tests/it/src/args.rs @@ -123,6 +123,24 @@ pub enum Subcommands { #[clap(value_parser = AsPathSpec)] patterns: Vec, }, + /// Extract pairs of blobs from a repository and generate a fixture script that records `git diff` baselines for them. + CreateDiffCases { + /// A file with one diff case per line, each consisting of two whitespace-separated rev-specs with paths that name the old and the new blob. + #[clap(long)] + sliders_file: PathBuf, + /// The git root to extract the diff-related parts from. + #[clap(long)] + worktree_dir: PathBuf, + /// The directory into which to copy the files. + #[clap(long)] + destination_dir: PathBuf, + /// The maximum number of lines to read from the sliders file. 
+ #[clap(long, default_value_t = 10)] + count: usize, + /// The directory to place assets in. + #[clap(long)] + asset_dir: Option, + }, /// Check for executable bits that disagree with shebangs. /// /// This checks committed and staged files, but not anything unstaged, to find shell scripts diff --git a/tests/it/src/commands/create_diff_cases.rs b/tests/it/src/commands/create_diff_cases.rs new file mode 100644 index 00000000000..438503f243d --- /dev/null +++ b/tests/it/src/commands/create_diff_cases.rs @@ -0,0 +1,126 @@ +pub(super) mod function { + use anyhow::Context; + use std::{ + collections::HashSet, + path::{Path, PathBuf}, + }; + + use gix::{ + bstr::{BString, ByteSlice}, + objs::FindExt, + }; + + pub fn create_diff_cases( + sliders_file: PathBuf, + worktree_dir: &Path, + destination_dir: PathBuf, + count: usize, + asset_dir: Option, + ) -> anyhow::Result<()> { + // TODO: turn into parameter. + let dry_run = false; + + let prefix = if dry_run { "WOULD" } else { "Will" }; + let sliders = std::fs::read_to_string(&sliders_file)?; + + eprintln!( + "read {} which has {} lines", + sliders_file.display(), + sliders.lines().count() + ); + + let sliders: HashSet<_> = sliders + .lines() + .take(count) + .map(|line| { + let parts: Vec<_> = line.split_ascii_whitespace().collect(); + + match parts[..] { + [before, after, ..] 
=> (before, after), + _ => todo!(), + } + }) + .collect(); + + eprintln!("{sliders:?}"); + + let repo = gix::open(worktree_dir)?; + + let asset_dir = asset_dir.unwrap_or("assets".into()); + let assets = destination_dir.join(asset_dir.to_os_str()?); + + eprintln!("{prefix} create directory '{assets}'", assets = assets.display()); + if !dry_run { + std::fs::create_dir_all(&assets)?; + } + + let mut buf = Vec::new(); + + let script_name = "make_diff_for_sliders_repo.sh"; + + let mut blocks: Vec = vec![format!( + r#"#!/usr/bin/env bash +set -eu -o pipefail + +ROOT="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)" + +git init +echo .gitignore >> .gitignore +echo {asset_dir}/ >> .gitignore +echo {script_name} >> .gitignore + +mkdir -p {asset_dir} +"# + )]; + + for (before, after) in sliders.iter() { + let revspec = repo.rev_parse(*before)?; + let old_blob_id = revspec + .single() + .context(format!("rev-spec '{before}' must resolve to a single object"))?; + let (old_path, _) = revspec + .path_and_mode() + .context(format!("rev-spec '{before}' must contain a path"))?; + + let revspec = repo.rev_parse(*after)?; + let new_blob_id = revspec + .single() + .context(format!("rev-spec '{after}' must resolve to a single object"))?; + let (new_path, _) = revspec + .path_and_mode() + .context(format!("rev-spec '{after}' must contain a path"))?; + + eprintln!("{old_blob_id:?} {old_path:?} {new_blob_id:?} {new_path:?}"); + + let dst_old_blob = assets.join(format!("{old_blob_id}.blob")); + let dst_new_blob = assets.join(format!("{new_blob_id}.blob")); + if !dry_run { + let old_blob = repo.objects.find_blob(&old_blob_id, &mut buf)?.data; + std::fs::write(dst_old_blob, old_blob)?; + + let new_blob = repo.objects.find_blob(&new_blob_id, &mut buf)?.data; + std::fs::write(dst_new_blob, new_blob)?; + } + + blocks.push(format!( + r#"git diff --no-index "$ROOT/{asset_dir}/{old_blob_id}.blob" "$ROOT/{asset_dir}/{new_blob_id}.blob" > .git/{old_blob_id}-{new_blob_id}.baseline || true +cp 
"$ROOT/{asset_dir}/{old_blob_id}.blob" assets/ +cp "$ROOT/{asset_dir}/{new_blob_id}.blob" assets/ +"# + )); + } + + let script_file = destination_dir.join(script_name); + eprintln!( + "{prefix} write script file at '{script_file}'", + script_file = script_file.display() + ); + + if !dry_run { + let script = blocks.join("\n"); + std::fs::write(script_file, script)?; + } + + Ok(()) + } +} diff --git a/tests/it/src/commands/mod.rs b/tests/it/src/commands/mod.rs index 6bec01671df..ed3abfd0cfc 100644 --- a/tests/it/src/commands/mod.rs +++ b/tests/it/src/commands/mod.rs @@ -7,6 +7,9 @@ pub use copy_royal::function::copy_royal; pub mod git_to_sh; pub use git_to_sh::function::git_to_sh; +pub mod create_diff_cases; +pub use create_diff_cases::function::create_diff_cases; + pub mod check_mode; pub use check_mode::function::check_mode; diff --git a/tests/it/src/main.rs b/tests/it/src/main.rs index df22a7e8c6f..57dad9b41cd 100644 --- a/tests/it/src/main.rs +++ b/tests/it/src/main.rs @@ -46,6 +46,13 @@ fn main() -> anyhow::Result<()> { destination_dir, patterns, } => commands::copy_royal(dry_run, &worktree_root, destination_dir, patterns), + Subcommands::CreateDiffCases { + sliders_file, + worktree_dir, + destination_dir, + count, + asset_dir, + } => commands::create_diff_cases(sliders_file, &worktree_dir, destination_dir, count, asset_dir), Subcommands::CheckMode {} => commands::check_mode(), Subcommands::Env {} => commands::env(), }