Skip to content
Draft
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion gix-diff/src/blob/unified_diff/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ where
}

impl DiffLineKind {
const fn to_prefix(self) -> char {
/// Return the single-character prefix that marks a line of this kind in a unified diff.
pub const fn to_prefix(self) -> char {
match self {
DiffLineKind::Context => ' ',
DiffLineKind::Add => '+',
Expand Down
1 change: 1 addition & 0 deletions gix-diff/tests/diff/blob/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub(crate) mod pipeline;
mod platform;
mod slider;
mod unified_diff;
228 changes: 228 additions & 0 deletions gix-diff/tests/diff/blob/slider.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
use std::{iter::Peekable, path::PathBuf};

use gix_diff::blob::{
intern::TokenSource,
unified_diff::{ConsumeHunk, ContextSize, HunkHeader},
Algorithm, UnifiedDiff,
};
use gix_object::bstr::{self, BString};

/// A single hunk of a unified diff in a form that can be compared for equality.
///
/// Both our own diff output and the parsed Git baseline are converted into this type.
#[derive(Debug, PartialEq)]
struct DiffHunk {
/// The header of the hunk.
header: HunkHeader,
/// All lines of the hunk, each starting with its one-byte prefix and terminated by `\n`.
lines: BString,
}

/// A `ConsumeHunk` implementation that stores every hunk it receives as a [`DiffHunk`].
struct DiffHunkRecorder {
/// The hunks recorded so far, in the order they were consumed.
inner: Vec<DiffHunk>,
}

impl DiffHunkRecorder {
    /// Create a recorder that has not seen any hunk yet.
    fn new() -> Self {
        let inner = Vec::new();
        Self { inner }
    }
}

impl ConsumeHunk for DiffHunkRecorder {
    type Out = Vec<DiffHunk>;

    /// Render `lines` as `<prefix><line>\n` each and remember the result together with `header`.
    fn consume_hunk(
        &mut self,
        header: HunkHeader,
        lines: &[(gix_diff::blob::unified_diff::DiffLineKind, &[u8])],
    ) -> std::io::Result<()> {
        let mut rendered = Vec::new();
        for (kind, content) in lines.iter().copied() {
            // The prefix is one of the ASCII characters used by unified diffs, so it fits a byte.
            rendered.push(kind.to_prefix() as u8);
            rendered.extend_from_slice(content);
            rendered.push(b'\n');
        }

        self.inner.push(DiffHunk {
            header,
            lines: rendered.into(),
        });
        Ok(())
    }

    /// Hand out all hunks recorded so far.
    fn finish(self) -> Self::Out {
        self.inner
    }
}

/// A parser for a baseline file, yielding one [`DiffHunk`] per hunk when iterated.
struct Baseline<'a> {
/// The not yet consumed lines of the baseline; peeking is used to detect where a hunk ends.
lines: Peekable<bstr::Lines<'a>>,
}

mod baseline {
use std::path::Path;

use gix_diff::blob::unified_diff::HunkHeader;
use gix_object::bstr::ByteSlice;

use super::{Baseline, DiffHunk};

static START_OF_HEADER: &[u8; 4] = b"@@ -";

impl Baseline<'_> {
pub fn collect(baseline_path: impl AsRef<Path>) -> std::io::Result<Vec<DiffHunk>> {
let content = std::fs::read(baseline_path)?;

let mut baseline = Baseline {
lines: content.lines().peekable(),
};

baseline.skip_header();

Ok(baseline.collect())
}

fn skip_header(&mut self) {
// diff --git a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
// index ccccccc..ddddddd 100644
// --- a/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
// +++ b/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"diff --git "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"index "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"--- "));

let line = self.lines.next().expect("line to be present");
assert!(line.starts_with(b"+++ "));
}

/// Parse diff hunk headers that conform to the unified diff hunk header format.
///
/// The parser is very primitive and relies on the fact that `+18` is parsed as `18`. This
/// allows us to split the input on ` ` and `,` only.
///
/// @@ -18,6 +18,7 @@ abc def ghi
/// @@ -{before_hunk_start},{before_hunk_len} +{after_hunk_start},{after_hunk_len} @@
fn parse_hunk_header(&self, line: &[u8]) -> gix_testtools::Result<HunkHeader> {
let Some(line) = line.strip_prefix(START_OF_HEADER) else {
todo!()
};

let parts: Vec<_> = line.split(|b| *b == b' ' || *b == b',').collect();
let [before_hunk_start, before_hunk_len, after_hunk_start, after_hunk_len, ..] = parts[..] else {
todo!()
};

Ok(HunkHeader {
before_hunk_start: self.parse_number(before_hunk_start),
before_hunk_len: self.parse_number(before_hunk_len),
after_hunk_start: self.parse_number(after_hunk_start),
after_hunk_len: self.parse_number(after_hunk_len),
})
}

fn parse_number(&self, bytes: &[u8]) -> u32 {
bytes
.to_str()
.expect("to be a valid UTF-8 string")
.parse::<u32>()
.expect("to be a number")
}
}

impl Iterator for Baseline<'_> {
type Item = DiffHunk;

fn next(&mut self) -> Option<Self::Item> {
let mut hunk_header = None;
let mut hunk_lines = Vec::new();

while let Some(line) = self.lines.next() {
if line.starts_with(START_OF_HEADER) {
assert!(hunk_header.is_none(), "should not overwrite existing hunk_header");
hunk_header = self.parse_hunk_header(line).ok();

continue;
}

match line[0] {
b' ' | b'+' | b'-' => {
hunk_lines.extend_from_slice(line);
hunk_lines.push(b'\n');
}
_ => todo!(),
}

match self.lines.peek() {
Some(next_line) if next_line.starts_with(START_OF_HEADER) => break,
None => break,
_ => {}
}
}

hunk_header.map(|hunk_header| DiffHunk {
header: hunk_header,
lines: hunk_lines.into(),
})
}
}
}

/// Diff each pair of blobs for which a Git baseline exists and assert that our hunks are
/// identical to the ones Git produced.
///
/// Baselines are the `<old>-<new>.baseline` files in the `.git` directory of the fixture, with
/// the blobs themselves being stored as `assets/<id>.commit`.
#[test]
fn sliders() -> gix_testtools::Result {
    let worktree_path = fixture_path()?;
    let asset_dir = worktree_path.join("assets");
    let git_dir = worktree_path.join(".git");

    for entry in std::fs::read_dir(&git_dir)? {
        let entry = entry?;
        let file_name = entry.file_name().into_string().expect("to be string");

        // Everything else in the `.git` directory isn't ours.
        let Some(name) = file_name.strip_suffix(".baseline") else {
            continue;
        };
        let Some((old_blob_id, new_blob_id)) = name.split_once('-') else {
            return Err(format!("baseline '{file_name}' must be named '<old>-<new>.baseline'").into());
        };

        let old_data = std::fs::read(asset_dir.join(format!("{old_blob_id}.commit")))?;
        let new_data = std::fs::read(asset_dir.join(format!("{new_blob_id}.commit")))?;

        let interner = gix_diff::blob::intern::InternedInput::new(
            tokens_for_diffing(old_data.as_slice()),
            tokens_for_diffing(new_data.as_slice()),
        );

        let actual = gix_diff::blob::diff(
            Algorithm::Myers,
            &interner,
            UnifiedDiff::new(&interner, DiffHunkRecorder::new(), ContextSize::symmetrical(3)),
        )?;

        let baseline = Baseline::collect(entry.path())?;

        pretty_assertions::assert_eq!(actual, baseline, "our diff must match the baseline '{file_name}'");
    }

    Ok(())
}

/// Turn `data` into the token source used for diffing, delegating to
/// `gix_diff::blob::sources::byte_lines()`.
fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {
gix_diff::blob::sources::byte_lines(data)
}

/// Return the path that `gix_testtools::scripted_fixture_read_only_standalone()` provides for
/// the `make_diff_for_sliders_repo.sh` fixture script.
fn fixture_path() -> gix_testtools::Result<PathBuf> {
gix_testtools::scripted_fixture_read_only_standalone("make_diff_for_sliders_repo.sh")
}
18 changes: 18 additions & 0 deletions tests/it/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,24 @@ pub enum Subcommands {
#[clap(value_parser = AsPathSpec)]
patterns: Vec<gix::pathspec::Pattern>,
},
/// Extract pairs of blobs from a repository and write a fixture script that creates their diffs as baseline.
CreateDiffCases {
/// A file in which each line starts with two whitespace-separated rev-specs, the old and the new version of a blob.
#[clap(long)]
sliders_file: PathBuf,
/// The git root to extract the diff-related parts from.
#[clap(long)]
worktree_dir: PathBuf,
/// The directory into which to copy the files.
#[clap(long)]
destination_dir: PathBuf,
/// The maximum number of lines to read from the sliders file.
#[clap(long, default_value_t = 10)]
count: usize,
/// The directory to place assets in.
#[clap(long)]
asset_dir: Option<BString>,
},
/// Check for executable bits that disagree with shebangs.
///
/// This checks committed and staged files, but not anything unstaged, to find shell scripts
Expand Down
129 changes: 129 additions & 0 deletions tests/it/src/commands/create_diff_cases.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
pub(super) mod function {
use anyhow::Context;
use std::{
collections::HashSet,
path::{Path, PathBuf},
};

use gix::{
bstr::{BString, ByteSlice},
objs::FindExt,
};

pub fn create_diff_cases(
sliders_file: PathBuf,
worktree_dir: &Path,
destination_dir: PathBuf,
count: usize,
asset_dir: Option<BString>,
) -> anyhow::Result<()> {
// TODO: turn into parameter.
let dry_run = false;

let prefix = if dry_run { "WOULD" } else { "Will" };
let sliders = std::fs::read_to_string(&sliders_file)?;

eprintln!(
"read {} which has {} lines",
sliders_file.display(),
sliders.lines().count()
);

let sliders: HashSet<_> = sliders
.lines()
.take(count)
.map(|line| {
let parts: Vec<_> = line.split_ascii_whitespace().collect();

match parts[..] {
[before, after, ..] => (before, after),
_ => todo!(),
}
})
.collect();

eprintln!("{sliders:?}");

let repo = gix::open(worktree_dir)?;

let asset_dir = asset_dir.unwrap_or("assets".into());
let assets = destination_dir.join(asset_dir.to_os_str()?);

eprintln!("{prefix} create directory '{assets}'", assets = assets.display());
if !dry_run {
std::fs::create_dir_all(&assets)?;
}

let mut buf = Vec::new();

let script_name = "make_diff_for_sliders_repo.sh";

let mut blocks: Vec<String> = vec![format!(
r#"#!/usr/bin/env bash
# TODO:
# `git diff --no-index` returns 1 when there's differences, but 1 is treated as an error by the
# shell.
# set -eu -o pipefail
ROOT="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)"
git init
echo .gitignore >> .gitignore
echo {asset_dir}/ >> .gitignore
echo {script_name} >> .gitignore
mkdir -p {asset_dir}
"#
)];

for (before, after) in sliders.iter() {
let revspec = repo.rev_parse(*before)?;
let old_blob_id = revspec
.single()
.context(format!("rev-spec '{before}' must resolve to a single object"))?;
let (old_path, _) = revspec
.path_and_mode()
.context(format!("rev-spec '{before}' must contain a path"))?;

let revspec = repo.rev_parse(*after)?;
let new_blob_id = revspec
.single()
.context(format!("rev-spec '{after}' must resolve to a single object"))?;
let (new_path, _) = revspec
.path_and_mode()
.context(format!("rev-spec '{after}' must contain a path"))?;

eprintln!("{old_blob_id:?} {old_path:?} {new_blob_id:?} {new_path:?}");

let dst_old_blob = assets.join(format!("{old_blob_id}.commit"));
let dst_new_blob = assets.join(format!("{new_blob_id}.commit"));
if !dry_run {
let old_blob = repo.objects.find_blob(&old_blob_id, &mut buf)?.data;
std::fs::write(dst_old_blob, old_blob)?;

let new_blob = repo.objects.find_blob(&new_blob_id, &mut buf)?.data;
std::fs::write(dst_new_blob, new_blob)?;
}

blocks.push(format!(
r#"git diff --no-index "$ROOT/{asset_dir}/{old_blob_id}.commit" "$ROOT/{asset_dir}/{new_blob_id}.commit" > .git/{old_blob_id}-{new_blob_id}.baseline
cp "$ROOT/{asset_dir}/{old_blob_id}.commit" assets/
cp "$ROOT/{asset_dir}/{new_blob_id}.commit" assets/
"#
));
}

let script_file = destination_dir.join(script_name);
eprintln!(
"{prefix} write script file at '{script_file}'",
script_file = script_file.display()
);

if !dry_run {
let script = blocks.join("\n");
std::fs::write(script_file, script)?;
}

Ok(())
}
}
Loading
Loading