Skip to content

Commit 439d041

Browse files
committed
add all relevant tests for the merge processing pipeline
1 parent ad9587a commit 439d041

File tree

5 files changed

+551
-91
lines changed

5 files changed

+551
-91
lines changed

gix-merge/src/blob/pipeline.rs

Lines changed: 81 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{BuiltinDriver, Pipeline, ResourceKind};
2-
use bstr::{BStr, ByteSlice};
2+
use bstr::{BStr, BString, ByteSlice};
33
use gix_filter::attributes;
44
use gix_filter::driver::apply::{Delay, MaybeDelayed};
55
use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome};
@@ -8,7 +8,7 @@ use std::io::Read;
88
use std::path::{Path, PathBuf};
99

1010
/// Options for use in a [`Pipeline`].
11-
#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
11+
#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
1212
pub struct Options {
1313
/// The amount of bytes that an object has to reach before being treated as binary.
1414
/// These objects will not be queried, nor will their data be processed in any way.
@@ -22,10 +22,10 @@ pub struct Options {
2222
pub large_file_threshold_bytes: u64,
2323
/// Capabilities of the file system which affect how we read worktree files.
2424
pub fs: gix_fs::Capabilities,
25-
/// Define which driver to use if the `merge` attribute for a resource is unspecified.
25+
/// Define which driver to use by name if the `merge` attribute for a resource is unspecified.
2626
///
2727
/// This is the value of the `merge.default` git configuration.
28-
pub default_driver: Option<BuiltinDriver>,
28+
pub default_driver: Option<BString>,
2929
}
3030

3131
/// The specific way to convert a resource.
@@ -117,11 +117,10 @@ impl Pipeline {
117117
pub enum Data {
118118
/// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`].
119119
Buffer,
120-
/// The size that the binary blob had at the given revision, without having applied filters, as it's either
121-
/// considered binary or above the big-file threshold.
120+
/// The file or blob is above the big-file threshold and cannot be processed.
122121
///
123-
/// In this state, the binary file cannot be merged.
124-
Binary {
122+
/// In this state, the file cannot be merged.
123+
TooLarge {
125124
/// The size of the object prior to performing any filtering or as it was found on disk.
126125
///
127126
/// Note that technically, the size isn't always representative of the same 'state' of the
@@ -242,25 +241,28 @@ impl Pipeline {
242241
let driver = match attr.assignment.state {
243242
attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text),
244243
attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary),
245-
attributes::StateRef::Value(name) => {
246-
let name = name.as_bstr();
247-
self.drivers
248-
.binary_search_by(|d| d.name.as_bstr().cmp(name))
249-
.ok()
250-
.map(DriverChoice::Index)
251-
.or_else(|| {
252-
name.to_str()
253-
.ok()
254-
.and_then(BuiltinDriver::by_name)
255-
.map(DriverChoice::BuiltIn)
256-
})
257-
.unwrap_or_default()
244+
attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => {
245+
let name = match attr.assignment.state {
246+
attributes::StateRef::Value(name) => Some(name.as_bstr()),
247+
attributes::StateRef::Unspecified => {
248+
self.options.default_driver.as_ref().map(|name| name.as_bstr())
249+
}
250+
_ => unreachable!("only value and unspecified are possible here"),
251+
};
252+
name.and_then(|name| {
253+
self.drivers
254+
.binary_search_by(|d| d.name.as_bstr().cmp(name))
255+
.ok()
256+
.map(DriverChoice::Index)
257+
.or_else(|| {
258+
name.to_str()
259+
.ok()
260+
.and_then(BuiltinDriver::by_name)
261+
.map(DriverChoice::BuiltIn)
262+
})
263+
})
264+
.unwrap_or_default()
258265
}
259-
attributes::StateRef::Unspecified => self
260-
.options
261-
.default_driver
262-
.map(DriverChoice::BuiltIn)
263-
.unwrap_or_default(),
264266
};
265267
match self.roots.by_kind(kind) {
266268
Some(root) => {
@@ -279,7 +281,7 @@ impl Pipeline {
279281
.transpose()?;
280282
let data = match size_in_bytes {
281283
Some(None) => None, // missing as identified by the size check
282-
Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::Binary { size }),
284+
Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }),
283285
_ => {
284286
let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
285287
convert_to_mergeable::Error::OpenOrRead {
@@ -295,7 +297,13 @@ impl Pipeline {
295297
file,
296298
gix_path::from_bstr(rela_path).as_ref(),
297299
attributes,
298-
&mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())),
300+
&mut |buf| {
301+
if convert == Mode::Renormalize {
302+
Ok(None)
303+
} else {
304+
objects.try_find(id, buf).map(|obj| obj.map(|_| ()))
305+
}
306+
},
299307
)?;
300308

301309
match res {
@@ -324,13 +332,7 @@ impl Pipeline {
324332
}
325333
}
326334

327-
Some(if is_binary_buf(out) {
328-
let size = out.len() as u64;
329-
out.clear();
330-
Data::Binary { size }
331-
} else {
332-
Data::Buffer
333-
})
335+
Some(Data::Buffer)
334336
} else {
335337
None
336338
}
@@ -349,70 +351,66 @@ impl Pipeline {
349351
let is_binary = self.options.large_file_threshold_bytes > 0
350352
&& header.size > self.options.large_file_threshold_bytes;
351353
let data = if is_binary {
352-
Data::Binary { size: header.size }
354+
Data::TooLarge { size: header.size }
353355
} else {
354356
objects
355357
.try_find(id, out)
356358
.map_err(gix_object::find::existing_object::Error::Find)?
357359
.ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
358360

359361
if convert == Mode::Renormalize {
360-
let res = self
361-
.filter
362-
.convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;
362+
{
363+
let res = self
364+
.filter
365+
.convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;
366+
367+
match res {
368+
ToWorktreeOutcome::Unchanged(_) => {}
369+
ToWorktreeOutcome::Buffer(src) => {
370+
out.clear();
371+
out.try_reserve(src.len())?;
372+
out.extend_from_slice(src);
373+
}
374+
ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
375+
std::io::copy(&mut stream, out).map_err(|err| {
376+
convert_to_mergeable::Error::StreamCopy {
377+
rela_path: rela_path.to_owned(),
378+
source: err,
379+
}
380+
})?;
381+
}
382+
ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
383+
unreachable!("we prohibit this")
384+
}
385+
};
386+
}
387+
388+
let res = self.filter.convert_to_git(
389+
&**out,
390+
&gix_path::from_bstr(rela_path),
391+
attributes,
392+
&mut |_buf| Ok(None),
393+
)?;
363394

364395
match res {
365-
ToWorktreeOutcome::Unchanged(_) => {}
366-
ToWorktreeOutcome::Buffer(src) => {
367-
out.clear();
368-
out.try_reserve(src.len())?;
369-
out.extend_from_slice(src);
370-
}
371-
ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
372-
std::io::copy(&mut stream, out).map_err(|err| {
373-
convert_to_mergeable::Error::StreamCopy {
396+
ToGitOutcome::Unchanged(_) => {}
397+
ToGitOutcome::Process(mut stream) => {
398+
stream
399+
.read_to_end(out)
400+
.map_err(|err| convert_to_mergeable::Error::OpenOrRead {
374401
rela_path: rela_path.to_owned(),
375402
source: err,
376-
}
377-
})?;
403+
})?;
378404
}
379-
ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
380-
unreachable!("we prohibit this")
405+
ToGitOutcome::Buffer(buf) => {
406+
out.clear();
407+
out.try_reserve(buf.len())?;
408+
out.extend_from_slice(buf);
381409
}
382-
};
383-
}
384-
385-
let res = self.filter.convert_to_git(
386-
&**out,
387-
&gix_path::from_bstr(rela_path),
388-
attributes,
389-
&mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())),
390-
)?;
391-
392-
match res {
393-
ToGitOutcome::Unchanged(_) => {}
394-
ToGitOutcome::Process(mut stream) => {
395-
stream
396-
.read_to_end(out)
397-
.map_err(|err| convert_to_mergeable::Error::OpenOrRead {
398-
rela_path: rela_path.to_owned(),
399-
source: err,
400-
})?;
401-
}
402-
ToGitOutcome::Buffer(buf) => {
403-
out.clear();
404-
out.try_reserve(buf.len())?;
405-
out.extend_from_slice(buf);
406410
}
407411
}
408412

409-
if is_binary_buf(out) {
410-
let size = out.len() as u64;
411-
out.clear();
412-
Data::Binary { size }
413-
} else {
414-
Data::Buffer
415-
}
413+
Data::Buffer
416414
};
417415
Some(data)
418416
};
@@ -429,8 +427,3 @@ fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> {
429427
Err(err) => Err(err),
430428
}
431429
}
432-
433-
fn is_binary_buf(buf: &[u8]) -> bool {
434-
let buf = &buf[..buf.len().min(8000)];
435-
buf.contains(&0)
436-
}

gix-merge/src/blob/platform.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub mod resource {
4646
ResourceRef {
4747
data: cache.conversion.data.map_or(Data::Missing, |data| match data {
4848
pipeline::Data::Buffer => Data::Buffer(&cache.buffer),
49-
pipeline::Data::Binary { size } => Data::Binary { size },
49+
pipeline::Data::TooLarge { size } => Data::Binary { size },
5050
}),
5151
driver_choice: cache.conversion.driver,
5252
rela_path: cache.rela_path.as_ref(),

gix-merge/tests/merge/blob/builtin_driver.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ mod text {
123123
"Number of expected diverging cases must match the actual one - probably the implementation improved"
124124
);
125125
assert_eq!(
126-
(num_diverging as f32 / num_cases as f32) * 100.0,
127-
12.053572,
126+
((num_diverging as f32 / num_cases as f32) * 100.0) as usize,
127+
12,
128128
"Just to show the percentage of skipped tests - this should get better"
129129
);
130130
Ok(())

gix-merge/tests/merge/blob/mod.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,51 @@
11
mod builtin_driver;
2+
mod pipeline;
3+
4+
mod util {
5+
use std::collections::HashMap;
6+
7+
use gix_hash::oid;
8+
use gix_object::{bstr::BString, find::Error};
9+
10+
#[derive(Default)]
11+
pub struct ObjectDb {
12+
data_by_id: HashMap<gix_hash::ObjectId, BString>,
13+
}
14+
15+
impl gix_object::FindHeader for ObjectDb {
16+
fn try_header(&self, id: &oid) -> Result<Option<gix_object::Header>, Error> {
17+
match self.data_by_id.get(&id.to_owned()) {
18+
Some(data) => Ok(Some(gix_object::Header {
19+
kind: gix_object::Kind::Blob,
20+
size: data.len() as u64,
21+
})),
22+
None => Ok(None),
23+
}
24+
}
25+
}
26+
27+
impl gix_object::Find for ObjectDb {
28+
fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec<u8>) -> Result<Option<gix_object::Data<'a>>, Error> {
29+
match self.data_by_id.get(&id.to_owned()) {
30+
Some(data) => {
31+
buffer.clear();
32+
buffer.extend_from_slice(data);
33+
Ok(Some(gix_object::Data {
34+
kind: gix_object::Kind::Blob,
35+
data: buffer.as_slice(),
36+
}))
37+
}
38+
None => Ok(None),
39+
}
40+
}
41+
}
42+
43+
impl ObjectDb {
44+
/// Insert `data` and return its hash. That can be used to find it again.
45+
pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId {
46+
let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes());
47+
self.data_by_id.insert(id, data.into());
48+
id
49+
}
50+
}
51+
}

0 commit comments

Comments
 (0)