Skip to content

Commit 61c2e34

Browse files
committed
feat!: Check the hash when reading via File::at() just like git, or skip the check.
Note that indices written with `index.skipHash=true` will be vastly faster to read by a factor of 2 or more.
1 parent 3ff5ac0 commit 61c2e34

File tree

7 files changed

+76
-15
lines changed

7 files changed

+76
-15
lines changed

gix-index/src/access/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ mod tests {
344344
let file = PathBuf::from("tests")
345345
.join("fixtures")
346346
.join(Path::new("loose_index").join("conflicting-file.git-index"));
347-
let file = crate::File::at(file, gix_hash::Kind::Sha1, Default::default()).expect("valid file");
347+
let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file");
348348
assert_eq!(
349349
file.entries().len(),
350350
3,

gix-index/src/extension/link.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ impl Link {
7272
self,
7373
split_index: &mut crate::File,
7474
object_hash: gix_hash::Kind,
75+
skip_hash: bool,
7576
options: crate::decode::Options,
7677
) -> Result<(), crate::file::init::Error> {
7778
let shared_index_path = split_index
@@ -82,6 +83,7 @@ impl Link {
8283
let mut shared_index = crate::File::at(
8384
&shared_index_path,
8485
object_hash,
86+
skip_hash,
8587
crate::decode::Options {
8688
expected_checksum: self.shared_index_checksum.into(),
8789
..options

gix-index/src/file/init.rs

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,18 @@ pub use error::Error;
2626
/// Initialization
2727
impl File {
2828
/// Try to open the index file at `path` with `options`, assuming `object_hash` is used throughout the file, or create a new
29-
/// index that merely exists in memory and is empty.
29+
/// index that merely exists in memory and is empty. `skip_hash` will increase the performance by a factor of 2, at the cost of
30+
/// possibly not detecting corruption.
3031
///
3132
/// Note that the `path` will not be written if it doesn't exist.
3233
pub fn at_or_default(
3334
path: impl Into<PathBuf>,
3435
object_hash: gix_hash::Kind,
36+
skip_hash: bool,
3537
options: decode::Options,
3638
) -> Result<Self, Error> {
3739
let path = path.into();
38-
Ok(match Self::at(&path, object_hash, options) {
40+
Ok(match Self::at(&path, object_hash, skip_hash, options) {
3941
Ok(f) => f,
4042
Err(Error::Io(err)) if err.kind() == std::io::ErrorKind::NotFound => {
4143
File::from_state(State::new(object_hash), path)
@@ -44,25 +46,60 @@ impl File {
4446
})
4547
}
4648

47-
/// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file.
49+
/// Open an index file at `path` with `options`, assuming `object_hash` is used throughout the file. If `skip_hash` is `true`,
50+
/// we will not get or compare the checksum of the index at all, which generally increases performance of this method by a factor
51+
/// of 2 or more.
4852
///
4953
/// Note that the verification of the file hash depends on `options`, and even then it's performed after the file was read and not
5054
/// before it is read. That way, invalid files would see a more descriptive error message as we try to parse them.
51-
pub fn at(path: impl Into<PathBuf>, object_hash: gix_hash::Kind, options: decode::Options) -> Result<Self, Error> {
55+
pub fn at(
56+
path: impl Into<PathBuf>,
57+
object_hash: gix_hash::Kind,
58+
skip_hash: bool,
59+
options: decode::Options,
60+
) -> Result<Self, Error> {
5261
let _span = gix_features::trace::detail!("gix_index::File::at()");
5362
let path = path.into();
5463
let (data, mtime) = {
55-
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
5664
let file = std::fs::File::open(&path)?;
65+
// SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file.
5766
#[allow(unsafe_code)]
5867
let data = unsafe { Mmap::map(&file)? };
68+
69+
if !skip_hash {
70+
// Note that even though it's trivial to offload this into a thread, which is worth it for all but the smallest
71+
// index files, we choose more safety here just like git does and don't even try to decode the index if the hashes
72+
// don't match.
73+
// Thanks to `skip_hash`, we can get performance and it's under caller control, at the cost of some safety.
74+
let expected = gix_hash::ObjectId::from(&data[data.len() - object_hash.len_in_bytes()..]);
75+
if !expected.is_null() {
76+
let _span = gix_features::trace::detail!("gix::open_index::hash_index", path = ?path);
77+
let meta = file.metadata()?;
78+
let num_bytes_to_hash = meta.len() - object_hash.len_in_bytes() as u64;
79+
let actual_hash = gix_features::hash::bytes(
80+
&file,
81+
num_bytes_to_hash as usize,
82+
object_hash,
83+
&mut gix_features::progress::Discard,
84+
&Default::default(),
85+
)?;
86+
87+
if actual_hash != expected {
88+
return Err(Error::Decode(decode::Error::ChecksumMismatch {
89+
actual_checksum: actual_hash,
90+
expected_checksum: expected,
91+
}));
92+
}
93+
}
94+
}
95+
5996
(data, filetime::FileTime::from_last_modification_time(&file.metadata()?))
6097
};
6198

6299
let (state, checksum) = State::from_bytes(&data, mtime, object_hash, options)?;
63100
let mut file = File { state, path, checksum };
64101
if let Some(mut link) = file.link.take() {
65-
link.dissolve_into(&mut file, object_hash, options)?;
102+
link.dissolve_into(&mut file, object_hash, skip_hash, options)?;
66103
}
67104

68105
Ok(file)

gix-index/tests/index/file/init.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod at_or_new {
66
gix_index::File::at_or_default(
77
Generated("v4_more_files_IEOT").to_path(),
88
gix_hash::Kind::Sha1,
9+
false,
910
Default::default(),
1011
)
1112
.expect("file exists and can be opened");
@@ -16,6 +17,7 @@ mod at_or_new {
1617
let index = gix_index::File::at_or_default(
1718
"__definitely no file that exists ever__",
1819
gix_hash::Kind::Sha1,
20+
false,
1921
Default::default(),
2022
)
2123
.expect("file is defaulting to a new one");
@@ -46,7 +48,7 @@ mod from_state {
4648
let new_index_path = tmp.path().join(fixture.to_name());
4749
assert!(!new_index_path.exists());
4850

49-
let index = gix_index::File::at(fixture.to_path(), gix_hash::Kind::Sha1, Default::default())?;
51+
let index = gix_index::File::at(fixture.to_path(), gix_hash::Kind::Sha1, false, Default::default())?;
5052
let mut index = gix_index::File::from_state(index.into(), new_index_path.clone());
5153
assert!(index.checksum().is_none());
5254
assert_eq!(index.path(), new_index_path);

gix-index/tests/index/file/read.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ fn verify(index: gix_index::File) -> gix_index::File {
1919

2020
pub(crate) fn loose_file(name: &str) -> gix_index::File {
2121
let path = loose_file_path(name);
22-
let file = gix_index::File::at(path, gix_hash::Kind::Sha1, Default::default()).unwrap();
22+
let file = gix_index::File::at(path, gix_hash::Kind::Sha1, false, Default::default()).unwrap();
2323
verify(file)
2424
}
2525
pub(crate) fn try_file(name: &str) -> Result<gix_index::File, gix_index::file::init::Error> {
2626
let file = gix_index::File::at(
2727
crate::fixture_index_path(name),
2828
gix_hash::Kind::Sha1,
29+
false,
2930
Default::default(),
3031
)?;
3132
Ok(verify(file))
@@ -34,7 +35,7 @@ pub(crate) fn file(name: &str) -> gix_index::File {
3435
try_file(name).unwrap()
3536
}
3637
fn file_opt(name: &str, opts: gix_index::decode::Options) -> gix_index::File {
37-
let file = gix_index::File::at(crate::fixture_index_path(name), gix_hash::Kind::Sha1, opts).unwrap();
38+
let file = gix_index::File::at(crate::fixture_index_path(name), gix_hash::Kind::Sha1, false, opts).unwrap();
3839
verify(file)
3940
}
4041

@@ -140,6 +141,7 @@ fn split_index_without_any_extension() {
140141
let file = gix_index::File::at(
141142
find_shared_index_for(crate::fixture_index_path("v2_split_index")),
142143
gix_hash::Kind::Sha1,
144+
false,
143145
Default::default(),
144146
)
145147
.unwrap();
@@ -337,8 +339,15 @@ fn split_index_and_regular_index_of_same_content_are_indeed_the_same() {
337339
)
338340
.unwrap();
339341

340-
let split =
341-
verify(gix_index::File::at(base.join("split/.git/index"), gix_hash::Kind::Sha1, Default::default()).unwrap());
342+
let split = verify(
343+
gix_index::File::at(
344+
base.join("split/.git/index"),
345+
gix_hash::Kind::Sha1,
346+
false,
347+
Default::default(),
348+
)
349+
.unwrap(),
350+
);
342351

343352
assert!(
344353
split.link().is_none(),
@@ -349,6 +358,7 @@ fn split_index_and_regular_index_of_same_content_are_indeed_the_same() {
349358
gix_index::File::at(
350359
base.join("regular/.git/index"),
351360
gix_hash::Kind::Sha1,
361+
false,
352362
Default::default(),
353363
)
354364
.unwrap(),

gix-index/tests/index/file/write.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,12 @@ fn skip_hash() -> crate::Result {
5050
skip_hash: false,
5151
})?;
5252

53-
let actual = gix_index::File::at(&path, expected.checksum().expect("present").kind(), Default::default())?;
53+
let actual = gix_index::File::at(
54+
&path,
55+
expected.checksum().expect("present").kind(),
56+
false,
57+
Default::default(),
58+
)?;
5459
assert_eq!(
5560
actual.checksum(),
5661
expected.checksum(),
@@ -62,7 +67,12 @@ fn skip_hash() -> crate::Result {
6267
skip_hash: true,
6368
})?;
6469

65-
let actual = gix_index::File::at(&path, expected.checksum().expect("present").kind(), Default::default())?;
70+
let actual = gix_index::File::at(
71+
&path,
72+
expected.checksum().expect("present").kind(),
73+
false,
74+
Default::default(),
75+
)?;
6676
assert_eq!(actual.checksum(), None, "no hash is produced in this case");
6777

6878
Ok(())

gix-index/tests/index/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ impl Fixture {
5050
}
5151

5252
pub fn open(&self) -> gix_index::File {
53-
gix_index::File::at(self.to_path(), gix_hash::Kind::Sha1, Default::default())
53+
gix_index::File::at(self.to_path(), gix_hash::Kind::Sha1, false, Default::default())
5454
.expect("fixtures are always readable")
5555
}
5656
}

0 commit comments

Comments
 (0)