Skip to content

Commit f15c9f9

Browse files
bk2204 authored and gitster committed
rust: add functionality to hash an object
In a future commit, we'll want to hash some data when dealing with an object map. Let's make this easy by creating a structure to hash objects and calling into the C functions as necessary to perform the hashing. For now, we only implement safe hashing, but in the future we could add unsafe hashing if we want. Implement Clone and Drop to appropriately manage our memory. Additionally implement Write to make it easy to use with other formats that implement this trait. While we're at it, add some tests for the various hashing cases. Signed-off-by: brian m. carlson <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent ddeec7a commit f15c9f9

File tree

1 file changed

+142
-1
lines changed

1 file changed

+142
-1
lines changed

src/hash.rs

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
use std::error::Error;
1414
use std::fmt::{self, Debug, Display};
15+
use std::io::{self, Write};
1516
use std::os::raw::c_void;
1617

1718
pub const GIT_MAX_RAWSZ: usize = 32;
@@ -111,6 +112,100 @@ impl Debug for ObjectID {
111112
}
112113
}
113114

115+
/// A trait to implement hashing with a cryptographic algorithm.
116+
pub trait CryptoDigest {
117+
/// Return true if this digest is safe for use with untrusted data, false otherwise.
118+
fn is_safe(&self) -> bool;
119+
120+
/// Update the digest with the specified data.
121+
fn update(&mut self, data: &[u8]);
122+
123+
/// Return an object ID, consuming the hasher.
124+
fn into_oid(self) -> ObjectID;
125+
126+
/// Return a hash as a `Vec`, consuming the hasher.
127+
fn into_vec(self) -> Vec<u8>;
128+
}
129+
130+
/// A structure to hash data with a cryptographic hash algorithm.
131+
///
132+
/// Instances of this class are safe for use with untrusted data, provided Git has been compiled
133+
/// with a collision-detecting implementation of SHA-1.
134+
pub struct CryptoHasher {
135+
algo: HashAlgorithm,
136+
ctx: *mut c_void,
137+
}
138+
139+
impl CryptoHasher {
140+
/// Create a new hasher with the algorithm specified with `algo`.
141+
///
142+
/// This hasher is safe to use on untrusted data. If SHA-1 is selected and Git was compiled
143+
/// with a collision-detecting implementation of SHA-1, then this function will use that
144+
/// implementation and detect any attempts at a collision.
145+
pub fn new(algo: HashAlgorithm) -> Self {
146+
let ctx = unsafe { c::git_hash_alloc() };
147+
unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
148+
Self { algo, ctx }
149+
}
150+
}
151+
152+
impl CryptoDigest for CryptoHasher {
153+
/// Return true if this digest is safe for use with untrusted data, false otherwise.
154+
fn is_safe(&self) -> bool {
155+
true
156+
}
157+
158+
/// Update the hasher with the specified data.
159+
fn update(&mut self, data: &[u8]) {
160+
unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
161+
}
162+
163+
/// Return an object ID, consuming the hasher.
164+
fn into_oid(self) -> ObjectID {
165+
let mut oid = ObjectID {
166+
hash: [0u8; 32],
167+
algo: self.algo as u32,
168+
};
169+
unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
170+
oid
171+
}
172+
173+
/// Return a hash as a `Vec`, consuming the hasher.
174+
fn into_vec(self) -> Vec<u8> {
175+
let mut v = vec![0u8; self.algo.raw_len()];
176+
unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
177+
v
178+
}
179+
}
180+
181+
impl Clone for CryptoHasher {
182+
fn clone(&self) -> Self {
183+
let ctx = unsafe { c::git_hash_alloc() };
184+
unsafe { c::git_hash_clone(ctx, self.ctx) };
185+
Self {
186+
algo: self.algo,
187+
ctx,
188+
}
189+
}
190+
}
191+
192+
impl Drop for CryptoHasher {
193+
fn drop(&mut self) {
194+
unsafe { c::git_hash_free(self.ctx) };
195+
}
196+
}
197+
198+
impl Write for CryptoHasher {
199+
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
200+
self.update(data);
201+
Ok(data.len())
202+
}
203+
204+
fn flush(&mut self) -> io::Result<()> {
205+
Ok(())
206+
}
207+
}
208+
114209
/// A hash algorithm.
115210
#[repr(C)]
116211
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
@@ -239,19 +334,33 @@ impl HashAlgorithm {
239334
pub fn hash_algo_ptr(self) -> *const c_void {
240335
unsafe { c::hash_algo_ptr_by_number(self as u32) }
241336
}
337+
338+
/// Create a hasher for this algorithm.
339+
pub fn hasher(self) -> CryptoHasher {
340+
CryptoHasher::new(self)
341+
}
242342
}
243343

244344
/// Raw declarations of the C functions this module calls.
///
/// NOTE(review): the per-function comments describe how the Rust code in this file uses each
/// symbol; the C-side semantics should be confirmed against the C sources.
pub mod c {
    use std::os::raw::c_void;

    extern "C" {
        /// Called by `HashAlgorithm::hash_algo_ptr` with the algorithm's numeric value.
        pub fn hash_algo_ptr_by_number(n: u32) -> *const c_void;
        // NOTE(review): presumably maps an algorithm pointer to an unsafe variant; no caller
        // was seen in the reviewed portion of this file -- confirm.
        pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
        /// Source of the context pointer stored in `CryptoHasher`.
        pub fn git_hash_alloc() -> *mut c_void;
        /// Called from `CryptoHasher`'s `Drop` implementation with its context.
        pub fn git_hash_free(ctx: *mut c_void);
        /// Called by `CryptoHasher::new` with a new context and an algorithm pointer.
        pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
        /// Called by `CryptoHasher::clone` with a new context as `dst` and the existing one as
        /// `src`.
        pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
        /// Called by `CryptoHasher::update` with the pointer and length of the input slice.
        pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
        /// Called by `CryptoHasher::into_vec` with a buffer of `raw_len()` bytes.
        pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
        /// Called by `CryptoHasher::into_oid` with a pointer to an `ObjectID`.
        pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
    }
}
251359

252360
#[cfg(test)]
253361
mod tests {
254-
use super::HashAlgorithm;
362+
use super::{CryptoDigest, HashAlgorithm, ObjectID};
363+
use std::io::Write;
255364

256365
fn all_algos() -> &'static [HashAlgorithm] {
257366
&[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
@@ -322,4 +431,36 @@ mod tests {
322431
assert_eq!(format!("{:?}", oid), *debug);
323432
}
324433
}
434+
435+
#[test]
fn hasher_works_correctly() {
    for algo in all_algos() {
        // Each case pairs input bytes with the object ID `algo` reports via `empty_blob` or
        // `empty_tree`.
        let cases: &[(&[u8], &ObjectID)] = &[
            (b"blob 0\0", algo.empty_blob()),
            (b"tree 0\0", algo.empty_tree()),
        ];
        for (input, expected) in cases {
            // Split the input so that hashing is exercised incrementally.
            let (head, tail) = input.split_at(2);

            let mut hasher = algo.hasher();
            assert!(hasher.is_safe());
            hasher.update(head);
            hasher.update(tail);

            // Clone before finalizing so both output forms can be checked.
            let snapshot = hasher.clone();

            let from_update = hasher.into_oid();
            assert_eq!(**expected, from_update);

            let bytes = snapshot.into_vec();
            assert_eq!((*expected).as_slice().unwrap(), &bytes);

            // The `Write` implementation must give the same result as `update`.
            let mut writer = algo.hasher();
            writer.write_all(head).unwrap();
            writer.write_all(tail).unwrap();

            let from_write = writer.into_oid();
            assert_eq!(**expected, from_write);
        }
    }
}
325466
}

0 commit comments

Comments
 (0)