11use bytes:: Buf ;
2+ use merkledb:: { prelude:: MerkleDBHighLevelMethodsV1 , Chunk , MerkleMemDB } ;
23use merklehash:: { DataHash , MerkleHash } ;
34use std:: {
45 cmp:: min,
@@ -454,6 +455,12 @@ impl CasObject {
454455 chunk_boundaries : & Vec < u32 > ,
455456 compression_scheme : CompressionScheme ,
456457 ) -> Result < ( Self , usize ) , CasObjectError > {
458+
459+ // validate hash against contents
460+ if !Self :: validate_root_hash ( data, chunk_boundaries, hash) {
461+ return Err ( CasObjectError :: HashMismatch ) ;
462+ }
463+
457464 let mut cas = CasObject :: default ( ) ;
458465 cas. info . cashash . copy_from_slice ( hash. as_slice ( ) ) ;
459466 cas. info . num_chunks = chunk_boundaries. len ( ) as u32 + 1 ; // extra entry for dummy, see [chunk_size_info] for details.
@@ -473,7 +480,6 @@ impl CasObject {
473480 let chunk_size = chunk_boundary - raw_start_idx;
474481
475482 // now serialize chunk directly to writer (since chunks come first!)
476- // TODO: add compression scheme to this call
477483 let chunk_written_bytes =
478484 serialize_chunk ( & chunk_raw_bytes, writer, compression_scheme) ?;
479485 total_written_bytes += chunk_written_bytes;
@@ -506,15 +512,40 @@ impl CasObject {
506512
507513 Ok ( ( cas, total_written_bytes) )
508514 }
515+
516+ pub fn validate_root_hash ( data : & [ u8 ] , chunk_boundaries : & [ u32 ] , hash : & MerkleHash ) -> bool {
517+ // at least 1 chunk, and last entry in chunk boundary must match the length
518+ if chunk_boundaries. is_empty ( )
519+ || chunk_boundaries[ chunk_boundaries. len ( ) - 1 ] as usize != data. len ( )
520+ {
521+ return false ;
522+ }
523+
524+ let mut chunks: Vec < Chunk > = Vec :: new ( ) ;
525+ let mut left_edge: usize = 0 ;
526+ for i in chunk_boundaries {
527+ let right_edge = * i as usize ;
528+ let hash = merklehash:: compute_data_hash ( & data[ left_edge..right_edge] ) ;
529+ let length = right_edge - left_edge;
530+ chunks. push ( Chunk { hash, length } ) ;
531+ left_edge = right_edge;
532+ }
533+
534+ let mut db = MerkleMemDB :: default ( ) ;
535+ let mut staging = db. start_insertion_staging ( ) ;
536+ db. add_file ( & mut staging, & chunks) ;
537+ let ret = db. finalize ( staging) ;
538+ * ret. hash ( ) == * hash
539+ }
540+
509541}
510542
511543#[ cfg( test) ]
512544mod tests {
513545
514- use crate :: cas_chunk_format:: serialize_chunk;
515-
516546 use super :: * ;
517- use merklehash:: compute_data_hash;
547+ use crate :: cas_chunk_format:: serialize_chunk;
548+ use merkledb:: { prelude:: MerkleDBHighLevelMethodsV1 , Chunk , MerkleMemDB } ;
518549 use rand:: Rng ;
519550 use std:: io:: Cursor ;
520551
@@ -633,10 +664,10 @@ mod tests {
633664 chunk_size_info. push ( chunk_info) ;
634665
635666 c. info . num_chunks = chunk_size_info. len ( ) as u32 ;
636-
637- c. info . cashash = compute_data_hash ( & writer. get_ref ( ) ) ;
638667 c. info . chunk_size_info = chunk_size_info;
639668
669+ c. info . cashash = gen_hash ( & data_contents_raw, & c. get_chunk_boundaries ( ) ) ;
670+
640671 // now serialize info to end Xorb length
641672 let len = c. info . serialize ( & mut writer) . unwrap ( ) ;
642673 c. info_length = len as u32 ;
@@ -646,6 +677,92 @@ mod tests {
646677 ( c, writer. get_ref ( ) . to_vec ( ) , data_contents_raw)
647678 }
648679
680+ fn gen_hash ( data : & [ u8 ] , chunk_boundaries : & [ u32 ] ) -> DataHash {
681+ let mut chunks: Vec < Chunk > = Vec :: new ( ) ;
682+ let mut left_edge: usize = 0 ;
683+ for i in chunk_boundaries {
684+ let right_edge = * i as usize ;
685+ let hash = merklehash:: compute_data_hash ( & data[ left_edge..right_edge] ) ;
686+ let length = right_edge - left_edge;
687+ chunks. push ( Chunk { hash, length } ) ;
688+ left_edge = right_edge;
689+ }
690+
691+ let mut db = MerkleMemDB :: default ( ) ;
692+ let mut staging = db. start_insertion_staging ( ) ;
693+ db. add_file ( & mut staging, & chunks) ;
694+ let ret = db. finalize ( staging) ;
695+ * ret. hash ( )
696+ }
697+
#[test]
fn test_compress_decompress() {
    // Arrange: build a random xorb and compute its expected root hash.
    let (c, _cas_data, raw_data) = build_cas_object(55, 53212, false, true);
    // NOTE: was `gen_hash(&&raw_data, ...)` — the double reference only
    // compiled via two-step deref coercion; a single borrow is what's meant.
    let hash = gen_hash(&raw_data, &c.get_chunk_boundaries());

    // Act & Assert: serialize with LZ4 compression.
    let mut writer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
    assert!(CasObject::serialize(
        &mut writer,
        &c.info.cashash,
        &raw_data,
        &c.get_chunk_boundaries(),
        CompressionScheme::LZ4
    )
    .is_ok());

    // Deserialize the compressed xorb back.
    let mut reader = writer.clone();
    reader.set_position(0);
    let res = CasObject::deserialize(&mut reader);
    assert!(res.is_ok());
    let c = res.unwrap();

    // Decompressed contents must hash to the same root.
    let c_bytes = c.get_all_bytes(&mut reader).unwrap();
    let c_boundaries = c.get_chunk_boundaries();
    let c_hash = gen_hash(&c_bytes, &c_boundaries);

    // Round-trip again without compression.
    let mut writer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
    assert!(CasObject::serialize(
        &mut writer,
        &c_hash,
        &c_bytes,
        &c_boundaries,
        CompressionScheme::None
    )
    .is_ok());

    let mut reader = writer.clone();
    reader.set_position(0);
    let res = CasObject::deserialize(&mut reader);
    assert!(res.is_ok());
    let c2 = res.unwrap();

    // Hashes must agree across compression schemes and round trips.
    assert_eq!(hash, c_hash);
    assert_eq!(c.info.cashash, hash);
    assert_eq!(c2.info.cashash, c.info.cashash);
}
745+
#[test]
fn test_hash_generation_compression() {
    // Arrange: a random xorb plus its serialized form and raw contents.
    let (c, cas_data, raw_data) = build_cas_object(55, 53212, false, true);

    // Act: serialize the raw data with LZ4 compression.
    let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
    let serialized = CasObject::serialize(
        &mut buf,
        &c.info.cashash,
        &raw_data,
        &c.get_chunk_boundaries(),
        CompressionScheme::LZ4,
    );
    assert!(serialized.is_ok());

    // Assert: stored hash matches a recomputed root hash, decompressed
    // bytes match the original data, and the serialized form matches the
    // reference bytes produced by build_cas_object.
    assert_eq!(c.info.cashash, gen_hash(&raw_data, &c.get_chunk_boundaries()));
    assert_eq!(raw_data, c.get_all_bytes(&mut buf).unwrap());
    assert_eq!(&cas_data, buf.get_ref());
}
765+
649766 #[ test]
650767 fn test_basic_serialization_mem ( ) {
651768 // Arrange
0 commit comments