11use bytes:: Buf ;
22use merkledb:: { prelude:: MerkleDBHighLevelMethodsV1 , Chunk , MerkleMemDB } ;
33use merklehash:: { DataHash , MerkleHash } ;
4+ use tracing:: warn;
45use std:: {
56 cmp:: min,
67 io:: { Cursor , Error , Read , Seek , Write } ,
@@ -403,7 +404,7 @@ impl CasObject {
403404 let mut res = Vec :: < u8 > :: new ( ) ;
404405
405406 while reader. has_remaining ( ) {
406- let data = deserialize_chunk ( & mut reader) ?;
407+ let ( data, _ ) = deserialize_chunk ( & mut reader) ?;
407408 res. extend_from_slice ( & data) ;
408409 }
409410 Ok ( res)
@@ -538,6 +539,82 @@ impl CasObject {
538539 * ret. hash ( ) == * hash
539540 }
540541
542+ /// Validate CasObject.
543+ /// Verifies each chunk is valid and correctly represented in CasObjectInfo, along with
544+ /// recomputing the hash and validating it matches CasObjectInfo.
545+ ///
546+ /// Returns Ok(true) if recomputed hash matches what is passed in.
547+ pub fn validate_cas_object < R : Read + Seek > ( reader : & mut R , hash : & MerkleHash ) -> Result < bool , CasObjectError > {
548+
549+ // 1. deserialize to get Info
550+ let cas = CasObject :: deserialize ( reader) ?;
551+
552+ // 2. walk chunks from Info (skip the final dummy chunk)
553+ let mut hash_chunks: Vec < Chunk > = Vec :: new ( ) ;
554+ let mut cumulative_uncompressed_length: u32 = 0 ;
555+ let mut cumulative_compressed_length: u32 = 0 ;
556+
557+ if let Some ( c) = cas. info . chunk_size_info . first ( ) {
558+ if c. start_byte_index != 0 {
559+ // for 1st chunk verify that its start_byte_index is 0
560+ warn ! ( "XORB Validation: Byte 0 does not contain 1st chunk." ) ;
561+ return Ok ( false ) ;
562+ }
563+ } else {
564+ return Err ( CasObjectError :: FormatError ( anyhow ! ( "Invalid Xorb, no chunks" ) ) ) ;
565+ }
566+
567+ for ( idx, c) in cas. info . chunk_size_info [ ..cas. info . chunk_size_info . len ( ) - 1 ] . iter ( ) . enumerate ( ) {
568+
569+ // 3. verify on each chunk:
570+ reader. seek ( std:: io:: SeekFrom :: Start ( c. start_byte_index as u64 ) ) ?;
571+ let ( data, compressed_chunk_length) = deserialize_chunk ( reader) ?;
572+ let chunk_uncompressed_length = data. len ( ) ;
573+
574+ // 3a. compute hash
575+ hash_chunks. push ( Chunk { hash : merklehash:: compute_data_hash ( & data) , length : chunk_uncompressed_length} ) ;
576+
577+ cumulative_uncompressed_length += data. len ( ) as u32 ;
578+ cumulative_compressed_length += compressed_chunk_length as u32 ;
579+
580+ // 3b. verify deserialized chunk is expected size from Info object
581+ if cumulative_uncompressed_length != c. cumulative_uncompressed_len {
582+ warn ! ( "XORB Validation: Chunk length does not match Info object." ) ;
583+ return Ok ( false ) ;
584+ }
585+
586+ // 3c. verify start byte index of next chunk matches current byte index + compressed length
587+ if cas. info . chunk_size_info [ idx+1 ] . start_byte_index != ( c. start_byte_index + compressed_chunk_length as u32 ) {
588+ warn ! ( "XORB Validation: Chunk start byte index does not match Info object." ) ;
589+ return Ok ( false ) ;
590+ }
591+ }
592+
593+ // validate that Info/footer begins immediately after final content xorb.
594+ // end of for loop completes the content chunks, now should be able to deserialize an Info directly
595+ let cur_position = reader. stream_position ( ) ? as u32 ;
596+ let expected_position = cumulative_compressed_length;
597+ let expected_from_end_position = reader. seek ( std:: io:: SeekFrom :: End ( 0 ) ) ? as u32 - cas. info_length - size_of :: < u32 > ( ) as u32 ;
598+ if cur_position != expected_position || cur_position != expected_from_end_position {
599+ warn ! ( "XORB Validation: Content bytes after known chunks in Info object." ) ;
600+ return Ok ( false ) ;
601+ }
602+
603+ // 4. combine hashes to get full xorb hash, compare to provided
604+ let mut db = MerkleMemDB :: default ( ) ;
605+ let mut staging = db. start_insertion_staging ( ) ;
606+ db. add_file ( & mut staging, & hash_chunks) ;
607+ let ret = db. finalize ( staging) ;
608+
609+ if * ret. hash ( ) != * hash || * ret. hash ( ) != cas. info . cashash {
610+ warn ! ( "XORB Validation: Computed hash does not match provided hash or Info hash." ) ;
611+ return Ok ( false ) ;
612+ }
613+
614+ Ok ( true )
615+
616+ }
617+
541618}
542619
543620#[ cfg( test) ]
@@ -767,43 +844,38 @@ mod tests {
767844 fn test_basic_serialization_mem ( ) {
768845 // Arrange
769846 let ( c, _cas_data, raw_data) = build_cas_object ( 3 , 100 , false , false ) ;
770- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
847+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
771848 // Act & Assert
772849 assert ! ( CasObject :: serialize(
773- & mut writer ,
850+ & mut buf ,
774851 & c. info. cashash,
775852 & raw_data,
776853 & c. get_chunk_boundaries( ) ,
777854 CompressionScheme :: None
778855 )
779856 . is_ok( ) ) ;
780857
781- let mut reader = writer. clone ( ) ;
782- reader. set_position ( 0 ) ;
783- let res = CasObject :: deserialize ( & mut reader) ;
784- assert ! ( res. is_ok( ) ) ;
785- let c2 = res. unwrap ( ) ;
786- assert_eq ! ( c, c2) ;
787- assert_eq ! ( c. info. cashash, c2. info. cashash) ;
788- assert_eq ! ( c. info. num_chunks, c2. info. num_chunks) ;
858+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
789859 }
790860
791861 #[ test]
792862 fn test_serialization_deserialization_mem_medium ( ) {
793863 // Arrange
794864 let ( c, _cas_data, raw_data) = build_cas_object ( 32 , 16384 , false , false ) ;
795- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
865+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
796866 // Act & Assert
797867 assert ! ( CasObject :: serialize(
798- & mut writer ,
868+ & mut buf ,
799869 & c. info. cashash,
800870 & raw_data,
801871 & c. get_chunk_boundaries( ) ,
802872 CompressionScheme :: None
803873 )
804874 . is_ok( ) ) ;
805875
806- let mut reader = writer. clone ( ) ;
876+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
877+
878+ let mut reader = buf. clone ( ) ;
807879 reader. set_position ( 0 ) ;
808880 let res = CasObject :: deserialize ( & mut reader) ;
809881 assert ! ( res. is_ok( ) ) ;
@@ -820,18 +892,20 @@ mod tests {
820892 fn test_serialization_deserialization_mem_large_random ( ) {
821893 // Arrange
822894 let ( c, _cas_data, raw_data) = build_cas_object ( 32 , 65536 , true , false ) ;
823- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
895+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
824896 // Act & Assert
825897 assert ! ( CasObject :: serialize(
826- & mut writer ,
898+ & mut buf ,
827899 & c. info. cashash,
828900 & raw_data,
829901 & c. get_chunk_boundaries( ) ,
830902 CompressionScheme :: None
831903 )
832904 . is_ok( ) ) ;
833905
834- let mut reader = writer. clone ( ) ;
906+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
907+
908+ let mut reader = buf. clone ( ) ;
835909 reader. set_position ( 0 ) ;
836910 let res = CasObject :: deserialize ( & mut reader) ;
837911 assert ! ( res. is_ok( ) ) ;
@@ -847,18 +921,20 @@ mod tests {
847921 fn test_serialization_deserialization_file_large_random ( ) {
848922 // Arrange
849923 let ( c, _cas_data, raw_data) = build_cas_object ( 256 , 65536 , true , false ) ;
850- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
924+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
851925 // Act & Assert
852926 assert ! ( CasObject :: serialize(
853- & mut writer ,
927+ & mut buf ,
854928 & c. info. cashash,
855929 & raw_data,
856930 & c. get_chunk_boundaries( ) ,
857931 CompressionScheme :: None
858932 )
859933 . is_ok( ) ) ;
860934
861- let mut reader = writer. clone ( ) ;
935+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
936+
937+ let mut reader = buf. clone ( ) ;
862938 reader. set_position ( 0 ) ;
863939 let res = CasObject :: deserialize ( & mut reader) ;
864940 assert ! ( res. is_ok( ) ) ;
@@ -902,18 +978,20 @@ mod tests {
902978 fn test_serialization_deserialization_mem_medium_lz4 ( ) {
903979 // Arrange
904980 let ( c, _cas_data, raw_data) = build_cas_object ( 32 , 16384 , false , true ) ;
905- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
981+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
906982 // Act & Assert
907983 assert ! ( CasObject :: serialize(
908- & mut writer ,
984+ & mut buf ,
909985 & c. info. cashash,
910986 & raw_data,
911987 & c. get_chunk_boundaries( ) ,
912988 CompressionScheme :: LZ4
913989 )
914990 . is_ok( ) ) ;
915991
916- let mut reader = writer. clone ( ) ;
992+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
993+
994+ let mut reader = buf. clone ( ) ;
917995 reader. set_position ( 0 ) ;
918996 let res = CasObject :: deserialize ( & mut reader) ;
919997 assert ! ( res. is_ok( ) ) ;
@@ -930,18 +1008,20 @@ mod tests {
9301008 fn test_serialization_deserialization_mem_large_random_lz4 ( ) {
9311009 // Arrange
9321010 let ( c, _cas_data, raw_data) = build_cas_object ( 32 , 65536 , true , true ) ;
933- let mut writer : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
1011+ let mut buf : Cursor < Vec < u8 > > = Cursor :: new ( Vec :: new ( ) ) ;
9341012 // Act & Assert
9351013 assert ! ( CasObject :: serialize(
936- & mut writer ,
1014+ & mut buf ,
9371015 & c. info. cashash,
9381016 & raw_data,
9391017 & c. get_chunk_boundaries( ) ,
9401018 CompressionScheme :: LZ4
9411019 )
9421020 . is_ok( ) ) ;
9431021
944- let mut reader = writer. clone ( ) ;
1022+ assert ! ( CasObject :: validate_cas_object( & mut buf, & c. info. cashash) . unwrap( ) ) ;
1023+
1024+ let mut reader = buf. clone ( ) ;
9451025 reader. set_position ( 0 ) ;
9461026 let res = CasObject :: deserialize ( & mut reader) ;
9471027 assert ! ( res. is_ok( ) ) ;
0 commit comments