Skip to content

Commit 148b720

Browse files
authored
CasObject::serialize validates hash (#18)
* Added tests for CAS hash generation
* Added hash validation to CasObject::serialize
  - Removed from LocalClient::put
  - Updated unit tests across LocalClient, StagingClient, CacheClient

  Now all paths to creating Xorbs will validate that the provided hash matches the contents being offered.
* Fix clippy
1 parent 9ba46cb commit 148b720

File tree

7 files changed

+138
-44
lines changed

7 files changed

+138
-44
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cas_client/src/caching_client.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,10 @@ mod tests {
256256
// put the different value with the same hash
257257
// this should fail
258258
assert_eq!(
259-
CasClientError::HashMismatch,
259+
CasClientError::CasObjectError(cas_object::error::CasObjectError::HashMismatch),
260260
client
261261
.put(
262-
"key",
262+
"hellp",
263263
&hello_hash,
264264
"hellp world".as_bytes().to_vec(),
265265
vec![hello.len() as u64],

cas_client/src/local_client.rs

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ use crate::error::{CasClientError, Result};
22
use crate::interface::Client;
33
use cas::key::Key;
44
use cas_object::CasObject;
5-
use merkledb::prelude::*;
6-
use merkledb::{Chunk, MerkleMemDB};
75
use merklehash::MerkleHash;
86
use std::fs::{metadata, File};
97
use std::io::{BufReader, BufWriter, Write};
@@ -128,30 +126,6 @@ impl LocalClient {
128126
let _ = std::fs::remove_file(file_path);
129127
}
130128

131-
fn validate_root_hash(data: &[u8], chunk_boundaries: &[u64], hash: &MerkleHash) -> bool {
132-
// at least 1 chunk, and last entry in chunk boundary must match the length
133-
if chunk_boundaries.is_empty()
134-
|| chunk_boundaries[chunk_boundaries.len() - 1] as usize != data.len()
135-
{
136-
return false;
137-
}
138-
139-
let mut chunks: Vec<Chunk> = Vec::new();
140-
let mut left_edge: usize = 0;
141-
for i in chunk_boundaries {
142-
let right_edge = *i as usize;
143-
let hash = merklehash::compute_data_hash(&data[left_edge..right_edge]);
144-
let length = right_edge - left_edge;
145-
chunks.push(Chunk { hash, length });
146-
left_edge = right_edge;
147-
}
148-
149-
let mut db = MerkleMemDB::default();
150-
let mut staging = db.start_insertion_staging();
151-
db.add_file(&mut staging, &chunks);
152-
let ret = db.finalize(staging);
153-
*ret.hash() == *hash
154-
}
155129
}
156130

157131
/// LocalClient is responsible for writing/reading Xorbs on local disk.
@@ -176,10 +150,7 @@ impl Client for LocalClient {
176150
return Err(CasClientError::InvalidArguments);
177151
}
178152

179-
// validate hash
180-
if !Self::validate_root_hash(&data, &chunk_boundaries, hash) {
181-
return Err(CasClientError::HashMismatch);
182-
}
153+
// moved hash validation into [CasObject::serialize], so removed from here.
183154

184155
if let Ok(xorb_size) = self.get_length(prefix, hash).await {
185156
if xorb_size > 0 {
@@ -315,6 +286,7 @@ impl Client for LocalClient {
315286
mod tests {
316287

317288
use super::*;
289+
use merkledb::{prelude::MerkleDBHighLevelMethodsV1, Chunk, MerkleMemDB};
318290
use merklehash::{compute_data_hash, DataHash};
319291
use rand::Rng;
320292

@@ -432,10 +404,10 @@ mod tests {
432404
// put the different value with the same hash
433405
// this should fail
434406
assert_eq!(
435-
CasClientError::HashMismatch,
407+
CasClientError::CasObjectError(cas_object::error::CasObjectError::HashMismatch),
436408
client
437409
.put(
438-
"key",
410+
"hellp",
439411
&hello_hash,
440412
"hellp world".as_bytes().to_vec(),
441413
vec![hello.len() as u64],
@@ -448,7 +420,7 @@ mod tests {
448420
CasClientError::InvalidArguments,
449421
client
450422
.put(
451-
"key",
423+
"hellp2",
452424
&hello_hash,
453425
"hellp wod".as_bytes().to_vec(),
454426
vec![hello.len() as u64],
@@ -462,7 +434,7 @@ mod tests {
462434
CasClientError::InvalidArguments,
463435
client
464436
.put(
465-
"key",
437+
"again",
466438
&hello_hash,
467439
"hello world again".as_bytes().to_vec(),
468440
vec![hello.len() as u64],

cas_client/src/staging_client.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,10 +424,10 @@ mod tests {
424424
// put the different value with the same hash
425425
// this should fail
426426
assert_eq!(
427-
CasClientError::HashMismatch,
427+
CasClientError::CasObjectError(cas_object::error::CasObjectError::HashMismatch),
428428
client
429429
.put(
430-
"key",
430+
"hellp",
431431
&hello_hash,
432432
"hellp world".as_bytes().to_vec(),
433433
vec![hello.len() as u64],

cas_object/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77
anyhow = "1.0.88"
88
bincode = "1.3.3"
99
http = "1.1.0"
10+
merkledb = { path = "../merkledb" }
1011
merklehash = { path = "../merklehash" }
1112
tempfile = "3.12.0"
1213
tracing = "0.1.40"

cas_object/src/cas_object_format.rs

Lines changed: 123 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use bytes::Buf;
2+
use merkledb::{prelude::MerkleDBHighLevelMethodsV1, Chunk, MerkleMemDB};
23
use merklehash::{DataHash, MerkleHash};
34
use std::{
45
cmp::min,
@@ -454,6 +455,12 @@ impl CasObject {
454455
chunk_boundaries: &Vec<u32>,
455456
compression_scheme: CompressionScheme,
456457
) -> Result<(Self, usize), CasObjectError> {
458+
459+
// validate hash against contents
460+
if !Self::validate_root_hash(data, chunk_boundaries, hash) {
461+
return Err(CasObjectError::HashMismatch);
462+
}
463+
457464
let mut cas = CasObject::default();
458465
cas.info.cashash.copy_from_slice(hash.as_slice());
459466
cas.info.num_chunks = chunk_boundaries.len() as u32 + 1; // extra entry for dummy, see [chunk_size_info] for details.
@@ -473,7 +480,6 @@ impl CasObject {
473480
let chunk_size = chunk_boundary - raw_start_idx;
474481

475482
// now serialize chunk directly to writer (since chunks come first!)
476-
// TODO: add compression scheme to this call
477483
let chunk_written_bytes =
478484
serialize_chunk(&chunk_raw_bytes, writer, compression_scheme)?;
479485
total_written_bytes += chunk_written_bytes;
@@ -506,15 +512,40 @@ impl CasObject {
506512

507513
Ok((cas, total_written_bytes))
508514
}
515+
516+
pub fn validate_root_hash(data: &[u8], chunk_boundaries: &[u32], hash: &MerkleHash) -> bool {
517+
// at least 1 chunk, and last entry in chunk boundary must match the length
518+
if chunk_boundaries.is_empty()
519+
|| chunk_boundaries[chunk_boundaries.len() - 1] as usize != data.len()
520+
{
521+
return false;
522+
}
523+
524+
let mut chunks: Vec<Chunk> = Vec::new();
525+
let mut left_edge: usize = 0;
526+
for i in chunk_boundaries {
527+
let right_edge = *i as usize;
528+
let hash = merklehash::compute_data_hash(&data[left_edge..right_edge]);
529+
let length = right_edge - left_edge;
530+
chunks.push(Chunk { hash, length });
531+
left_edge = right_edge;
532+
}
533+
534+
let mut db = MerkleMemDB::default();
535+
let mut staging = db.start_insertion_staging();
536+
db.add_file(&mut staging, &chunks);
537+
let ret = db.finalize(staging);
538+
*ret.hash() == *hash
539+
}
540+
509541
}
510542

511543
#[cfg(test)]
512544
mod tests {
513545

514-
use crate::cas_chunk_format::serialize_chunk;
515-
516546
use super::*;
517-
use merklehash::compute_data_hash;
547+
use crate::cas_chunk_format::serialize_chunk;
548+
use merkledb::{prelude::MerkleDBHighLevelMethodsV1, Chunk, MerkleMemDB};
518549
use rand::Rng;
519550
use std::io::Cursor;
520551

@@ -633,10 +664,10 @@ mod tests {
633664
chunk_size_info.push(chunk_info);
634665

635666
c.info.num_chunks = chunk_size_info.len() as u32;
636-
637-
c.info.cashash = compute_data_hash(&writer.get_ref());
638667
c.info.chunk_size_info = chunk_size_info;
639668

669+
c.info.cashash = gen_hash(&data_contents_raw, &c.get_chunk_boundaries());
670+
640671
// now serialize info to end Xorb length
641672
let len = c.info.serialize(&mut writer).unwrap();
642673
c.info_length = len as u32;
@@ -646,6 +677,92 @@ mod tests {
646677
(c, writer.get_ref().to_vec(), data_contents_raw)
647678
}
648679

680+
fn gen_hash(data: &[u8], chunk_boundaries: &[u32]) -> DataHash {
681+
let mut chunks: Vec<Chunk> = Vec::new();
682+
let mut left_edge: usize = 0;
683+
for i in chunk_boundaries {
684+
let right_edge = *i as usize;
685+
let hash = merklehash::compute_data_hash(&data[left_edge..right_edge]);
686+
let length = right_edge - left_edge;
687+
chunks.push(Chunk { hash, length });
688+
left_edge = right_edge;
689+
}
690+
691+
let mut db = MerkleMemDB::default();
692+
let mut staging = db.start_insertion_staging();
693+
db.add_file(&mut staging, &chunks);
694+
let ret = db.finalize(staging);
695+
*ret.hash()
696+
}
697+
698+
#[test]
699+
fn test_compress_decompress() {
700+
// Arrange
701+
let (c, _cas_data, raw_data) = build_cas_object(55, 53212, false, true);
702+
let hash = gen_hash(&&raw_data, &c.get_chunk_boundaries());
703+
704+
// Act & Assert
705+
let mut writer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
706+
assert!(CasObject::serialize(
707+
&mut writer,
708+
&c.info.cashash,
709+
&raw_data,
710+
&c.get_chunk_boundaries(),
711+
CompressionScheme::LZ4
712+
)
713+
.is_ok());
714+
715+
let mut reader = writer.clone();
716+
reader.set_position(0);
717+
let res = CasObject::deserialize(&mut reader);
718+
assert!(res.is_ok());
719+
let c = res.unwrap();
720+
721+
let c_bytes = c.get_all_bytes(&mut reader).unwrap();
722+
let c_boundaries = c.get_chunk_boundaries();
723+
let c_hash = gen_hash(&c_bytes, &c_boundaries);
724+
725+
let mut writer: Cursor<Vec<u8>> = Cursor::new(Vec::new());
726+
assert!(CasObject::serialize(
727+
&mut writer,
728+
&c_hash,
729+
&c_bytes,
730+
&c_boundaries,
731+
CompressionScheme::None
732+
)
733+
.is_ok());
734+
735+
let mut reader = writer.clone();
736+
reader.set_position(0);
737+
let res = CasObject::deserialize(&mut reader);
738+
assert!(res.is_ok());
739+
let c2 = res.unwrap();
740+
741+
assert_eq!(hash, c_hash);
742+
assert_eq!(c.info.cashash, hash);
743+
assert_eq!(c2.info.cashash, c.info.cashash);
744+
}
745+
746+
#[test]
747+
fn test_hash_generation_compression() {
748+
// Arrange
749+
let (c, cas_data, raw_data) = build_cas_object(55, 53212, false, true);
750+
// Act & Assert
751+
let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new());
752+
assert!(CasObject::serialize(
753+
&mut buf,
754+
&c.info.cashash,
755+
&raw_data,
756+
&c.get_chunk_boundaries(),
757+
CompressionScheme::LZ4
758+
)
759+
.is_ok());
760+
761+
assert_eq!(c.info.cashash, gen_hash(&raw_data, &c.get_chunk_boundaries()));
762+
assert_eq!(raw_data, c.get_all_bytes(&mut buf).unwrap());
763+
assert_eq!(&cas_data, buf.get_ref());
764+
}
765+
649766
#[test]
650767
fn test_basic_serialization_mem() {
651768
// Arrange

cas_object/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ pub enum CasObjectError {
1515
#[error("Format Error: {0}")]
1616
FormatError(anyhow::Error),
1717

18+
#[error("Hash Mismatch")]
19+
HashMismatch,
20+
1821
#[error("Internal IO Error: {0}")]
1922
InternalIOError(#[from] std::io::Error),
2023

0 commit comments

Comments
 (0)