Skip to content

Commit fa5f1eb

Browse files
Alenar and dlachaume committed
feat(aggregator): file archiver now computes and returns size of uncompressed data
Co-authored-by: Damien Lachaume <[email protected]>
1 parent c03c079 commit fa5f1eb

File tree

5 files changed

+68
-30
lines changed

5 files changed

+68
-30
lines changed

mithril-aggregator/src/artifact_builder/cardano_database_artifacts/ancillary.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ mod tests {
377377
.upload_ancillary_archive(FileArchive::new(
378378
PathBuf::from("archive_path"),
379379
0,
380+
0,
380381
CompressionAlgorithm::Gzip,
381382
))
382383
.await
@@ -416,6 +417,7 @@ mod tests {
416417
.upload_ancillary_archive(FileArchive::new(
417418
PathBuf::from("archive_path"),
418419
0,
420+
0,
419421
CompressionAlgorithm::Gzip,
420422
))
421423
.await
@@ -443,7 +445,7 @@ mod tests {
443445
"upload_ancillary_archive_should_remove_archive_after_upload",
444446
);
445447
let archive_path = create_fake_archive(&source_dir, "ancillary.tar.gz");
446-
let archive = FileArchive::new(archive_path.clone(), 0, CompressionAlgorithm::Gzip);
448+
let archive = FileArchive::new(archive_path.clone(), 0, 0, CompressionAlgorithm::Gzip);
447449
let uploader = fake_uploader(
448450
archive_path.as_os_str().to_str().unwrap(),
449451
"an_uri",
@@ -472,7 +474,7 @@ mod tests {
472474
"upload_ancillary_archive_should_remove_archive_when_no_uploader_succeed",
473475
);
474476
let archive_path = create_fake_archive(&source_dir, "ancillary.tar.gz");
475-
let archive = FileArchive::new(archive_path.clone(), 0, CompressionAlgorithm::Gzip);
477+
let archive = FileArchive::new(archive_path.clone(), 0, 0, CompressionAlgorithm::Gzip);
476478
let uploader = fake_uploader_returning_error();
477479

478480
let builder = AncillaryArtifactBuilder::new(

mithril-aggregator/src/artifact_builder/cardano_immutable_files_full.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ impl CardanoImmutableFilesFullArtifactBuilder {
107107
snapshot_digest,
108108
self.cardano_network,
109109
beacon,
110-
ongoing_snapshot.get_file_size(),
110+
ongoing_snapshot.get_archive_size(),
111111
remote_locations,
112112
ongoing_snapshot.get_compression_algorithm(),
113113
&self.cardano_node_version,
@@ -211,7 +211,7 @@ mod tests {
211211
snapshot_digest.to_owned(),
212212
fake_data::network(),
213213
beacon,
214-
last_ongoing_snapshot.get_file_size(),
214+
last_ongoing_snapshot.get_archive_size(),
215215
remote_locations,
216216
CompressionAlgorithm::Zstandard,
217217
&Version::parse("1.0.0").unwrap(),
@@ -226,6 +226,7 @@ mod tests {
226226
let snapshot = FileArchive::new(
227227
file_path.to_path_buf(),
228228
7331,
229+
7331,
229230
CompressionAlgorithm::default(),
230231
);
231232

@@ -321,6 +322,7 @@ mod tests {
321322
let snapshot = FileArchive::new(
322323
file_path.to_path_buf(),
323324
7331,
325+
7331,
324326
CompressionAlgorithm::default(),
325327
);
326328
let mut snapshot_uploader = MockFileUploader::new();

mithril-aggregator/src/services/snapshotter/test_doubles.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ impl Snapshotter for DumbSnapshotter {
5252
self.compression_algorithm.tar_file_extension()
5353
)),
5454
0,
55+
0,
5556
self.compression_algorithm,
5657
);
5758
*value = Some(snapshot.clone());
@@ -127,6 +128,7 @@ impl Snapshotter for FakeSnapshotter {
127128
Ok(FileArchive::new(
128129
fake_archive_path,
129130
0,
131+
0,
130132
self.compression_algorithm,
131133
))
132134
}
@@ -185,13 +187,13 @@ mod tests {
185187
let snapshot = snapshotter.snapshot_all("archive").unwrap();
186188

187189
assert_eq!(PathBuf::from("archive.tar.gz"), *snapshot.get_file_path());
188-
assert_eq!(0, snapshot.get_file_size());
190+
assert_eq!(0, snapshot.get_archive_size());
189191

190192
let snapshot = snapshotter
191193
.snapshot_subset("archive", vec![PathBuf::from("whatever")])
192194
.unwrap();
193195
assert_eq!(PathBuf::from("archive.tar.gz"), *snapshot.get_file_path());
194-
assert_eq!(0, snapshot.get_file_size());
196+
assert_eq!(0, snapshot.get_archive_size());
195197
}
196198

197199
#[test]

mithril-aggregator/src/tools/file_archiver/api.rs

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use mithril_common::entities::CompressionAlgorithm;
1414
use mithril_common::logging::LoggerExtensions;
1515
use mithril_common::StdResult;
1616

17+
use crate::tools::file_size;
1718
use crate::ZstandardCompressionParameters;
1819

1920
use super::appender::TarAppender;
@@ -175,16 +176,24 @@ impl FileArchiver {
175176
}
176177
}
177178

178-
let filesize = Self::get_file_size(archive_path).with_context(|| {
179+
let uncompressed_size = appender.compute_uncompressed_data_size().with_context(|| {
179180
format!(
180-
"FileArchiver can not get file size of archive with path: '{}'",
181+
"FileArchiver can not get the size of the uncompressed data to archive: '{}'",
181182
archive_path.display()
182183
)
183184
})?;
185+
let archive_filesize =
186+
file_size::compute_size_of_path(archive_path).with_context(|| {
187+
format!(
188+
"FileArchiver can not get file size of archive with path: '{}'",
189+
archive_path.display()
190+
)
191+
})?;
184192

185193
Ok(FileArchive {
186194
filepath: archive_path.to_path_buf(),
187-
filesize,
195+
archive_filesize,
196+
uncompressed_size,
188197
compression_algorithm,
189198
})
190199
}
@@ -260,18 +269,6 @@ impl FileArchiver {
260269
verify_result
261270
}
262271

263-
fn get_file_size(filepath: &Path) -> StdResult<u64> {
264-
let res = fs::metadata(filepath)
265-
.with_context(|| {
266-
format!(
267-
"FileArchiver can not get metadata of file: '{}'",
268-
filepath.display()
269-
)
270-
})?
271-
.len();
272-
Ok(res)
273-
}
274-
275272
// Helper to unpack and delete a file from an entry, for archive verification purposes
276273
fn unpack_and_delete_file_from_entry<R: Read>(
277274
entry: Entry<R>,
@@ -309,7 +306,7 @@ mod tests {
309306
use mithril_common::test_utils::assert_equivalent;
310307

311308
use crate::test_tools::TestLogger;
312-
use crate::tools::file_archiver::appender::AppenderDirAll;
309+
use crate::tools::file_archiver::appender::{AppenderDirAll, AppenderFile};
313310
use crate::tools::file_archiver::test_tools::*;
314311
use crate::ZstandardCompressionParameters;
315312

@@ -446,14 +443,14 @@ mod tests {
446443
AppenderDirAll::new(archived_directory.clone()),
447444
)
448445
.unwrap();
449-
let first_snapshot_size = first_snapshot.get_file_size();
446+
let first_snapshot_size = first_snapshot.get_archive_size();
450447

451448
create_file(&archived_directory, "another_file_to_archive.txt");
452449

453450
let second_snapshot = file_archiver
454451
.archive(archive_params, AppenderDirAll::new(archived_directory))
455452
.unwrap();
456-
let second_snapshot_size = second_snapshot.get_file_size();
453+
let second_snapshot_size = second_snapshot.get_archive_size();
457454

458455
assert_ne!(first_snapshot_size, second_snapshot_size);
459456

@@ -472,4 +469,31 @@ mod tests {
472469
file_archiver.set_verification_temp_dir("sub_dir");
473470
file_archiver.set_verification_temp_dir("sub_dir".to_string());
474471
}
472+
473+
#[test]
474+
fn compute_size_of_uncompressed_data_and_archive() {
475+
let test_dir = get_test_directory("compute_size_of_uncompressed_data_and_archive");
476+
477+
let file_path = test_dir.join("file.txt");
478+
let file = File::create(&file_path).unwrap();
479+
file.set_len(777).unwrap();
480+
481+
let file_archiver = FileArchiver::new_for_test(test_dir.join("verification"));
482+
483+
let archive_params = ArchiveParameters {
484+
archive_name_without_extension: "archive".to_string(),
485+
target_directory: test_dir.clone(),
486+
compression_algorithm: CompressionAlgorithm::Gzip,
487+
};
488+
let snapshot = file_archiver
489+
.archive(
490+
archive_params.clone(),
491+
AppenderFile::append_at_archive_root(file_path.clone()).unwrap(),
492+
)
493+
.unwrap();
494+
495+
let expected_archive_size = file_size::compute_size_of_path(&snapshot.filepath).unwrap();
496+
assert_eq!(expected_archive_size, snapshot.get_archive_size(),);
497+
assert_eq!(777, snapshot.get_uncompressed_size());
498+
}
475499
}

mithril-aggregator/src/tools/file_archiver/entities.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,23 @@ impl ArchiveParameters {
2929
#[derive(Debug, Clone, PartialEq, Eq)]
3030
pub struct FileArchive {
3131
pub(super) filepath: PathBuf,
32-
// Todo: rename to `archive_size` and add `uncompressed_size` field.
33-
pub(super) filesize: u64,
32+
pub(super) archive_filesize: u64,
33+
pub(super) uncompressed_size: u64,
3434
pub(super) compression_algorithm: CompressionAlgorithm,
3535
}
3636

3737
impl FileArchive {
3838
/// Create a new instance of FileArchive.
3939
pub fn new(
4040
filepath: PathBuf,
41-
filesize: u64,
41+
archive_filesize: u64,
42+
uncompressed_size: u64,
4243
compression_algorithm: CompressionAlgorithm,
4344
) -> Self {
4445
Self {
4546
filepath,
46-
filesize,
47+
archive_filesize,
48+
uncompressed_size,
4749
compression_algorithm,
4850
}
4951
}
@@ -53,6 +55,7 @@ impl FileArchive {
5355
Self {
5456
filepath: PathBuf::from("archive.tar.gz"),
5557
archive_filesize: 10,
58+
uncompressed_size: 789,
5659
compression_algorithm: CompressionAlgorithm::Gzip,
5760
}
5861
}
@@ -63,8 +66,13 @@ impl FileArchive {
6366
}
6467

6568
/// Get the size of the archive.
66-
pub fn get_file_size(&self) -> u64 {
67-
self.filesize
69+
pub fn get_archive_size(&self) -> u64 {
70+
self.archive_filesize
71+
}
72+
73+
/// Get the size of the data before compression.
74+
pub fn get_uncompressed_size(&self) -> u64 {
75+
self.uncompressed_size
6876
}
6977

7078
/// Get the compression algorithm used to create the archive.

0 commit comments

Comments
 (0)