Skip to content

Commit 1023860

Browse files
committed
feat(client-lib): add UnexpectedDownloadedFileVerifier
A two-stage tool that first computes the expected state of a Cardano database immutable folder after download, then, once said download is done, removes all unexpected files.
1 parent f6a99c1 commit 1023860

File tree

2 files changed

+357
-0
lines changed

2 files changed

+357
-0
lines changed

mithril-client/src/utils/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ cfg_fs! {
77
mod ancillary_verifier;
88
mod stream_reader;
99
mod bootstrap_files;
10+
mod unexpected_downloaded_file_verifier;
1011

1112
pub use ancillary_verifier::AncillaryVerifier;
13+
pub(crate) use unexpected_downloaded_file_verifier::*;
1214
pub use stream_reader::*;
1315
pub use bootstrap_files::*;
1416
}
Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
//! Toolbox to verify if unexpected files are included when downloading and unpacking
2+
//! Mithril archives and delete found offenders.
3+
//!
4+
//! This reduces the ability of adversarial aggregators to leverage Mithril archives for side
5+
//! channel attacks.
6+
//!
7+
//! Requirements:
8+
//! * Existing extra files added by users should be kept
9+
//! * Found offenders should be reported
10+
//!
11+
use std::collections::HashSet;
12+
use std::ops::RangeInclusive;
13+
use std::path::{Path, PathBuf};
14+
15+
use anyhow::Context;
16+
17+
use mithril_common::digesters::immutable_trio_names;
18+
use mithril_common::entities::ImmutableFileNumber;
19+
use mithril_common::StdResult;
20+
21+
const BASE_ERROR: &str = "Unexpected downloaded file check failed";
22+
23+
/// Tool to check and remove unexpected files when downloading and unpacking Mithril archives
24+
pub struct UnexpectedDownloadedFileVerifier {
25+
dir_to_check: PathBuf,
26+
immutable_files_range_to_expect: RangeInclusive<ImmutableFileNumber>,
27+
}
28+
29+
/// List of expected files after downloading and unpacking, yielded by `UnexpectedDownloadedFileVerifier::compute_expected_state_after_download`
///
/// This is the second stage of the check: once the download is done, every direct entry of
/// the checked directory that is not part of this list can be removed.
#[derive(Debug)]
pub struct ExpectedFilesAfterDownload {
    /// Directory whose direct entries are checked
    dir_to_check: PathBuf,
    /// Full paths of the entries that are allowed to exist in `dir_to_check`
    expected_files: HashSet<PathBuf>,
}
34+
35+
impl UnexpectedDownloadedFileVerifier {
36+
/// `UnexpectedDownloadedFileVerifier` factory
37+
pub fn new<P: AsRef<Path>>(
38+
dir_to_check: P,
39+
network_kind: &str,
40+
include_ancillary: bool,
41+
last_downloaded_immutable_file_number: ImmutableFileNumber,
42+
) -> Self {
43+
Self {
44+
dir_to_check: dir_to_check.as_ref().to_path_buf(),
45+
immutable_files_range_to_expect: compute_immutable_files_range_to_expect(
46+
network_kind,
47+
include_ancillary,
48+
last_downloaded_immutable_file_number,
49+
),
50+
}
51+
}
52+
53+
/// Compute the expected state of the folder after download finish
54+
pub async fn compute_expected_state_after_download(
55+
&self,
56+
) -> StdResult<ExpectedFilesAfterDownload> {
57+
let dir_to_check = self.dir_to_check.to_path_buf();
58+
let immutable_files_range_to_expect = self.immutable_files_range_to_expect.clone();
59+
// target databases can be quite large, avoid blocking the main thread
60+
let expected_files = tokio::task::spawn_blocking(move || -> StdResult<HashSet<PathBuf>> {
61+
let mut files: HashSet<PathBuf> = std::fs::read_dir(&dir_to_check)
62+
.with_context(|| format!("Failed to read directory {}", dir_to_check.display()))?
63+
.flat_map(|e| e.map(|e| e.path()))
64+
.collect();
65+
66+
// Complete the list with all rightfully downloaded immutable files
67+
for immutable_file_name in
68+
immutable_files_range_to_expect.flat_map(immutable_trio_names)
69+
{
70+
files.insert(dir_to_check.join(immutable_file_name));
71+
}
72+
Ok(files)
73+
})
74+
.await?
75+
.with_context(|| BASE_ERROR)?;
76+
77+
Ok(ExpectedFilesAfterDownload {
78+
dir_to_check: self.dir_to_check.clone(),
79+
expected_files,
80+
})
81+
}
82+
}
83+
84+
fn compute_immutable_files_range_to_expect(
85+
network_kind: &str,
86+
include_ancillary: bool,
87+
last_downloaded_immutable_file_number: ImmutableFileNumber,
88+
) -> RangeInclusive<ImmutableFileNumber> {
89+
let is_devnet_network = network_kind.contains("devnet");
90+
let lower_bound = if is_devnet_network { 0 } else { 1 };
91+
let upper_bound = if include_ancillary {
92+
last_downloaded_immutable_file_number + 1
93+
} else {
94+
last_downloaded_immutable_file_number
95+
};
96+
97+
lower_bound..=upper_bound
98+
}
99+
100+
impl ExpectedFilesAfterDownload {
101+
/// Identify and delete unexpected files and folder
102+
///
103+
/// Returns the name of the deleted items
104+
///
105+
/// *Note: removed directories names are suffixed with a "/"*
106+
pub async fn remove_unexpected_files(self) -> StdResult<Vec<String>> {
107+
tokio::task::spawn_blocking(move || {
108+
let unexpected_entries: Vec<_> = std::fs::read_dir(&self.dir_to_check)
109+
.with_context(|| BASE_ERROR)?
110+
.flatten()
111+
.filter(|f| !self.expected_files.contains(&f.path().to_path_buf()))
112+
.collect();
113+
let mut removed_entries = Vec::new();
114+
115+
for unexpected_entry in &unexpected_entries {
116+
let unexpected_path = unexpected_entry.path();
117+
if unexpected_path.is_dir() {
118+
std::fs::remove_dir_all(&unexpected_path)
119+
.with_context(|| {
120+
format!(
121+
"failed to remove unexpected directory `{}`",
122+
unexpected_path.display()
123+
)
124+
})
125+
.with_context(|| BASE_ERROR)?;
126+
// Join a "/" to the end to make explicit that it's a directory
127+
removed_entries.push(format!(
128+
"{}/",
129+
unexpected_entry.file_name().to_string_lossy()
130+
));
131+
} else {
132+
std::fs::remove_file(&unexpected_path)
133+
.with_context(|| {
134+
format!(
135+
"failed to remove unexpected file `{}`",
136+
unexpected_path.display()
137+
)
138+
})
139+
.with_context(|| BASE_ERROR)?;
140+
removed_entries
141+
.push(unexpected_entry.file_name().to_string_lossy().to_string());
142+
}
143+
}
144+
145+
// Sort removed entries to ensure consistent output when reporting to users
146+
removed_entries.sort();
147+
Ok(removed_entries)
148+
})
149+
.await?
150+
}
151+
}
152+
153+
#[cfg(test)]
mod tests {
    use std::fs::{create_dir, File};
    use std::time::Instant;

    use mithril_common::temp_dir_create;

    use super::*;

    /// Create the three empty files of the given immutable file number in `dir`
    fn create_immutable_trio(dir: &Path, immutable_file_number: ImmutableFileNumber) {
        for immutable_file_name in immutable_trio_names(immutable_file_number) {
            File::create(dir.join(immutable_file_name)).unwrap();
        }
    }

    #[test]
    fn test_compute_immutable_files_range_to_expect() {
        // Specs:
        // - start at 1 on all networks except 0 for devnet
        // - if ancillaries are included, the end bound must be increased by one (to take into
        // account the additional immutable trio downloaded with them)

        // Without ancillaries, network is not devnet
        assert_eq!(
            compute_immutable_files_range_to_expect("network", false, 143),
            1..=143
        );

        // Without ancillaries, network is devnet
        assert_eq!(
            compute_immutable_files_range_to_expect("devnet", false, 143),
            0..=143
        );

        // With ancillaries, network is not devnet
        assert_eq!(
            compute_immutable_files_range_to_expect("network", true, 143),
            1..=144
        );

        // With ancillaries, network is devnet
        assert_eq!(
            compute_immutable_files_range_to_expect("devnet", true, 143),
            0..=144
        );
    }

    mod compute_expected_state_after_download {
        use super::*;

        #[tokio::test]
        async fn when_dir_empty_return_empty_if_immutable_files_range_is_empty() {
            let temp_dir = temp_dir_create!();
            let existing_files =
                UnexpectedDownloadedFileVerifier::new(&temp_dir, "network", false, 0)
                    .compute_expected_state_after_download()
                    .await
                    .unwrap();

            assert_eq!(existing_files.expected_files, HashSet::<PathBuf>::new());
        }

        #[tokio::test]
        async fn when_dir_empty_return_immutables_trios_if_immutable_files_range_is_not_empty() {
            let temp_dir = temp_dir_create!();
            let existing_files =
                UnexpectedDownloadedFileVerifier::new(&temp_dir, "network", false, 1)
                    .compute_expected_state_after_download()
                    .await
                    .unwrap();

            assert_eq!(
                existing_files.expected_files,
                HashSet::from([
                    temp_dir.join("00001.chunk"),
                    temp_dir.join("00001.primary"),
                    temp_dir.join("00001.secondary"),
                ])
            );
        }

        #[tokio::test]
        async fn add_existing_files_and_dirs() {
            let temp_dir = temp_dir_create!();
            create_dir(temp_dir.join("dir_1")).unwrap();
            create_dir(temp_dir.join("dir_2")).unwrap();
            File::create(temp_dir.join("file_1.txt")).unwrap();
            File::create(temp_dir.join("file_2.txt")).unwrap();
            File::create(temp_dir.join("dir_2").join("file_3.txt")).unwrap();

            let existing_files =
                UnexpectedDownloadedFileVerifier::new(&temp_dir, "network", false, 0)
                    .compute_expected_state_after_download()
                    .await
                    .unwrap();

            // Only direct entries are listed: `dir_2/file_3.txt` is not part of the result
            assert_eq!(
                existing_files.expected_files,
                HashSet::from([
                    temp_dir.join("dir_1"),
                    temp_dir.join("dir_2"),
                    temp_dir.join("file_1.txt"),
                    temp_dir.join("file_2.txt"),
                ])
            );
        }
    }

    mod removing_unexpected_files {
        use mithril_common::assert_dir_eq;

        use super::*;

        #[tokio::test]
        async fn when_dir_empty_do_nothing_and_return_none() {
            let temp_dir = temp_dir_create!();
            let existing_before = ExpectedFilesAfterDownload {
                dir_to_check: temp_dir.clone(),
                expected_files: HashSet::new(),
            };

            let removed_entries = existing_before.remove_unexpected_files().await.unwrap();
            assert_eq!(removed_entries, Vec::<String>::new());
            assert_dir_eq!(&temp_dir, "");
        }

        #[tokio::test]
        async fn when_no_unexpected_file_and_folder_delete_nothing_and_return_none() {
            let temp_dir = temp_dir_create!();
            create_dir(temp_dir.join("dir_1")).unwrap();
            File::create(temp_dir.join("file_1.txt")).unwrap();
            let existing_before = ExpectedFilesAfterDownload {
                dir_to_check: temp_dir.clone(),
                expected_files: HashSet::from([
                    temp_dir.join("file_1.txt"),
                    temp_dir.join("dir_1"),
                ]),
            };

            let removed_entries = existing_before.remove_unexpected_files().await.unwrap();
            assert_eq!(removed_entries, Vec::<String>::new());
            assert_dir_eq!(&temp_dir, "* dir_1/\n* file_1.txt");
        }

        #[tokio::test]
        async fn when_unexpected_dirs_and_files_are_downloaded_remove_them_and_return_their_filenames(
        ) {
            let temp_dir = temp_dir_create!();
            let existing_before = ExpectedFilesAfterDownload {
                dir_to_check: temp_dir.clone(),
                expected_files: HashSet::new(),
            };

            create_dir(temp_dir.join("dir_1")).unwrap();
            create_dir(temp_dir.join("dir_2")).unwrap();
            File::create(temp_dir.join("file_1.txt")).unwrap();
            File::create(temp_dir.join("file_2.txt")).unwrap();
            File::create(temp_dir.join("dir_2").join("file_3.txt")).unwrap();

            let removed_entries = existing_before.remove_unexpected_files().await.unwrap();
            assert_eq!(
                removed_entries,
                vec![
                    "dir_1/".to_string(),
                    "dir_2/".to_string(),
                    "file_1.txt".to_string(),
                    "file_2.txt".to_string()
                ]
            );
            assert_dir_eq!(&temp_dir, "");
        }
    }

    // Note: this test does not have assertions, its purpose is to measure the time taken over
    // a large database to ensure that's doable without particular optimization.
    #[ignore]
    #[tokio::test]
    async fn checking_unexpected_file_against_a_large_immutable_directory() {
        let temp_dir = temp_dir_create!();
        let verifier = UnexpectedDownloadedFileVerifier::new(&temp_dir, "network", false, 19999);

        // Files that exist before the expected state is computed: they are all kept
        for immutable_file_number in 0..=30000 {
            create_immutable_trio(&temp_dir, immutable_file_number);
        }

        let now = Instant::now();
        let existing_files = verifier
            .compute_expected_state_after_download()
            .await
            .unwrap();
        println!(
            "elapsed time on list_existing_file (30k files): {:?}",
            now.elapsed()
        );

        // Files that appear after the expected state is computed and are outside the expected
        // range: without them there would be nothing to remove and the second measure would
        // not include any deletion.
        for immutable_file_number in 30001..=40000 {
            create_immutable_trio(&temp_dir, immutable_file_number);
        }

        let now = Instant::now();
        let _removed_entries = existing_files.remove_unexpected_files().await.unwrap();
        println!(
            "elapsed time on remove_unexpected_files (10k files to remove): {:?}",
            now.elapsed()
        );
    }
}

0 commit comments

Comments
 (0)