Skip to content

Commit d64ab45

Browse files
feat: bulk processing
1 parent fbcfb85 commit d64ab45

File tree

5 files changed

+298
-237
lines changed

5 files changed

+298
-237
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pre-compute/src/compute.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod app_runner;
2+
pub mod dataset;
23
pub mod errors;
34
pub mod pre_compute_app;
45
pub mod pre_compute_args;

pre-compute/src/compute/pre_compute_app.rs

Lines changed: 30 additions & 202 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,22 @@
11
use crate::compute::errors::ReplicateStatusCause;
22
use crate::compute::pre_compute_args::PreComputeArgs;
3-
use crate::compute::utils::file_utils::{download_file, download_from_url, write_file};
4-
use crate::compute::utils::hash_utils::{sha256, sha256_from_bytes};
5-
use aes::Aes256;
6-
use base64::{Engine as _, engine::general_purpose};
7-
use cbc::{
8-
Decryptor,
9-
cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7},
10-
};
3+
use crate::compute::utils::file_utils::{download_file, write_file};
4+
use crate::compute::utils::hash_utils::sha256;
115
use log::{error, info};
126
#[cfg(test)]
137
use mockall::automock;
14-
use multiaddr::Multiaddr;
158
use std::path::{Path, PathBuf};
16-
use std::str::FromStr;
17-
18-
type Aes256CbcDec = Decryptor<Aes256>;
19-
const IPFS_GATEWAYS: &[&str] = &[
20-
"https://ipfs-gateway.v8-bellecour.iex.ec",
21-
"https://gateway.ipfs.io",
22-
"https://gateway.pinata.cloud",
23-
];
24-
const AES_KEY_LENGTH: usize = 32;
25-
const AES_IV_LENGTH: usize = 16;
269

2710
#[cfg_attr(test, automock)]
2811
pub trait PreComputeAppTrait {
2912
fn run(&mut self) -> Result<(), ReplicateStatusCause>;
3013
fn check_output_folder(&self) -> Result<(), ReplicateStatusCause>;
3114
fn download_input_files(&self) -> Result<(), ReplicateStatusCause>;
32-
fn download_encrypted_dataset(&self) -> Result<Vec<u8>, ReplicateStatusCause>;
33-
fn decrypt_dataset(&self, encrypted_content: &[u8]) -> Result<Vec<u8>, ReplicateStatusCause>;
34-
fn save_plain_dataset_file(&self, plain_content: &[u8]) -> Result<(), ReplicateStatusCause>;
15+
fn save_plain_dataset_file(
16+
&self,
17+
plain_content: &[u8],
18+
plain_dataset_filename: &str,
19+
) -> Result<(), ReplicateStatusCause>;
3520
}
3621

3722
pub struct PreComputeApp {
@@ -73,10 +58,10 @@ impl PreComputeAppTrait for PreComputeApp {
7358
fn run(&mut self) -> Result<(), ReplicateStatusCause> {
7459
self.pre_compute_args = PreComputeArgs::read_args()?;
7560
self.check_output_folder()?;
76-
if self.pre_compute_args.is_dataset_required {
77-
let encrypted_content = self.download_encrypted_dataset()?;
78-
let plain_content = self.decrypt_dataset(&encrypted_content)?;
79-
self.save_plain_dataset_file(&plain_content)?;
61+
for dataset in &self.pre_compute_args.datasets {
62+
let encrypted_content = dataset.download_encrypted_dataset(&self.chain_task_id)?;
63+
let plain_content = dataset.decrypt_dataset(&encrypted_content)?;
64+
self.save_plain_dataset_file(&plain_content, &dataset.filename)?;
8065
}
8166
self.download_input_files()?;
8267
Ok(())
@@ -134,88 +119,6 @@ impl PreComputeAppTrait for PreComputeApp {
134119
Ok(())
135120
}
136121

137-
/// Downloads the encrypted dataset file from a URL or IPFS multi-address, and verifies its checksum.
138-
///
139-
/// # Returns
140-
///
141-
/// * `Ok(Vec<u8>)` containing the dataset's encrypted content if download and verification succeed.
142-
/// * `Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)` if the download fails or inputs are missing.
143-
/// * `Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)` if checksum validation fails.
144-
fn download_encrypted_dataset(&self) -> Result<Vec<u8>, ReplicateStatusCause> {
145-
let args = &self.pre_compute_args;
146-
let chain_task_id = &self.chain_task_id;
147-
let encrypted_dataset_url: &str = &args.encrypted_dataset_url;
148-
149-
info!(
150-
"Downloading encrypted dataset file [chainTaskId:{chain_task_id}, url:{encrypted_dataset_url}]",
151-
);
152-
153-
let encrypted_content = if is_multi_address(encrypted_dataset_url) {
154-
IPFS_GATEWAYS.iter().find_map(|gateway| {
155-
let full_url = format!("{gateway}{encrypted_dataset_url}");
156-
info!("Attempting to download dataset from {full_url}");
157-
158-
if let Some(content) = download_from_url(&full_url) {
159-
info!("Successfully downloaded from {full_url}");
160-
Some(content)
161-
} else {
162-
info!("Failed to download from {full_url}");
163-
None
164-
}
165-
})
166-
} else {
167-
download_from_url(encrypted_dataset_url)
168-
}
169-
.ok_or(ReplicateStatusCause::PreComputeDatasetDownloadFailed)?;
170-
171-
info!("Checking encrypted dataset checksum [chainTaskId:{chain_task_id}]");
172-
let expected_checksum: &str = &args.encrypted_dataset_checksum;
173-
let actual_checksum = sha256_from_bytes(&encrypted_content);
174-
175-
if actual_checksum != expected_checksum {
176-
error!(
177-
"Invalid dataset checksum [chainTaskId:{chain_task_id}, expected:{expected_checksum}, actual:{actual_checksum}]"
178-
);
179-
return Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
180-
}
181-
182-
info!("Dataset downloaded and verified successfully.");
183-
Ok(encrypted_content)
184-
}
185-
186-
/// Decrypts the provided encrypted dataset bytes using AES-CBC.
187-
///
188-
/// The first 16 bytes of `encrypted_content` are treated as the IV.
189-
/// The rest is the ciphertext. The decryption key is decoded from a Base64 string.
190-
///
191-
/// # Arguments
192-
///
193-
/// * `encrypted_content` - Full encrypted dataset, including the IV prefix.
194-
///
195-
/// # Returns
196-
///
197-
/// * `Ok(Vec<u8>)` containing the plaintext dataset if decryption succeeds.
198-
/// * `Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)` if the key is missing, decoding fails, or decryption fails.
199-
fn decrypt_dataset(&self, encrypted_content: &[u8]) -> Result<Vec<u8>, ReplicateStatusCause> {
200-
let base64_key: &str = &self.pre_compute_args.encrypted_dataset_base64_key;
201-
202-
let key = general_purpose::STANDARD
203-
.decode(base64_key)
204-
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)?;
205-
206-
if encrypted_content.len() < AES_IV_LENGTH || key.len() != AES_KEY_LENGTH {
207-
return Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed);
208-
}
209-
210-
let key_slice = &key[..AES_KEY_LENGTH];
211-
let iv_slice = &encrypted_content[..AES_IV_LENGTH];
212-
let ciphertext = &encrypted_content[AES_IV_LENGTH..];
213-
214-
Aes256CbcDec::new(key_slice.into(), iv_slice.into())
215-
.decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
216-
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
217-
}
218-
219122
/// Saves the decrypted (plain) dataset to disk in the configured output directory.
220123
///
221124
/// The output filename is taken from `pre_compute_args.plain_dataset_filename`.
@@ -228,11 +131,14 @@ impl PreComputeAppTrait for PreComputeApp {
228131
///
229132
/// * `Ok(())` if the file is successfully saved.
230133
/// * `Err(ReplicateStatusCause::PreComputeSavingPlainDatasetFailed)` if the path is invalid or write fails.
231-
fn save_plain_dataset_file(&self, plain_dataset: &[u8]) -> Result<(), ReplicateStatusCause> {
134+
fn save_plain_dataset_file(
135+
&self,
136+
plain_dataset: &[u8],
137+
plain_dataset_filename: &str,
138+
) -> Result<(), ReplicateStatusCause> {
232139
let chain_task_id: &str = &self.chain_task_id;
233140
let args = &self.pre_compute_args;
234141
let output_dir: &str = &args.output_dir;
235-
let plain_dataset_filename: &str = &args.plain_dataset_filename;
236142

237143
let mut path = PathBuf::from(output_dir);
238144
path.push(plain_dataset_filename);
@@ -251,15 +157,13 @@ impl PreComputeAppTrait for PreComputeApp {
251157
}
252158
}
253159

254-
fn is_multi_address(uri: &str) -> bool {
255-
!uri.trim().is_empty() && Multiaddr::from_str(uri).is_ok()
256-
}
257-
258160
#[cfg(test)]
259161
mod tests {
162+
use std::fs;
163+
260164
use super::*;
165+
use crate::compute::dataset::Dataset;
261166
use crate::compute::pre_compute_args::PreComputeArgs;
262-
use std::fs;
263167
use tempfile::TempDir;
264168
use testcontainers::core::WaitFor;
265169
use testcontainers::runners::SyncRunner;
@@ -270,7 +174,6 @@ mod tests {
270174
"0x02a12ef127dcfbdb294a090c8f0b69a0ca30b7940fc36cabf971f488efd374d7";
271175
const ENCRYPTED_DATASET_KEY: &str = "ubA6H9emVPJT91/flYAmnKHC0phSV3cfuqsLxQfgow0=";
272176
const HTTP_DATASET_URL: &str = "https://raw.githubusercontent.com/iExecBlockchainComputing/tee-worker-pre-compute-rust/main/src/tests_resources/encrypted-data.bin";
273-
const IPFS_DATASET_URL: &str = "/ipfs/QmUVhChbLFiuzNK1g2GsWyWEiad7SXPqARnWzGumgziwEp";
274177
const PLAIN_DATA_FILE: &str = "plain-data.txt";
275178

276179
fn get_pre_compute_app(
@@ -284,10 +187,13 @@ mod tests {
284187
input_files: urls.into_iter().map(String::from).collect(),
285188
output_dir: output_dir.to_string(),
286189
is_dataset_required: true,
287-
encrypted_dataset_url: HTTP_DATASET_URL.to_string(),
288-
encrypted_dataset_base64_key: ENCRYPTED_DATASET_KEY.to_string(),
289-
encrypted_dataset_checksum: DATASET_CHECKSUM.to_string(),
290-
plain_dataset_filename: PLAIN_DATA_FILE.to_string(),
190+
bulk_size: 0,
191+
datasets: vec![Dataset {
192+
url: HTTP_DATASET_URL.to_string(),
193+
checksum: DATASET_CHECKSUM.to_string(),
194+
filename: PLAIN_DATA_FILE.to_string(),
195+
key: ENCRYPTED_DATASET_KEY.to_string(),
196+
}],
291197
},
292198
}
293199
}
@@ -422,84 +328,6 @@ mod tests {
422328
}
423329
// endregion
424330

425-
// region download_encrypted_dataset
426-
#[test]
427-
fn download_encrypted_dataset_success_with_valid_dataset_url() {
428-
let app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
429-
430-
let actual_content = app.download_encrypted_dataset();
431-
let expected_content = download_from_url(HTTP_DATASET_URL)
432-
.ok_or(ReplicateStatusCause::PreComputeDatasetDownloadFailed);
433-
assert_eq!(actual_content, expected_content);
434-
}
435-
436-
#[test]
437-
fn download_encrypted_dataset_failure_with_invalid_dataset_url() {
438-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
439-
app.pre_compute_args.encrypted_dataset_url = "http://bad-url".to_string();
440-
let actual_content = app.download_encrypted_dataset();
441-
assert_eq!(
442-
actual_content,
443-
Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)
444-
);
445-
}
446-
447-
#[test]
448-
fn download_encrypted_dataset_success_with_valid_iexec_gateway() {
449-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
450-
app.pre_compute_args.encrypted_dataset_url = IPFS_DATASET_URL.to_string();
451-
app.pre_compute_args.encrypted_dataset_checksum =
452-
"0x323b1637c7999942fbebfe5d42fe15dbfe93737577663afa0181938d7ad4a2ac".to_string();
453-
let actual_content = app.download_encrypted_dataset();
454-
let expected_content = Ok("hello world !\n".as_bytes().to_vec());
455-
assert_eq!(actual_content, expected_content);
456-
}
457-
458-
#[test]
459-
fn download_encrypted_dataset_failure_with_invalid_gateway() {
460-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
461-
app.pre_compute_args.encrypted_dataset_url = "/ipfs/INVALID_IPFS_DATASET_URL".to_string();
462-
let actual_content = app.download_encrypted_dataset();
463-
let expected_content = Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed);
464-
assert_eq!(actual_content, expected_content);
465-
}
466-
467-
#[test]
468-
fn download_encrypted_dataset_failure_with_invalid_dataset_checksum() {
469-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
470-
app.pre_compute_args.encrypted_dataset_checksum = "invalid_dataset_checksum".to_string();
471-
let actual_content = app.download_encrypted_dataset();
472-
let expected_content = Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
473-
assert_eq!(actual_content, expected_content);
474-
}
475-
// endregion
476-
477-
// region decrypt_dataset
478-
#[test]
479-
fn decrypt_dataset_success_with_valid_dataset() {
480-
let app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
481-
482-
let encrypted_data = app.download_encrypted_dataset().unwrap();
483-
let expected_plain_data = Ok("Some very useful data.".as_bytes().to_vec());
484-
let actual_plain_data = app.decrypt_dataset(&encrypted_data);
485-
486-
assert_eq!(actual_plain_data, expected_plain_data);
487-
}
488-
489-
#[test]
490-
fn decrypt_dataset_failure_with_bad_key() {
491-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], "");
492-
app.pre_compute_args.encrypted_dataset_base64_key = "bad_key".to_string();
493-
let encrypted_data = app.download_encrypted_dataset().unwrap();
494-
let actual_plain_data = app.decrypt_dataset(&encrypted_data);
495-
496-
assert_eq!(
497-
actual_plain_data,
498-
Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
499-
);
500-
}
501-
// endregion
502-
503331
// region save_plain_dataset_file
504332
#[test]
505333
fn save_plain_dataset_file_success_with_valid_output_dir() {
@@ -509,7 +337,7 @@ mod tests {
509337
let app = get_pre_compute_app(CHAIN_TASK_ID, vec![], output_path);
510338

511339
let plain_dataset = "Some very useful data.".as_bytes().to_vec();
512-
let saved_dataset = app.save_plain_dataset_file(&plain_dataset);
340+
let saved_dataset = app.save_plain_dataset_file(&plain_dataset, PLAIN_DATA_FILE);
513341

514342
assert!(saved_dataset.is_ok());
515343

@@ -532,10 +360,10 @@ mod tests {
532360
let temp_dir = TempDir::new().unwrap();
533361
let output_path = temp_dir.path().to_str().unwrap();
534362

535-
let mut app = get_pre_compute_app(CHAIN_TASK_ID, vec![], output_path);
536-
app.pre_compute_args.plain_dataset_filename = "/some-folder-123/not-found".to_string();
363+
let app = get_pre_compute_app(CHAIN_TASK_ID, vec![], output_path);
537364
let plain_dataset = "Some very useful data.".as_bytes().to_vec();
538-
let saved_dataset = app.save_plain_dataset_file(&plain_dataset);
365+
let saved_dataset =
366+
app.save_plain_dataset_file(&plain_dataset, "/some-folder-123/not-found");
539367

540368
assert_eq!(
541369
saved_dataset,

0 commit comments

Comments
 (0)