Skip to content

Commit 3234590

Browse files
feat: add dataset struct
1 parent d64ab45 commit 3234590

File tree

1 file changed

+241
-0
lines changed

1 file changed

+241
-0
lines changed

pre-compute/src/compute/dataset.rs

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
use crate::compute::errors::ReplicateStatusCause;
2+
use crate::compute::utils::file_utils::download_from_url;
3+
use crate::compute::utils::hash_utils::sha256_from_bytes;
4+
use aes::Aes256;
5+
use base64::{Engine as _, engine::general_purpose};
6+
use cbc::{
7+
Decryptor,
8+
cipher::{BlockDecryptMut, KeyIvInit, block_padding::Pkcs7},
9+
};
10+
use log::{error, info};
11+
use multiaddr::Multiaddr;
12+
use std::str::FromStr;
13+
14+
/// CBC-mode decryptor over the AES-256 block cipher, used to decrypt datasets.
type Aes256CbcDec = Decryptor<Aes256>;
15+
/// Base URLs of the IPFS HTTP gateways used to resolve multiaddr dataset URLs.
///
/// The dataset multiaddr is appended directly to the gateway URL, so entries
/// must not end with a trailing slash. Gateways are tried in the listed order.
const IPFS_GATEWAYS: &[&str] = &[
    "https://ipfs-gateway.v8-bellecour.iex.ec",
    "https://gateway.ipfs.io",
    "https://gateway.pinata.cloud",
];

/// Required length, in bytes, of the decoded AES-256 dataset key.
const AES_KEY_LENGTH: usize = 32;

/// Length, in bytes, of the IV that prefixes the encrypted dataset content.
const AES_IV_LENGTH: usize = 16;
22+
23+
/// Represents a dataset for bulk processing in a Trusted Execution Environment (TEE).
///
/// This structure contains all the information needed to download, verify, and decrypt
/// a single dataset as part of bulk processing.
///
/// `Debug` is only derived in test builds.
// NOTE(review): presumably `Debug` is withheld outside tests so that `key` cannot end up
// in logs via `{:?}` — confirm before deriving it unconditionally.
#[cfg_attr(test, derive(Debug))]
#[derive(Clone, Default)]
pub struct Dataset {
    /// The dataset URL; can be an IPFS multiaddr.
    pub url: String,
    /// The dataset checksum used to verify the downloaded (still encrypted) content.
    pub checksum: String,
    /// The filename to use when saving the dataset locally.
    pub filename: String,
    /// The dataset decryption key (Base64 encoded).
    pub key: String,
}
35+
36+
impl Dataset {
37+
/// Creates a new Dataset instance.
38+
///
39+
/// # Arguments
40+
///
41+
/// * `url` - The dataset URL, can be IPFS multiaddr
42+
/// * `checksum` - The dataset checksum for verification
43+
/// * `filename` - The filename to use when saving the dataset locally
44+
/// * `key` - The dataset decryption key (base64 encoded)
45+
pub fn new(url: String, checksum: String, filename: String, key: String) -> Self {
46+
Dataset {
47+
url,
48+
checksum,
49+
filename,
50+
key,
51+
}
52+
}
53+
54+
/// Downloads the encrypted dataset file from a URL or IPFS multi-address, and verifies its checksum.
55+
///
56+
/// # Arguments
57+
///
58+
/// * `chain_task_id` - The chain task ID for logging
59+
///
60+
/// # Returns
61+
///
62+
/// * `Ok(Vec<u8>)` containing the dataset's encrypted content if download and verification succeed.
63+
/// * `Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)` if the download fails.
64+
/// * `Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum)` if checksum validation fails.
65+
pub fn download_encrypted_dataset(
66+
&self,
67+
chain_task_id: &str,
68+
) -> Result<Vec<u8>, ReplicateStatusCause> {
69+
info!(
70+
"Downloading encrypted dataset file [chainTaskId:{chain_task_id}, url:{}]",
71+
self.url
72+
);
73+
74+
let encrypted_content = if is_multi_address(&self.url) {
75+
IPFS_GATEWAYS.iter().find_map(|gateway| {
76+
let full_url = format!("{gateway}{}", self.url);
77+
info!("Attempting to download dataset from {full_url}");
78+
79+
if let Some(content) = download_from_url(&full_url) {
80+
info!("Successfully downloaded from {full_url}");
81+
Some(content)
82+
} else {
83+
info!("Failed to download from {full_url}");
84+
None
85+
}
86+
})
87+
} else {
88+
download_from_url(&self.url)
89+
}
90+
.ok_or(ReplicateStatusCause::PreComputeDatasetDownloadFailed)?;
91+
92+
info!("Checking encrypted dataset checksum [chainTaskId:{chain_task_id}]");
93+
let actual_checksum = sha256_from_bytes(&encrypted_content);
94+
95+
if actual_checksum != self.checksum {
96+
error!(
97+
"Invalid dataset checksum [chainTaskId:{chain_task_id}, expected:{}, actual:{actual_checksum}]",
98+
self.checksum
99+
);
100+
return Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
101+
}
102+
103+
info!("Dataset downloaded and verified successfully.");
104+
Ok(encrypted_content)
105+
}
106+
107+
/// Decrypts the provided encrypted dataset bytes using AES-CBC.
108+
///
109+
/// The first 16 bytes of `encrypted_content` are treated as the IV.
110+
/// The rest is the ciphertext. The decryption key is decoded from a Base64 string.
111+
///
112+
/// # Arguments
113+
///
114+
/// * `encrypted_content` - Full encrypted dataset, including the IV prefix.
115+
///
116+
/// # Returns
117+
///
118+
/// * `Ok(Vec<u8>)` containing the plaintext dataset if decryption succeeds.
119+
/// * `Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)` if the key is missing, decoding fails, or decryption fails.
120+
pub fn decrypt_dataset(
121+
&self,
122+
encrypted_content: &[u8],
123+
) -> Result<Vec<u8>, ReplicateStatusCause> {
124+
let key = general_purpose::STANDARD
125+
.decode(&self.key)
126+
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)?;
127+
128+
if encrypted_content.len() < AES_IV_LENGTH || key.len() != AES_KEY_LENGTH {
129+
return Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed);
130+
}
131+
132+
let key_slice = &key[..AES_KEY_LENGTH];
133+
let iv_slice = &encrypted_content[..AES_IV_LENGTH];
134+
let ciphertext = &encrypted_content[AES_IV_LENGTH..];
135+
136+
Aes256CbcDec::new(key_slice.into(), iv_slice.into())
137+
.decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
138+
.map_err(|_| ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
139+
}
140+
}
141+
142+
fn is_multi_address(uri: &str) -> bool {
143+
!uri.trim().is_empty() && Multiaddr::from_str(uri).is_ok()
144+
}
145+
146+
// NOTE(review): these tests call `download_encrypted_dataset` against real HTTP and IPFS
// URLs, so they appear to require network access — confirm before running them offline.
#[cfg(test)]
mod tests {
    use super::*;

    const CHAIN_TASK_ID: &str = "0x123456789abcdef";
    const DATASET_CHECKSUM: &str =
        "0x02a12ef127dcfbdb294a090c8f0b69a0ca30b7940fc36cabf971f488efd374d7";
    const ENCRYPTED_DATASET_KEY: &str = "ubA6H9emVPJT91/flYAmnKHC0phSV3cfuqsLxQfgow0=";
    const HTTP_DATASET_URL: &str = "https://raw.githubusercontent.com/iExecBlockchainComputing/tee-worker-pre-compute-rust/main/src/tests_resources/encrypted-data.bin";
    const PLAIN_DATA_FILE: &str = "plain-data.txt";
    const IPFS_DATASET_URL: &str = "/ipfs/QmUVhChbLFiuzNK1g2GsWyWEiad7SXPqARnWzGumgziwEp";

    /// Builds the reference dataset: HTTP URL, matching checksum and a valid key.
    fn get_test_dataset() -> Dataset {
        Dataset::new(
            HTTP_DATASET_URL.to_string(),
            DATASET_CHECKSUM.to_string(),
            PLAIN_DATA_FILE.to_string(),
            ENCRYPTED_DATASET_KEY.to_string(),
        )
    }

    // region download_encrypted_dataset
    #[test]
    fn download_encrypted_dataset_success() {
        let dataset = get_test_dataset();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        assert!(actual_content.is_ok());
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_url() {
        let mut dataset = get_test_dataset();
        dataset.url = "http://bad-url".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        assert_eq!(
            actual_content,
            Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed)
        );
    }

    #[test]
    fn download_encrypted_dataset_success_with_valid_iexec_gateway() {
        let mut dataset = get_test_dataset();
        dataset.url = IPFS_DATASET_URL.to_string();
        dataset.checksum =
            "0x323b1637c7999942fbebfe5d42fe15dbfe93737577663afa0181938d7ad4a2ac".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Ok("hello world !\n".as_bytes().to_vec());
        assert_eq!(actual_content, expected_content);
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_gateway() {
        let mut dataset = get_test_dataset();
        dataset.url = "/ipfs/INVALID_IPFS_DATASET_URL".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Err(ReplicateStatusCause::PreComputeDatasetDownloadFailed);
        assert_eq!(actual_content, expected_content);
    }

    #[test]
    fn download_encrypted_dataset_failure_with_invalid_dataset_checksum() {
        let mut dataset = get_test_dataset();
        dataset.checksum = "invalid_dataset_checksum".to_string();
        let actual_content = dataset.download_encrypted_dataset(CHAIN_TASK_ID);
        let expected_content = Err(ReplicateStatusCause::PreComputeInvalidDatasetChecksum);
        assert_eq!(actual_content, expected_content);
    }
    // endregion

    // region decrypt_dataset
    #[test]
    fn decrypt_dataset_success_with_valid_dataset() {
        let dataset = get_test_dataset();

        let encrypted_data = dataset.download_encrypted_dataset(CHAIN_TASK_ID).unwrap();
        let expected_plain_data = Ok("Some very useful data.".as_bytes().to_vec());
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(actual_plain_data, expected_plain_data);
    }

    #[test]
    fn decrypt_dataset_failure_with_bad_key() {
        let mut dataset = get_test_dataset();
        dataset.key = "bad_key".to_string();
        let encrypted_data = dataset.download_encrypted_dataset(CHAIN_TASK_ID).unwrap();
        let actual_plain_data = dataset.decrypt_dataset(&encrypted_data);

        assert_eq!(
            actual_plain_data,
            Err(ReplicateStatusCause::PreComputeDatasetDecryptionFailed)
        );
    }
    // endregion
}

0 commit comments

Comments
 (0)