|
| 1 | +use std::collections::HashMap; |
| 2 | +use std::sync::Arc; |
| 3 | + |
| 4 | +use bytes::Bytes; |
| 5 | +use cas_object::SerializedCasObject; |
| 6 | +use cas_types::FileRange; |
| 7 | +use mdb_shard::file_structs::MDBFileInfo; |
| 8 | +use merklehash::MerkleHash; |
| 9 | +use progress_tracking::item_tracking::SingleItemProgressUpdater; |
| 10 | +use progress_tracking::upload_tracking::CompletionTracker; |
| 11 | + |
| 12 | +use crate::error::Result; |
1 | 13 | #[cfg(not(target_family = "wasm"))] |
2 | | -use mdb_shard::shard_file_reconstructor::FileReconstructor; |
3 | | - |
4 | | -#[cfg(not(target_family = "wasm"))] |
5 | | -use crate::CasClientError; |
| 14 | +use crate::OutputProvider; |
6 | 15 |
|
7 | 16 | /// A Client to the Shard service. The shard service |
8 | 17 | /// provides for |
9 | 18 | /// 1. upload shard to the shard service |
10 | 19 | /// 2. querying of file->reconstruction information |
11 | 20 | /// 3. querying of chunk->shard information |
12 | | -#[cfg(not(target_family = "wasm"))] |
13 | | -pub trait ShardClientInterface: |
14 | | - RegistrationClient + FileReconstructor<CasClientError> + ShardDedupProber + Send + Sync |
15 | | -{ |
16 | | -} |
17 | | -// In webassembly environment ShardClientInterface does not include FileReconstructor |
18 | | -// and does not require Send/Sync'ness |
19 | | -#[cfg(target_family = "wasm")] |
20 | | -pub trait ShardClientInterface: RegistrationClient + ShardDedupProber {} |
21 | | - |
22 | | -#[cfg(not(target_family = "wasm"))] |
23 | | -pub trait Client: UploadClient + ReconstructionClient + ShardClientInterface {} |
24 | | -// in webassembly environment the general "Client" interface does not support the |
25 | | -// ReconstructionClient download interface. |
26 | | -#[cfg(target_family = "wasm")] |
27 | | -pub trait Client: UploadClient + ShardClientInterface {} |
28 | | - |
29 | | -// interfaces used on the download path, primarily relating to fetching and using file reconstructions |
30 | | -// to download file data. |
31 | | -// not enabled in webassembly |
32 | | -#[cfg(not(target_family = "wasm"))] |
33 | | -mod download { |
34 | | - use std::collections::HashMap; |
35 | | - use std::sync::Arc; |
36 | | - |
37 | | - use cas_types::{FileRange, QueryReconstructionResponse}; |
38 | | - use merklehash::MerkleHash; |
39 | | - use progress_tracking::item_tracking::SingleItemProgressUpdater; |
40 | | - |
41 | | - use crate::error::Result; |
42 | | - use crate::output_provider::OutputProvider; |
43 | | - |
44 | | - /// A Client to the CAS (Content Addressed Storage) service to allow reconstructing a |
45 | | - /// pointer file based on FileID (MerkleHash). |
| 21 | +#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
| 22 | +#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
| 23 | +pub trait Client { |
| 24 | + /// Get an entire file by file hash with an optional bytes range. |
46 | 25 | /// |
47 | | - /// To simplify this crate, it is intentional that the client does not create its own http_client or |
48 | | - /// spawn its own threads. Instead, it is expected to be given the parallelism harness/threadpool/queue |
49 | | - /// on which it is expected to run. This allows the caller to better optimize overall system utilization |
50 | | - /// by controlling the number of concurrent requests. |
51 | | - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
52 | | - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
53 | | - pub trait ReconstructionClient { |
54 | | - /// Get an entire file by file hash with an optional bytes range. |
55 | | - /// |
56 | | - /// The http_client passed in is a non-authenticated client. This is used to directly communicate |
57 | | - /// with the backing store (S3) to retrieve xorbs. |
58 | | - async fn get_file( |
59 | | - &self, |
60 | | - hash: &MerkleHash, |
61 | | - byte_range: Option<FileRange>, |
62 | | - output_provider: &OutputProvider, |
63 | | - progress_updater: Option<Arc<SingleItemProgressUpdater>>, |
64 | | - ) -> Result<u64>; |
65 | | - |
66 | | - async fn batch_get_file(&self, files: HashMap<MerkleHash, &OutputProvider>) -> Result<u64> { |
67 | | - let mut n_bytes = 0; |
68 | | - // Provide the basic naive implementation as a default. |
69 | | - for (h, w) in files { |
70 | | - n_bytes += self.get_file(&h, None, w, None).await?; |
71 | | - } |
72 | | - Ok(n_bytes) |
| 26 | + /// The http_client passed in is a non-authenticated client. This is used to directly communicate |
| 27 | + /// with the backing store (S3) to retrieve xorbs. |
| 28 | + #[cfg(not(target_family = "wasm"))] |
| 29 | + async fn get_file( |
| 30 | + &self, |
| 31 | + hash: &MerkleHash, |
| 32 | + byte_range: Option<FileRange>, |
| 33 | + output_provider: &OutputProvider, |
| 34 | + progress_updater: Option<Arc<SingleItemProgressUpdater>>, |
| 35 | + ) -> Result<u64>; |
| 36 | + |
| 37 | + #[cfg(not(target_family = "wasm"))] |
| 38 | + async fn batch_get_file(&self, files: HashMap<MerkleHash, &OutputProvider>) -> Result<u64> { |
| 39 | + let mut n_bytes = 0; |
| 40 | + // Provide the basic naive implementation as a default. |
| 41 | + for (h, w) in files { |
| 42 | + n_bytes += self.get_file(&h, None, w, None).await?; |
73 | 43 | } |
| 44 | + Ok(n_bytes) |
74 | 45 | } |
75 | 46 |
|
76 | | - /// A Client to the CAS (Content Addressed Storage) service that is able to obtain |
77 | | - /// the reconstruction info of a file by FileID (MerkleHash). Return |
78 | | - /// - Ok(Some(response)) if the query succeeded, |
79 | | - /// - Ok(None) if the specified range can't be satisfied, |
80 | | - /// - Err(e) for other errors. |
81 | | - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
82 | | - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
83 | | - pub trait Reconstruct { |
84 | | - async fn get_reconstruction( |
85 | | - &self, |
86 | | - hash: &MerkleHash, |
87 | | - byte_range: Option<FileRange>, |
88 | | - ) -> Result<Option<QueryReconstructionResponse>>; |
89 | | - } |
90 | | -} |
91 | | - |
92 | | -// upload interfaces for operations required for the upload path |
93 | | -// including global deduplication |
94 | | -// enabled in standard build environments and web assembly |
95 | | -mod upload { |
96 | | - use std::sync::Arc; |
97 | | - |
98 | | - use cas_object::SerializedCasObject; |
99 | | - use merklehash::MerkleHash; |
100 | | - use progress_tracking::upload_tracking::CompletionTracker; |
101 | | - |
102 | | - use crate::error::Result; |
103 | | - |
104 | | - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
105 | | - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
106 | | - pub trait RegistrationClient { |
107 | | - async fn upload_shard( |
108 | | - &self, |
109 | | - prefix: &str, |
110 | | - hash: &MerkleHash, |
111 | | - force_sync: bool, |
112 | | - shard_data: bytes::Bytes, |
113 | | - salt: &[u8; 32], |
114 | | - ) -> Result<bool>; |
115 | | - } |
116 | | - |
117 | | - /// Probes for shards that provide dedup information for a chunk, and, if |
118 | | - /// any are found, writes them to disk and returns the path. |
119 | | - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
120 | | - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
121 | | - pub trait ShardDedupProber { |
122 | | - #[cfg(not(target_family = "wasm"))] |
123 | | - async fn query_for_global_dedup_shard( |
124 | | - &self, |
125 | | - prefix: &str, |
126 | | - chunk_hash: &MerkleHash, |
127 | | - salt: &[u8; 32], |
128 | | - ) -> Result<Option<std::path::PathBuf>>; |
129 | | - |
130 | | - async fn query_for_global_dedup_shard_in_memory( |
131 | | - &self, |
132 | | - prefix: &str, |
133 | | - chunk_hash: &MerkleHash, |
134 | | - salt: &[u8; 32], |
135 | | - ) -> Result<Option<Vec<u8>>>; |
136 | | - } |
137 | | - |
138 | | - /// A Client to the CAS (Content Addressed Storage) service to allow storage and |
139 | | - /// management of XORBs (Xet Object Remote Block). A XORB represents a collection |
140 | | - /// of arbitrary bytes. These bytes are hashed according to a Xet Merkle Hash |
141 | | - /// producing a Merkle Tree. XORBs in the CAS are identified by a combination of |
142 | | - /// a prefix namespacing the XORB and the hash at the root of the Merkle Tree. |
143 | | - #[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)] |
144 | | - #[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))] |
145 | | - pub trait UploadClient { |
146 | | - /// Insert a serialized XORB into the CAS, returning the number of bytes read. |
147 | | - async fn upload_xorb( |
148 | | - &self, |
149 | | - prefix: &str, |
150 | | - serialized_cas_object: SerializedCasObject, |
151 | | - upload_tracker: Option<Arc<CompletionTracker>>, |
152 | | - ) -> Result<u64>; |
153 | | - |
154 | | - /// Check if a XORB already exists. |
155 | | - async fn exists(&self, prefix: &str, hash: &MerkleHash) -> Result<bool>; |
156 | | - |
157 | | - /// Indicates if the serialized cas object should have a written footer. |
158 | | - /// This should only be true for testing with LocalClient. |
159 | | - fn use_xorb_footer(&self) -> bool; |
160 | | - } |
| 47 | + async fn get_file_reconstruction_info( |
| 48 | + &self, |
| 49 | + file_hash: &MerkleHash, |
| 50 | + ) -> Result<Option<(MDBFileInfo, Option<MerkleHash>)>>; |
| 51 | + |
| 52 | + async fn query_for_global_dedup_shard( |
| 53 | + &self, |
| 54 | + prefix: &str, |
| 55 | + chunk_hash: &MerkleHash, |
| 56 | + salt: &[u8; 32], |
| 57 | + ) -> Result<Option<Bytes>>; |
| 58 | + |
| 59 | + /// Upload a new shard. |
| 60 | + async fn upload_shard( |
| 61 | + &self, |
| 62 | + prefix: &str, |
| 63 | + hash: &MerkleHash, |
| 64 | + force_sync: bool, |
| 65 | + shard_data: bytes::Bytes, |
| 66 | + salt: &[u8; 32], |
| 67 | + ) -> Result<bool>; |
| 68 | + |
| 69 | + /// Upload a new xorb. |
| 70 | + async fn upload_xorb( |
| 71 | + &self, |
| 72 | + prefix: &str, |
| 73 | + serialized_cas_object: SerializedCasObject, |
| 74 | + upload_tracker: Option<Arc<CompletionTracker>>, |
| 75 | + ) -> Result<u64>; |
| 76 | + |
| 77 | + /// Check if a XORB already exists. |
| 78 | + async fn exists(&self, prefix: &str, hash: &MerkleHash) -> Result<bool>; |
| 79 | + |
| 80 | + /// Indicates if the serialized cas object should have a written footer. |
| 81 | + /// This should only be true for testing with LocalClient. |
| 82 | + fn use_xorb_footer(&self) -> bool; |
161 | 83 | } |
162 | | - |
163 | | -// export out interfaces to be referred to directly out of the interface sub-crate |
164 | | -// users of cas_client interface are unaware of the module level separation between download/upload. |
165 | | -#[cfg(not(target_family = "wasm"))] |
166 | | -pub use download::*; |
167 | | -pub use upload::*; |
0 commit comments