Skip to content

Commit 225f4b0

Browse files
authored
Simplified Client interface. (#408)
Currently, the Client trait has numerous small traits underneath it, but only the umbrella Client trait is ever used. This PR simplifies this interface by dropping the unneeded fine-grained traits. It also consolidates the multiple routes for the global dedup query into a single function that returns an in-memory shard to further reduce the complexity. There should be no functionality change, just code moving around and trait simplification.
1 parent 8fbe968 commit 225f4b0

File tree

11 files changed

+416
-602
lines changed

11 files changed

+416
-602
lines changed

cas_client/src/interface.rs

Lines changed: 71 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -1,167 +1,83 @@
1+
use std::collections::HashMap;
2+
use std::sync::Arc;
3+
4+
use bytes::Bytes;
5+
use cas_object::SerializedCasObject;
6+
use cas_types::FileRange;
7+
use mdb_shard::file_structs::MDBFileInfo;
8+
use merklehash::MerkleHash;
9+
use progress_tracking::item_tracking::SingleItemProgressUpdater;
10+
use progress_tracking::upload_tracking::CompletionTracker;
11+
12+
use crate::error::Result;
113
#[cfg(not(target_family = "wasm"))]
2-
use mdb_shard::shard_file_reconstructor::FileReconstructor;
3-
4-
#[cfg(not(target_family = "wasm"))]
5-
use crate::CasClientError;
14+
use crate::OutputProvider;
615

716
/// A Client to the Shard service. The shard service
817
/// provides for
918
/// 1. upload shard to the shard service
1019
/// 2. querying of file->reconstruction information
1120
/// 3. querying of chunk->shard information
12-
#[cfg(not(target_family = "wasm"))]
13-
pub trait ShardClientInterface:
14-
RegistrationClient + FileReconstructor<CasClientError> + ShardDedupProber + Send + Sync
15-
{
16-
}
17-
// In webassembly environment ShardClientInterface does not include FileReconstructor
18-
// and does not require Send/Sync'ness
19-
#[cfg(target_family = "wasm")]
20-
pub trait ShardClientInterface: RegistrationClient + ShardDedupProber {}
21-
22-
#[cfg(not(target_family = "wasm"))]
23-
pub trait Client: UploadClient + ReconstructionClient + ShardClientInterface {}
24-
// in webassembly environment the general "Client" interface does not support the
25-
// ReconstructionClient download interface.
26-
#[cfg(target_family = "wasm")]
27-
pub trait Client: UploadClient + ShardClientInterface {}
28-
29-
// interfaces used on the download path, primarily relating to fetching and using file reconstructions
30-
// to download file data.
31-
// not enabled in webassembly
32-
#[cfg(not(target_family = "wasm"))]
33-
mod download {
34-
use std::collections::HashMap;
35-
use std::sync::Arc;
36-
37-
use cas_types::{FileRange, QueryReconstructionResponse};
38-
use merklehash::MerkleHash;
39-
use progress_tracking::item_tracking::SingleItemProgressUpdater;
40-
41-
use crate::error::Result;
42-
use crate::output_provider::OutputProvider;
43-
44-
/// A Client to the CAS (Content Addressed Storage) service to allow reconstructing a
45-
/// pointer file based on FileID (MerkleHash).
21+
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
22+
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
23+
pub trait Client {
24+
/// Get an entire file by file hash with an optional bytes range.
4625
///
47-
/// To simplify this crate, it is intentional that the client does not create its own http_client or
48-
/// spawn its own threads. Instead, it is expected to be given the parallelism harness/threadpool/queue
49-
/// on which it is expected to run. This allows the caller to better optimize overall system utilization
50-
/// by controlling the number of concurrent requests.
51-
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
52-
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
53-
pub trait ReconstructionClient {
54-
/// Get an entire file by file hash with an optional bytes range.
55-
///
56-
/// The http_client passed in is a non-authenticated client. This is used to directly communicate
57-
/// with the backing store (S3) to retrieve xorbs.
58-
async fn get_file(
59-
&self,
60-
hash: &MerkleHash,
61-
byte_range: Option<FileRange>,
62-
output_provider: &OutputProvider,
63-
progress_updater: Option<Arc<SingleItemProgressUpdater>>,
64-
) -> Result<u64>;
65-
66-
async fn batch_get_file(&self, files: HashMap<MerkleHash, &OutputProvider>) -> Result<u64> {
67-
let mut n_bytes = 0;
68-
// Provide the basic naive implementation as a default.
69-
for (h, w) in files {
70-
n_bytes += self.get_file(&h, None, w, None).await?;
71-
}
72-
Ok(n_bytes)
26+
/// The http_client passed in is a non-authenticated client. This is used to directly communicate
27+
/// with the backing store (S3) to retrieve xorbs.
28+
#[cfg(not(target_family = "wasm"))]
29+
async fn get_file(
30+
&self,
31+
hash: &MerkleHash,
32+
byte_range: Option<FileRange>,
33+
output_provider: &OutputProvider,
34+
progress_updater: Option<Arc<SingleItemProgressUpdater>>,
35+
) -> Result<u64>;
36+
37+
#[cfg(not(target_family = "wasm"))]
38+
async fn batch_get_file(&self, files: HashMap<MerkleHash, &OutputProvider>) -> Result<u64> {
39+
let mut n_bytes = 0;
40+
// Provide the basic naive implementation as a default.
41+
for (h, w) in files {
42+
n_bytes += self.get_file(&h, None, w, None).await?;
7343
}
44+
Ok(n_bytes)
7445
}
7546

76-
/// A Client to the CAS (Content Addressed Storage) service that is able to obtain
77-
/// the reconstruction info of a file by FileID (MerkleHash). Return
78-
/// - Ok(Some(response)) if the query succeeded,
79-
/// - Ok(None) if the specified range can't be satisfied,
80-
/// - Err(e) for other errors.
81-
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
82-
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
83-
pub trait Reconstruct {
84-
async fn get_reconstruction(
85-
&self,
86-
hash: &MerkleHash,
87-
byte_range: Option<FileRange>,
88-
) -> Result<Option<QueryReconstructionResponse>>;
89-
}
90-
}
91-
92-
// upload interfaces for operations required for the upload path
93-
// including global deduplication
94-
// enabled in standard build environments and web assembly
95-
mod upload {
96-
use std::sync::Arc;
97-
98-
use cas_object::SerializedCasObject;
99-
use merklehash::MerkleHash;
100-
use progress_tracking::upload_tracking::CompletionTracker;
101-
102-
use crate::error::Result;
103-
104-
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
105-
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
106-
pub trait RegistrationClient {
107-
async fn upload_shard(
108-
&self,
109-
prefix: &str,
110-
hash: &MerkleHash,
111-
force_sync: bool,
112-
shard_data: bytes::Bytes,
113-
salt: &[u8; 32],
114-
) -> Result<bool>;
115-
}
116-
117-
/// Probes for shards that provide dedup information for a chunk, and, if
118-
/// any are found, writes them to disk and returns the path.
119-
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
120-
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
121-
pub trait ShardDedupProber {
122-
#[cfg(not(target_family = "wasm"))]
123-
async fn query_for_global_dedup_shard(
124-
&self,
125-
prefix: &str,
126-
chunk_hash: &MerkleHash,
127-
salt: &[u8; 32],
128-
) -> Result<Option<std::path::PathBuf>>;
129-
130-
async fn query_for_global_dedup_shard_in_memory(
131-
&self,
132-
prefix: &str,
133-
chunk_hash: &MerkleHash,
134-
salt: &[u8; 32],
135-
) -> Result<Option<Vec<u8>>>;
136-
}
137-
138-
/// A Client to the CAS (Content Addressed Storage) service to allow storage and
139-
/// management of XORBs (Xet Object Remote Block). A XORB represents a collection
140-
/// of arbitrary bytes. These bytes are hashed according to a Xet Merkle Hash
141-
/// producing a Merkle Tree. XORBs in the CAS are identified by a combination of
142-
/// a prefix namespacing the XORB and the hash at the root of the Merkle Tree.
143-
#[cfg_attr(not(target_family = "wasm"), async_trait::async_trait)]
144-
#[cfg_attr(target_family = "wasm", async_trait::async_trait(?Send))]
145-
pub trait UploadClient {
146-
/// Insert a serialized XORB into the CAS, returning the number of bytes read.
147-
async fn upload_xorb(
148-
&self,
149-
prefix: &str,
150-
serialized_cas_object: SerializedCasObject,
151-
upload_tracker: Option<Arc<CompletionTracker>>,
152-
) -> Result<u64>;
153-
154-
/// Check if a XORB already exists.
155-
async fn exists(&self, prefix: &str, hash: &MerkleHash) -> Result<bool>;
156-
157-
/// Indicates if the serialized cas object should have a written footer.
158-
/// This should only be true for testing with LocalClient.
159-
fn use_xorb_footer(&self) -> bool;
160-
}
47+
async fn get_file_reconstruction_info(
48+
&self,
49+
file_hash: &MerkleHash,
50+
) -> Result<Option<(MDBFileInfo, Option<MerkleHash>)>>;
51+
52+
async fn query_for_global_dedup_shard(
53+
&self,
54+
prefix: &str,
55+
chunk_hash: &MerkleHash,
56+
salt: &[u8; 32],
57+
) -> Result<Option<Bytes>>;
58+
59+
/// Upload a new shard.
60+
async fn upload_shard(
61+
&self,
62+
prefix: &str,
63+
hash: &MerkleHash,
64+
force_sync: bool,
65+
shard_data: bytes::Bytes,
66+
salt: &[u8; 32],
67+
) -> Result<bool>;
68+
69+
/// Upload a new xorb.
70+
async fn upload_xorb(
71+
&self,
72+
prefix: &str,
73+
serialized_cas_object: SerializedCasObject,
74+
upload_tracker: Option<Arc<CompletionTracker>>,
75+
) -> Result<u64>;
76+
77+
/// Check if a XORB already exists.
78+
async fn exists(&self, prefix: &str, hash: &MerkleHash) -> Result<bool>;
79+
80+
/// Indicates if the serialized cas object should have a written footer.
81+
/// This should only be true for testing with LocalClient.
82+
fn use_xorb_footer(&self) -> bool;
16183
}
162-
163-
// export out interfaces to be referred to directly out of the interface sub-crate
164-
// users of cas_client interface are unaware of the module level separation between download/upload.
165-
#[cfg(not(target_family = "wasm"))]
166-
pub use download::*;
167-
pub use upload::*;

cas_client/src/lib.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@
22

33
pub use chunk_cache::{CacheConfig, CHUNK_CACHE_SIZE_BYTES};
44
pub use http_client::{build_auth_http_client, build_http_client, Api, RetryConfig};
5-
use interface::RegistrationClient;
6-
pub use interface::{Client, UploadClient};
7-
#[cfg(not(target_family = "wasm"))]
8-
pub use interface::{Reconstruct, ReconstructionClient};
5+
pub use interface::Client;
96
#[cfg(not(target_family = "wasm"))]
107
pub use local_client::LocalClient;
118
#[cfg(not(target_family = "wasm"))]
129
pub use output_provider::{FileProvider, OutputProvider};
1310
pub use remote_client::RemoteClient;
1411

1512
pub use crate::error::CasClientError;
16-
pub use crate::interface::ShardClientInterface;
1713

1814
mod constants;
1915
#[cfg(not(target_family = "wasm"))]

0 commit comments

Comments
 (0)