Skip to content

Commit 9f36c88

Browse files
authored
Implement MultipartStore for ThrottledStore (#5533)
* Implement MultipartStore for ThrottledStore; limit concurrency in BufWriter; tweak WriteMultipart * Fix MSRV * Format
1 parent 40fa58e commit 9f36c88

File tree

3 files changed

+148
-20
lines changed

3 files changed

+148
-20
lines changed

object_store/src/buffered.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ impl AsyncBufRead for BufReader {
216216
/// streamed using [`ObjectStore::put_multipart`]
217217
pub struct BufWriter {
218218
capacity: usize,
219+
max_concurrency: usize,
219220
state: BufWriterState,
220221
store: Arc<dyn ObjectStore>,
221222
}
@@ -250,10 +251,21 @@ impl BufWriter {
250251
Self {
251252
capacity,
252253
store,
254+
max_concurrency: 8,
253255
state: BufWriterState::Buffer(path, Vec::new()),
254256
}
255257
}
256258

259+
/// Override the maximum number of in-flight requests for this writer
260+
///
261+
/// Defaults to 8
262+
pub fn with_max_concurrency(self, max_concurrency: usize) -> Self {
263+
Self {
264+
max_concurrency,
265+
..self
266+
}
267+
}
268+
257269
/// Abort this writer, cleaning up any partially uploaded state
258270
///
259271
/// # Panics
@@ -275,9 +287,11 @@ impl AsyncWrite for BufWriter {
275287
buf: &[u8],
276288
) -> Poll<Result<usize, Error>> {
277289
let cap = self.capacity;
290+
let max_concurrency = self.max_concurrency;
278291
loop {
279292
return match &mut self.state {
280293
BufWriterState::Write(Some(write)) => {
294+
ready!(write.poll_for_capacity(cx, max_concurrency))?;
281295
write.write(buf);
282296
Poll::Ready(Ok(buf.len()))
283297
}

object_store/src/throttle.rs

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,12 @@ use parking_lot::Mutex;
2020
use std::ops::Range;
2121
use std::{convert::TryInto, sync::Arc};
2222

23-
use crate::GetOptions;
23+
use crate::multipart::{MultipartStore, PartId};
2424
use crate::{
25-
path::Path, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
26-
PutOptions, PutResult, Result,
25+
path::Path, GetResult, GetResultPayload, ListResult, MultipartId, MultipartUpload, ObjectMeta,
26+
ObjectStore, PutOptions, PutResult, Result,
2727
};
28+
use crate::{GetOptions, UploadPart};
2829
use async_trait::async_trait;
2930
use bytes::Bytes;
3031
use futures::{stream::BoxStream, FutureExt, StreamExt};
@@ -110,12 +111,12 @@ async fn sleep(duration: Duration) {
110111
/// **Note that the behavior of the wrapper is deterministic and might not reflect real-world
111112
/// conditions!**
112113
#[derive(Debug)]
113-
pub struct ThrottledStore<T: ObjectStore> {
114+
pub struct ThrottledStore<T> {
114115
inner: T,
115116
config: Arc<Mutex<ThrottleConfig>>,
116117
}
117118

118-
impl<T: ObjectStore> ThrottledStore<T> {
119+
impl<T> ThrottledStore<T> {
119120
/// Create a new wrapper around `inner` that uses the waiting times given in `config`.
120121
pub fn new(inner: T, config: ThrottleConfig) -> Self {
121122
Self {
@@ -157,8 +158,12 @@ impl<T: ObjectStore> ObjectStore for ThrottledStore<T> {
157158
self.inner.put_opts(location, bytes, opts).await
158159
}
159160

160-
async fn put_multipart(&self, _location: &Path) -> Result<Box<dyn MultipartUpload>> {
161-
Err(super::Error::NotImplemented)
161+
async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
162+
let upload = self.inner.put_multipart(location).await?;
163+
Ok(Box::new(ThrottledUpload {
164+
upload,
165+
sleep: self.config().wait_put_per_call,
166+
}))
162167
}
163168

164169
async fn get(&self, location: &Path) -> Result<GetResult> {
@@ -316,6 +321,63 @@ where
316321
.boxed()
317322
}
318323

324+
#[async_trait]
325+
impl<T: MultipartStore> MultipartStore for ThrottledStore<T> {
326+
async fn create_multipart(&self, path: &Path) -> Result<MultipartId> {
327+
self.inner.create_multipart(path).await
328+
}
329+
330+
async fn put_part(
331+
&self,
332+
path: &Path,
333+
id: &MultipartId,
334+
part_idx: usize,
335+
data: Bytes,
336+
) -> Result<PartId> {
337+
sleep(self.config().wait_put_per_call).await;
338+
self.inner.put_part(path, id, part_idx, data).await
339+
}
340+
341+
async fn complete_multipart(
342+
&self,
343+
path: &Path,
344+
id: &MultipartId,
345+
parts: Vec<PartId>,
346+
) -> Result<PutResult> {
347+
self.inner.complete_multipart(path, id, parts).await
348+
}
349+
350+
async fn abort_multipart(&self, path: &Path, id: &MultipartId) -> Result<()> {
351+
self.inner.abort_multipart(path, id).await
352+
}
353+
}
354+
355+
#[derive(Debug)]
356+
struct ThrottledUpload {
357+
upload: Box<dyn MultipartUpload>,
358+
sleep: Duration,
359+
}
360+
361+
#[async_trait]
362+
impl MultipartUpload for ThrottledUpload {
363+
fn put_part(&mut self, data: Bytes) -> UploadPart {
364+
let duration = self.sleep;
365+
let put = self.upload.put_part(data);
366+
Box::pin(async move {
367+
sleep(duration).await;
368+
put.await
369+
})
370+
}
371+
372+
async fn complete(&mut self) -> Result<PutResult> {
373+
self.upload.complete().await
374+
}
375+
376+
async fn abort(&mut self) -> Result<()> {
377+
self.upload.abort().await
378+
}
379+
}
380+
319381
#[cfg(test)]
320382
mod tests {
321383
use super::*;
@@ -351,6 +413,8 @@ mod tests {
351413
list_with_delimiter(&store).await;
352414
rename_and_copy(&store).await;
353415
copy_if_not_exists(&store).await;
416+
stream_get(&store).await;
417+
multipart(&store, &store).await;
354418
}
355419

356420
#[tokio::test]

object_store/src/upload.rs

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::{PutResult, Result};
18+
use std::task::{Context, Poll};
19+
1920
use async_trait::async_trait;
2021
use bytes::Bytes;
2122
use futures::future::BoxFuture;
23+
use futures::ready;
2224
use tokio::task::JoinSet;
2325

26+
use crate::{PutResult, Result};
27+
2428
/// An upload part request
2529
pub type UploadPart = BoxFuture<'static, Result<()>>;
2630

@@ -110,31 +114,44 @@ pub struct WriteMultipart {
110114
impl WriteMultipart {
111115
/// Create a new [`WriteMultipart`] that will upload using 5 MiB chunks
112116
pub fn new(upload: Box<dyn MultipartUpload>) -> Self {
113-
Self::new_with_capacity(upload, 5 * 1024 * 1024)
117+
Self::new_with_chunk_size(upload, 5 * 1024 * 1024)
114118
}
115119

116-
/// Create a new [`WriteMultipart`] that will upload in fixed `capacity` sized chunks
117-
pub fn new_with_capacity(upload: Box<dyn MultipartUpload>, capacity: usize) -> Self {
120+
/// Create a new [`WriteMultipart`] that will upload in fixed `chunk_size` sized chunks
121+
pub fn new_with_chunk_size(upload: Box<dyn MultipartUpload>, chunk_size: usize) -> Self {
118122
Self {
119123
upload,
120-
buffer: Vec::with_capacity(capacity),
124+
buffer: Vec::with_capacity(chunk_size),
121125
tasks: Default::default(),
122126
}
123127
}
124128

125-
/// Wait until there are `max_concurrency` or fewer requests in-flight
126-
pub async fn wait_for_capacity(&mut self, max_concurrency: usize) -> Result<()> {
127-
while self.tasks.len() > max_concurrency {
128-
self.tasks.join_next().await.unwrap()??;
129+
/// Polls until fewer than `max_concurrency` [`UploadPart`] requests are in progress
130+
///
131+
/// See [`Self::wait_for_capacity`] for an async version of this function
132+
pub fn poll_for_capacity(
133+
&mut self,
134+
cx: &mut Context<'_>,
135+
max_concurrency: usize,
136+
) -> Poll<Result<()>> {
137+
while !self.tasks.is_empty() && self.tasks.len() >= max_concurrency {
138+
ready!(self.tasks.poll_join_next(cx)).unwrap()??
129139
}
130-
Ok(())
140+
Poll::Ready(Ok(()))
141+
}
142+
143+
/// Wait until fewer than `max_concurrency` [`UploadPart`] requests are in progress
144+
///
145+
/// See [`Self::poll_for_capacity`] for a [`Poll`] version of this function
146+
pub async fn wait_for_capacity(&mut self, max_concurrency: usize) -> Result<()> {
147+
futures::future::poll_fn(|cx| self.poll_for_capacity(cx, max_concurrency)).await
131148
}
132149

133150
/// Write data to this [`WriteMultipart`]
134151
///
135-
/// Note this method is synchronous (not `async`) and will immediately start new uploads
136-
/// as soon as the internal `capacity` is hit, regardless of
137-
/// how many outstanding uploads are already in progress.
152+
/// Note this method is synchronous (not `async`) and will immediately
153+
/// start new uploads as soon as the internal `chunk_size` is hit,
154+
/// regardless of how many outstanding uploads are already in progress.
138155
///
139156
/// Back pressure can optionally be applied to producers by calling
140157
/// [`Self::wait_for_capacity`] prior to calling this method
@@ -173,3 +190,36 @@ impl WriteMultipart {
173190
self.upload.complete().await
174191
}
175192
}
193+
194+
#[cfg(test)]
195+
mod tests {
196+
use std::time::Duration;
197+
198+
use futures::FutureExt;
199+
200+
use crate::memory::InMemory;
201+
use crate::path::Path;
202+
use crate::throttle::{ThrottleConfig, ThrottledStore};
203+
use crate::ObjectStore;
204+
205+
use super::*;
206+
207+
#[tokio::test]
208+
async fn test_concurrency() {
209+
let config = ThrottleConfig {
210+
wait_put_per_call: Duration::from_millis(1),
211+
..Default::default()
212+
};
213+
214+
let path = Path::from("foo");
215+
let store = ThrottledStore::new(InMemory::new(), config);
216+
let upload = store.put_multipart(&path).await.unwrap();
217+
let mut write = WriteMultipart::new_with_chunk_size(upload, 10);
218+
219+
for _ in 0..20 {
220+
write.write(&[0; 5]);
221+
}
222+
assert!(write.wait_for_capacity(10).now_or_never().is_none());
223+
write.wait_for_capacity(10).await.unwrap()
224+
}
225+
}

0 commit comments

Comments
 (0)