Skip to content

Commit 71b18de

Browse files
committed
Fix broken resource manager memory permits implementation
1 parent e0c911b commit 71b18de

File tree

3 files changed

+67
-18
lines changed

3 files changed

+67
-18
lines changed

src/app.rs

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use axum::{
2323
Router, TypedHeader,
2424
};
2525
use bytes::Bytes;
26+
use tokio::sync::SemaphorePermit;
2627

2728
use std::sync::Arc;
2829
use tower::Layer;
@@ -193,13 +194,11 @@ async fn download_s3_object<'a>(
193194
client: &s3_client::S3Client,
194195
request_data: &models::RequestData,
195196
resource_manager: &'a ResourceManager,
197+
mut mem_permits: Option<SemaphorePermit<'a>>,
196198
) -> Result<Bytes, ActiveStorageError> {
197-
// If we're given a size in the request data then use this to
198-
// get an initial guess at the required memory resources.
199-
let memory = request_data.size.unwrap_or(0);
200-
let mut mem_permits = resource_manager.memory(memory).await?;
201-
199+
// Convert request data to byte range for S3 request
202200
let range = s3_client::get_range(request_data.offset, request_data.size);
201+
// Acquire connection permit to be freed via drop when this function returns
203202
let _conn_permits = resource_manager.s3_connection().await?;
204203

205204
client
@@ -231,16 +230,37 @@ async fn download_and_cache_s3_object<'a>(
231230
client: &s3_client::S3Client,
232231
request_data: &models::RequestData,
233232
resource_manager: &'a ResourceManager,
233+
mut mem_permits: Option<SemaphorePermit<'a>>,
234234
chunk_cache: &ChunkCache,
235235
) -> Result<Bytes, ActiveStorageError> {
236236
let key = format!("{},{:?}", client, request_data);
237237

238-
let cache_value = chunk_cache.get(&key).await?;
239-
if let Some(bytes) = cache_value {
240-
return Ok(bytes);
238+
if let Some(metadata) = chunk_cache.get_metadata(&key).await {
239+
// Update memory requested from resource manager to account for actual
240+
// size of data if we were previously unable to guess the size from request
241+
// data's size + offset parameters.
242+
// FIXME: how to account for compressed data?
243+
let mem_permits = &mut mem_permits;
244+
match mem_permits {
245+
None => {
246+
*mem_permits = resource_manager.memory(metadata.size_bytes).await?;
247+
}
248+
Some(permits) => {
249+
if permits.num_permits() == 0 {
250+
*mem_permits = resource_manager.memory(metadata.size_bytes).await?;
251+
}
252+
}
253+
}
254+
// We only want to get chunks for which the metadata check succeeded too,
255+
// otherwise chunks which are missing metadata could bypass the resource
256+
// manager and exhaust system resources
257+
let cache_value = chunk_cache.get(&key).await?;
258+
if let Some(bytes) = cache_value {
259+
return Ok(bytes);
260+
}
241261
}
242262

243-
let data = download_s3_object(client, request_data, resource_manager).await?;
263+
let data = download_s3_object(client, request_data, resource_manager, mem_permits).await?;
244264

245265
// Write data to cache
246266
chunk_cache.set(&key, data.clone()).await?;
@@ -270,6 +290,15 @@ async fn operation_handler<T: operation::Operation>(
270290
auth: Option<TypedHeader<Authorization<Basic>>>,
271291
ValidatedJson(request_data): ValidatedJson<models::RequestData>,
272292
) -> Result<models::Response, ActiveStorageError> {
293+
// NOTE(sd109): We acquire memory permits semaphore here so that
294+
// they are owned by this top-level function and not freed until
295+
// the permits are dropped when the this function returns.
296+
297+
// If we're given a size in the request data then use this to
298+
// get an initial guess at the required memory resources.
299+
let memory = request_data.size.unwrap_or(0);
300+
let mut _mem_permits = state.resource_manager.memory(memory).await?;
301+
273302
let credentials = if let Some(TypedHeader(auth)) = auth {
274303
s3_client::S3Credentials::access_key(auth.username(), auth.password())
275304
} else {
@@ -283,12 +312,12 @@ async fn operation_handler<T: operation::Operation>(
283312

284313
let data = match (&state.args.use_chunk_cache, &state.chunk_cache) {
285314
(false, _) => {
286-
download_s3_object(&s3_client, &request_data, &state.resource_manager)
315+
download_s3_object(&s3_client, &request_data, &state.resource_manager, _mem_permits)
287316
.instrument(tracing::Span::current())
288317
.await?
289318
}
290319
(true, Some(cache)) => {
291-
download_and_cache_s3_object(&s3_client, &request_data, &state.resource_manager, cache)
320+
download_and_cache_s3_object(&s3_client, &request_data, &state.resource_manager, _mem_permits, cache)
292321
.await?
293322
}
294323
(true, None) => panic!(

src/chunk_cache.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ impl ChunkCache {
112112
}
113113
}
114114

115+
/// Retrieves chunk metadata from the cache for an unique key.
116+
///
117+
/// # Arguments
118+
///
119+
/// * `key`: Unique key identifying the chunk
120+
pub async fn get_metadata(&self, key: &str) -> Option<Metadata> {
121+
let state = self.cache.load_state().await;
122+
state.metadata.get(key).cloned()
123+
}
124+
115125
/// Retrieves chunk `Bytes` from the cache for an unique key.
116126
///
117127
/// # Arguments
@@ -128,12 +138,12 @@ impl ChunkCache {
128138
}
129139

130140
/// Metadata stored against each cache chunk.
131-
#[derive(Debug, Serialize, Deserialize)]
132-
struct Metadata {
141+
#[derive(Clone, Debug, Serialize, Deserialize)]
142+
pub struct Metadata {
133143
/// Seconds after unix epoch for cache item expiry.
134-
expires: u64,
144+
pub expires: u64,
135145
/// Cache value size.
136-
size_bytes: usize,
146+
pub size_bytes: usize,
137147
}
138148

139149
impl Metadata {

src/s3_client.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,20 @@ impl S3Client {
174174
.ok_or(ActiveStorageError::S3ContentLengthMissing)?
175175
.try_into()?;
176176

177+
// Update memory requested from resource manager to account for actual
178+
// size of data if we were previously unable to guess the size from request
179+
// data's size + offset parameters.
177180
// FIXME: how to account for compressed data?
178-
if mem_permits.is_none() || mem_permits.as_ref().unwrap().num_permits() == 0 {
179-
*mem_permits = resource_manager.memory(content_length).await?;
180-
};
181+
match mem_permits {
182+
None => {
183+
*mem_permits = resource_manager.memory(content_length).await?;
184+
}
185+
Some(permits) => {
186+
if permits.num_permits() == 0 {
187+
*mem_permits = resource_manager.memory(content_length).await?;
188+
}
189+
}
190+
}
181191

182192
// The data returned by the S3 client does not have any alignment guarantees. In order to
183193
// reinterpret the data as an array of numbers with a higher alignment than 1, we need to

0 commit comments

Comments
 (0)