Skip to content

Commit 1bad961

Browse files
committed
Add S3 client auth check to cached object
1 parent 71b18de commit 1bad961

File tree

3 files changed

+110
-6
lines changed

3 files changed

+110
-6
lines changed

src/app.rs

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ async fn download_s3_object<'a>(
224224
/// * `chunk_cache`: ChunkCache object
225225
#[tracing::instrument(
226226
level = "DEBUG",
227-
skip(client, request_data, resource_manager, chunk_cache)
227+
skip(client, request_data, resource_manager, mem_permits, chunk_cache)
228228
)]
229229
async fn download_and_cache_s3_object<'a>(
230230
client: &s3_client::S3Client,
@@ -233,9 +233,32 @@ async fn download_and_cache_s3_object<'a>(
233233
mut mem_permits: Option<SemaphorePermit<'a>>,
234234
chunk_cache: &ChunkCache,
235235
) -> Result<Bytes, ActiveStorageError> {
236-
let key = format!("{},{:?}", client, request_data);
236+
// We chose a cache key such that any changes to request data
237+
// which may feasibly indicate a change to the upstream object
238+
// lead to a new cache key.
239+
let key = format!(
240+
"{}-{}-{}-{}-{:?}-{:?}",
241+
request_data.source.as_str(),
242+
request_data.bucket,
243+
request_data.object,
244+
request_data.dtype,
245+
request_data.byte_order,
246+
request_data.compression,
247+
);
237248

238249
if let Some(metadata) = chunk_cache.get_metadata(&key).await {
250+
// To avoid having to include the S3 client ID as part of the cache key
251+
// (which means we'd have a separate cache for each authorised user and
252+
// waste storage space) we instead make a lightweight check against the
253+
// object store to ensure the user is authorised, even if the object data
254+
// is already in the local cache.
255+
let authorised = client
256+
.is_authorised(&request_data.bucket, &request_data.object)
257+
.await?;
258+
if !authorised {
259+
return Err(ActiveStorageError::Forbidden);
260+
}
261+
239262
// Update memory requested from resource manager to account for actual
240263
// size of data if we were previously unable to guess the size from request
241264
// data's size + offset parameters.
@@ -254,7 +277,10 @@ async fn download_and_cache_s3_object<'a>(
254277
// We only want to get chunks for which the metadata check succeeded too,
255278
// otherwise chunks which are missing metadata could bypass the resource
256279
// manager and exhaust system resources
257-
let cache_value = chunk_cache.get(&key).await?;
280+
let cache_value = chunk_cache
281+
.get(&key)
282+
.instrument(tracing::Span::current())
283+
.await?;
258284
if let Some(bytes) = cache_value {
259285
return Ok(bytes);
260286
}

src/error.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
use aws_sdk_s3::error::ProvideErrorMetadata;
44
use aws_sdk_s3::error::SdkError;
55
use aws_sdk_s3::operation::get_object::GetObjectError;
6+
use aws_sdk_s3::operation::head_object::HeadObjectError;
67
use aws_smithy_types::byte_stream::error::Error as ByteStreamError;
78
use axum::{
89
extract::rejection::JsonRejection,
@@ -74,6 +75,14 @@ pub enum ActiveStorageError {
7475
#[error("error retrieving object from S3 storage")]
7576
S3GetObject(#[from] SdkError<GetObjectError>),
7677

78+
/// Error while retrieving object head from S3
79+
#[error("error retrieving object metadata from S3 storage")]
80+
S3HeadObject(#[from] SdkError<HeadObjectError>),
81+
82+
/// HTTP 403 from object store
83+
#[error("error receiving object metadata from S3 storage")]
84+
Forbidden,
85+
7786
/// Error acquiring a semaphore
7887
#[error("error acquiring resources")]
7988
SemaphoreAcquireError(#[from] AcquireError),
@@ -225,7 +234,11 @@ impl From<ActiveStorageError> for ErrorResponse {
225234
| ActiveStorageError::ShapeInvalid(_) => Self::bad_request(&error),
226235

227236
// Not found
228-
ActiveStorageError::UnsupportedOperation { operation: _ } => Self::not_found(&error),
237+
ActiveStorageError::UnsupportedOperation { operation: _ }
238+
// If we receive a forbidden from object store, return a
239+
// not found to client to avoid leaking information about
240+
// bucket contents.
241+
| ActiveStorageError::Forbidden => Self::not_found(&error),
229242

230243
// Internal server error
231244
ActiveStorageError::FromBytes { type_name: _ }
@@ -277,6 +290,31 @@ impl From<ActiveStorageError> for ErrorResponse {
277290
_ => Self::internal_server_error(&error),
278291
}
279292
}
293+
294+
ActiveStorageError::S3HeadObject(sdk_error) => {
295+
// Tailor the response based on the specific SdkError variant.
296+
match &sdk_error {
297+
// These are generic SdkError variants.
298+
// Internal server error
299+
SdkError::ConstructionFailure(_)
300+
| SdkError::DispatchFailure(_)
301+
| SdkError::ResponseError(_)
302+
| SdkError::TimeoutError(_) => Self::internal_server_error(&error),
303+
304+
// This is a more specific ServiceError variant,
305+
// with HeadObjectError as the inner error.
306+
SdkError::ServiceError(head_obj_error) => {
307+
let head_obj_error = head_obj_error.err();
308+
match head_obj_error {
309+
HeadObjectError::NotFound(_) => Self::bad_request(&error),
310+
// Enum is marked as non-exhaustive
311+
_ => Self::internal_server_error(&error),
312+
}
313+
}
314+
// Enum is marked as non-exhaustive
315+
_ => Self::internal_server_error(&error),
316+
}
317+
}
280318
};
281319

282320
// Log server errors.

src/s3_client.rs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@ use crate::error::ActiveStorageError;
77
use crate::resource_manager::ResourceManager;
88

99
use aws_credential_types::Credentials;
10-
use aws_sdk_s3::config::BehaviorVersion;
10+
use aws_sdk_s3::operation::head_object::HeadObjectError;
1111
use aws_sdk_s3::Client;
12+
use aws_sdk_s3::{config::BehaviorVersion, error::SdkError};
13+
use aws_smithy_runtime_api::http::Response;
1214
use aws_types::region::Region;
1315
use axum::body::Bytes;
1416
use hashbrown::HashMap;
@@ -142,6 +144,44 @@ impl S3Client {
142144
}
143145
}
144146

147+
/// Checks whether the client is authorised to download an
148+
/// object from object storage.
149+
///
150+
/// # Arguments
151+
///
152+
/// * `bucket`: Name of the bucket
153+
/// * `key`: Name of the object in the bucket
154+
pub async fn is_authorised(
155+
&self,
156+
bucket: &str,
157+
key: &str,
158+
) -> Result<bool, SdkError<HeadObjectError, Response>> {
159+
let response = self
160+
.client
161+
.head_object()
162+
.bucket(bucket)
163+
.key(key)
164+
.send()
165+
.instrument(tracing::Span::current())
166+
.await;
167+
168+
// Strategy here is to return true if client is authorised to download object,
169+
// false if explicitly not authorised (HTTP 403) and pass any other errors or
170+
// responses back to the caller.
171+
match response {
172+
Ok(_) => Ok(true),
173+
Err(err) => match &err {
174+
aws_smithy_runtime_api::client::result::SdkError::ServiceError(inner) => {
175+
match inner.raw().status().as_u16() {
176+
403 => Ok(false), // HTTP 403 == Forbidden
177+
_ => Err(err),
178+
}
179+
}
180+
_ => Err(err),
181+
},
182+
}
183+
}
184+
145185
/// Downloads an object from object storage and returns the data as Bytes
146186
///
147187
/// # Arguments
@@ -152,7 +192,7 @@ impl S3Client {
152192
/// * `resource_manager`: ResourceManager object
153193
/// * `mem_permits`: Optional SemaphorePermit for any memory resources reserved
154194
pub async fn download_object<'a>(
155-
self: &S3Client,
195+
&self,
156196
bucket: &str,
157197
key: &str,
158198
range: Option<String>,

0 commit comments

Comments
 (0)