Skip to content

Commit 780e141

Browse files
authored
Add option to set content md5 header (#218)
This PR adds an option to the S3 configuration that enables the Content-MD5 header. The option is enabled by setting the P_S3_SET_CONTENT_MD5 environment variable to true. When it is set to true, the application server calculates the MD5 digest of each uploaded object and sets the Content-MD5 header on the put requests sent through the AWS S3 client. Fixes #215
1 parent e968866 commit 780e141

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

server/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ arrow-schema = { version = "24.0.0", features = ["serde"] }
1515
async-trait = "0.1"
1616
aws-sdk-s3 = "0.19"
1717
aws-smithy-async = { version = "0.49.0", features = ["rt-tokio"] }
18+
base64 = "0.20.0"
1819
bytes = "1"
1920
chrono = "0.4.19"
2021
chrono-humanize = "0.2.2"
@@ -31,6 +32,7 @@ humantime-serde = "1.1.1"
3132
lazy_static = "1.4.0"
3233
log = "0.4.14"
3334
num_cpus = "1.0.0"
35+
md-5 = "0.10.5"
3436
os_info = "3.0.7"
3537
hostname = "0.3"
3638
rand = "0.8.4"

server/src/storage/s3.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use aws_sdk_s3::Error as AwsSdkError;
2424
use aws_sdk_s3::RetryConfig;
2525
use aws_sdk_s3::{Client, Credentials, Endpoint, Region};
2626
use aws_smithy_async::rt::sleep::default_async_sleep;
27+
use base64::encode;
2728
use bytes::Bytes;
2829
use clap::builder::ArgPredicate;
2930

@@ -35,6 +36,7 @@ use datafusion::datasource::object_store::ObjectStoreRegistry;
3536
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
3637
use futures::StreamExt;
3738
use http::Uri;
39+
use md5::{Digest, Md5};
3840
use object_store::aws::AmazonS3Builder;
3941
use object_store::limit::LimitStore;
4042
use relative_path::RelativePath;
@@ -105,6 +107,15 @@ pub struct S3Config {
105107
default_value_if("demo", ArgPredicate::IsPresent, DEFAULT_S3_BUCKET)
106108
)]
107109
pub s3_bucket_name: String,
110+
111+
/// Set client to send content_md5 header on every put request
112+
#[arg(
113+
long,
114+
env = "P_S3_SET_CONTENT_MD5",
115+
value_name = "bool",
116+
default_value = "false"
117+
)]
118+
pub content_md5: bool,
108119
}
109120

110121
impl ObjectStorageProvider for S3Config {
@@ -153,6 +164,7 @@ impl ObjectStorageProvider for S3Config {
153164
Arc::new(S3 {
154165
client,
155166
bucket: self.s3_bucket_name.clone(),
167+
set_content_md5: self.content_md5,
156168
})
157169
}
158170

@@ -164,6 +176,7 @@ impl ObjectStorageProvider for S3Config {
164176
pub struct S3 {
165177
client: aws_sdk_s3::Client,
166178
bucket: String,
179+
set_content_md5: bool,
167180
}
168181

169182
impl S3 {
@@ -233,16 +246,24 @@ impl S3 {
233246
Ok(logstreams)
234247
}
235248

236-
async fn _upload_file(&self, key: &str, path: &Path) -> Result<(), AwsSdkError> {
237-
let body = ByteStream::from_path(path).await.unwrap();
249+
async fn _upload_file(
250+
&self,
251+
key: &str,
252+
path: &Path,
253+
md5: Option<String>,
254+
) -> Result<(), AwsSdkError> {
255+
let body = ByteStream::from_path(&path).await.unwrap();
256+
238257
let resp = self
239258
.client
240259
.put_object()
241260
.bucket(&self.bucket)
242261
.key(key)
243262
.body(body)
263+
.set_content_md5(md5)
244264
.send()
245265
.await?;
266+
246267
log::trace!("{:?}", resp);
247268

248269
Ok(())
@@ -260,12 +281,18 @@ impl ObjectStorage for S3 {
260281
path: &RelativePath,
261282
resource: Bytes,
262283
) -> Result<(), ObjectStorageError> {
263-
let _resp = self
264-
.client
284+
let hash = self.set_content_md5.then(|| {
285+
let mut hash = Md5::new();
286+
hash.update(&resource);
287+
encode(hash.finalize())
288+
});
289+
290+
self.client
265291
.put_object()
266292
.bucket(&self.bucket)
267293
.key(path.as_str())
268294
.body(resource.into())
295+
.set_content_md5(hash)
269296
.send()
270297
.await
271298
.map_err(|err| ObjectStorageError::ConnectionError(Box::new(err)))?;
@@ -296,7 +323,16 @@ impl ObjectStorage for S3 {
296323
}
297324

298325
async fn upload_file(&self, key: &str, path: &Path) -> Result<(), ObjectStorageError> {
299-
self._upload_file(key, path).await?;
326+
let hash = if self.set_content_md5 {
327+
let mut file = std::fs::File::open(path)?;
328+
let mut digest = Md5::new();
329+
std::io::copy(&mut file, &mut digest)?;
330+
Some(encode(digest.finalize()))
331+
} else {
332+
None
333+
};
334+
335+
self._upload_file(key, path, hash).await?;
300336

301337
Ok(())
302338
}

0 commit comments

Comments
 (0)