Skip to content

Commit 12da109

Browse files
committed
Add support for the EXCLUDE_REGEX environment variable to avoid some keys
This allows the lambda to optionally skip copying some large or otherwise unnecessary objects, without needing to configure elaborate EventBridge rules or something similar.
1 parent 65400f5 commit 12da109

File tree

3 files changed

+44
-8
lines changed

3 files changed

+44
-8
lines changed

Cargo.toml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "lambda-s3-restructure"
3-
version = "0.3.0"
3+
version = "0.4.0"
44
edition = "2021"
55
homepage = "https://github.com/buoyant-data/lambda-s3-restructure"
66
authors = [
@@ -9,16 +9,17 @@ authors = [
99

1010
[dependencies]
1111
anyhow = "1.0.75"
12-
aws-config = "0.56.0"
13-
aws-sdk-s3 = "0.29.0"
14-
aws_lambda_events = { version = "0.12.0", default-features = false, features = ["sns", "sqs", "s3"] }
12+
aws-config = { version = "1.5", features = ["behavior-version-latest"]}
13+
aws-sdk-s3 = "1.49"
14+
aws_lambda_events = { version = "0.15.1", default-features = false, features = ["sns", "sqs", "s3"] }
1515
chrono = "0.4.31"
1616
liquid = "0.26"
1717
serde = { version = "=1", features = ["rc"] }
18-
lambda_runtime = "0.8.1"
18+
lambda_runtime = "0.13.0"
19+
regex = "1.10"
1920
routefinder = "0.5.3"
2021
serde_json = "1.0.105"
21-
tokio = { version = "1", features = ["macros"] }
22+
tokio = { version = "1.40", features = ["macros"] }
2223
tracing = { version = "0.1", features = ["log"] }
2324
tracing-subscriber = { version = "0.3", default-features = false, features = ["fmt", "env-filter", "tracing-log"] }
2425
urlencoding = "2.1.3"

README.adoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@ The following environment variables must be set for the function to run properly
6666
| _required but empty by default_
6767
| A link:https://crates.io/crates/liquid[liquid] template which produces a compatible path for outputting the file into the S3 bucket.
6868

69+
| `EXCLUDE_REGEX`
70+
|
71+
| Optional regular expression; object keys that match it are skipped and not copied.
72+
6973
|===
7074

7175
[WARNING]

src/main.rs

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use aws_lambda_events::event::s3::{S3Entity, S3Event};
22
use aws_lambda_events::sqs::SqsEvent;
33
use aws_sdk_s3::Client as S3Client;
44
use lambda_runtime::{run, service_fn, Error, LambdaEvent};
5+
use regex::Regex;
56
use routefinder::Router;
67
use tracing::log::*;
78

@@ -61,6 +62,8 @@ async fn function_handler(
6162
) -> Result<(), Error> {
6263
let input_pattern =
6364
std::env::var("INPUT_PATTERN").expect("You must define INPUT_PATTERN in the environment");
65+
let exclude_regex: Option<Regex> = std::env::var("EXCLUDE_REGEX")
66+
.map(|ex| Regex::new(ex.as_ref()).expect("Failed to compile EXCLUDE_REGEX")).ok();
6467
let output_template = std::env::var("OUTPUT_TEMPLATE")
6568
.expect("You must define OUTPUT_TEMPLATE in the environment");
6669

@@ -80,6 +83,10 @@ async fn function_handler(
8083
debug!("Processing {entity:?}");
8184

8285
if let Some(source_key) = entity.object.key {
86+
if should_exclude(exclude_regex.as_ref(), &source_key) {
87+
continue;
88+
}
89+
8390
let parameters = add_builtin_parameters(captured_parameters(&router, &source_key)?);
8491
let output_key = template.render(&parameters)?;
8592
info!("Copying {source_key:?} to {output_key:?}");
@@ -110,7 +117,7 @@ async fn main() -> Result<(), Error> {
110117
.without_time()
111118
.init();
112119

113-
let shared_config = aws_config::load_from_env().await;
120+
let shared_config = aws_config::from_env().load().await;
114121
let client = S3Client::new(&shared_config);
115122
let client_ref = &client;
116123

@@ -146,6 +153,15 @@ fn captured_parameters<Handler>(
146153
Ok(data)
147154
}
148155

156+
/// Return true if the given key matches the pattern and should be excluded from consideration
157+
fn should_exclude(pattern: Option<&Regex>, key: &str) -> bool {
158+
match pattern {
159+
Some(re) => re.is_match(key),
160+
None => false,
161+
}
162+
}
163+
164+
/// Introduce the necessary built-in parameters to the `data` for rendering a liquid template
149165
fn add_builtin_parameters(mut data: HashMap<String, String>) -> HashMap<String, String> {
150166
use chrono::Datelike;
151167
let now = chrono::Utc::now();
@@ -246,7 +262,7 @@ mod tests {
246262
]
247263
}"#;
248264

249-
let event: S3Event = serde_json::from_str(&raw_buf)?;
265+
let event: S3Event = serde_json::from_str(raw_buf)?;
250266
Ok(event)
251267
}
252268

@@ -272,4 +288,19 @@ mod tests {
272288
"databases/oltp/a_table/ds=2023-09-05/some.parquet"
273289
);
274290
}
291+
292+
#[test]
293+
fn test_exclude_regex() {
294+
let exclude = Some(Regex::new(r#"^path\/to\/table.*"#).expect("Failed to compile regular expression"));
295+
let keys = vec![
296+
"path/to/alpha",
297+
"path/to/bravo/foo.parquet",
298+
"path/to/table",
299+
"path/to/table/foo.parquet",
300+
];
301+
302+
let filtered: Vec<_> = keys.iter().filter(|k| !should_exclude(exclude.as_ref(), k)).map(|k| k.clone()).collect();
303+
assert_ne!(filtered, keys);
304+
}
275305
}
306+

0 commit comments

Comments
 (0)