@@ -3,10 +3,64 @@ use aws_sdk_s3::Client as S3Client;
3
3
use lambda_runtime:: { run, service_fn, Error , LambdaEvent } ;
4
4
use routefinder:: Router ;
5
5
use tracing:: log:: * ;
6
+ use aws_lambda_events:: sqs:: SqsEvent ;
7
+ // use deltalake::{DeltaResult};
6
8
7
9
use std:: collections:: HashMap ;
8
10
9
- async fn function_handler ( event : LambdaEvent < S3Event > , client : & S3Client ) -> Result < ( ) , Error > {
11
+
12
+ /// A simple structure to make deserializing test events for identification easier
13
+ ///
14
+ /// See <fhttps://github.com/buoyant-data/oxbow/issues/8>
15
+ #[ derive( serde:: Deserialize ) ]
16
+ #[ serde( rename_all = "PascalCase" ) ]
17
+ struct TestEvent {
18
+ event : String ,
19
+ }
20
+
21
+
22
+ /// Convert the given [aws_lambda_events::sqs::SqsEvent] to a collection of
23
+ /// [aws_lambda_events::s3::S3EventRecord] entities. This is mostly useful for handling S3 Bucket
24
+ /// Notifications which have been passed into SQS
25
+ ///
26
+ /// In the case where the [aws_lambda_events::sqs::SqsEvent] contains an `s3:TestEvent` which is
27
+ /// fired when S3 Bucket Notifications are first enabled, the event will be ignored to avoid
28
+ /// errorsin the processing pipeline
29
+ async fn s3_from_sqs ( event : SqsEvent ) -> Result < S3Event , anyhow:: Error > {
30
+ let mut records = vec ! [ ] ;
31
+ for record in event. records . iter ( ) {
32
+ /* each record is an SqsMessage */
33
+ if let Some ( body) = & record. body {
34
+ match serde_json:: from_str :: < S3Event > ( body) {
35
+ Ok ( s3event) => {
36
+ for s3record in s3event. records {
37
+ records. push ( s3record. clone ( ) ) ;
38
+ }
39
+ }
40
+ Err ( err) => {
41
+ // if we cannot deserialize and the event is an s3::TestEvent, then we should
42
+ // just return empty records.
43
+ let test_event = serde_json:: from_str :: < TestEvent > ( body) ;
44
+ // Early exit with the original error if we cannot parse the JSON at all
45
+ if test_event. is_err ( ) {
46
+ return Err ( err. into ( ) ) ;
47
+ }
48
+
49
+ // Ignore the error on deserialization if the event ends up being an S3
50
+ // TestEvent which is fired when bucket notifications are originally configured
51
+ if "s3:TestEvent" != test_event. unwrap ( ) . event {
52
+ return Err ( err. into ( ) ) ;
53
+ }
54
+ }
55
+ } ;
56
+ }
57
+ }
58
+ Ok ( aws_lambda_events:: s3:: S3Event { records : records } )
59
+ }
60
+
61
+
62
+
63
+ async fn function_handler ( event : LambdaEvent < SqsEvent > , client : & S3Client ) -> Result < ( ) , Error > {
10
64
let input_pattern =
11
65
std:: env:: var ( "INPUT_PATTERN" ) . expect ( "You must define INPUT_PATTERN in the environment" ) ;
12
66
let output_template = std:: env:: var ( "OUTPUT_TEMPLATE" )
@@ -18,9 +72,9 @@ async fn function_handler(event: LambdaEvent<S3Event>, client: &S3Client) -> Res
18
72
. parse ( & output_template) ?;
19
73
20
74
router. add ( input_pattern, 1 ) ?;
21
- info ! ( "Processing records: { event:?}" ) ;
75
+ let records = s3_from_sqs ( event. payload ) ;
22
76
23
- for entity in entities_from ( event . payload ) ? {
77
+ for entity in entities_from ( records . await ? ) ? {
24
78
debug ! ( "Processing {entity:?}" ) ;
25
79
26
80
if let Some ( source_key) = entity. object . key {
@@ -221,4 +275,4 @@ mod tests {
221
275
"databases/oltp/a_table/ds=2023-09-05/some.parquet"
222
276
) ;
223
277
}
224
- }
278
+ }
0 commit comments