@@ -11,12 +11,13 @@ import {
11
11
aws_logs as logs ,
12
12
RemovalPolicy ,
13
13
Stack ,
14
+ Size ,
14
15
} from 'aws-cdk-lib' ;
15
16
import {
16
- LambdaFunctionProcessor as LambdaFunctionProcessorAlpha ,
17
17
DeliveryStream as DeliveryStreamAlpha
18
18
} from '@aws-cdk/aws-kinesisfirehose-alpha'
19
19
import * as glueAlpha from '@aws-cdk/aws-glue-alpha'
20
+ import { CfnDeliveryStream } from 'aws-cdk-lib/aws-kinesisfirehose' ;
20
21
21
22
22
23
export interface FirehoseParquetProps {
@@ -28,13 +29,14 @@ export interface FirehoseParquetProps {
28
29
glueSecurityConfiguration : glueAlpha . SecurityConfiguration
29
30
glueDb : glueAlpha . Database
30
31
table : dynamodb . ITable
32
+ tableName : string
31
33
}
32
34
33
35
export class FirehoseParquet extends Construct {
34
36
constructor ( scope : Construct , id : string , props : FirehoseParquetProps ) {
35
37
super ( scope , id )
36
38
37
- const { kmsKey, firehoseBucket, name, ddbChangesPrefix, stream, glueSecurityConfiguration, glueDb, table } = props
39
+ const { kmsKey, firehoseBucket, name, ddbChangesPrefix, stream, glueSecurityConfiguration, glueDb, table, tableName } = props
38
40
const roleName = `${ name } -crawler-ddb-role` ;
39
41
const roleCrawlerddb = new iam . Role ( this , 'roleCrawlerDdb' , {
40
42
roleName : roleName ,
@@ -76,17 +78,15 @@ export class FirehoseParquet extends Construct {
76
78
glueDb . catalogArn ,
77
79
glueDb . databaseArn ,
78
80
kmsKey . keyArn ,
79
- firehoseBucket . bucketArn ,
80
- `${ firehoseBucket . bucketArn } /*` ,
81
81
glueCrawlerArn ,
82
82
table . tableArn ,
83
83
] ,
84
84
actions : [
85
85
'logs:*' ,
86
86
'glue:*' ,
87
87
'kms:Decrypt' ,
88
- 'S3:*' ,
89
88
'dynamodb:DescribeTable' ,
89
+ 'dynamodb:Scan' ,
90
90
] ,
91
91
} )
92
92
)
@@ -98,22 +98,76 @@ export class FirehoseParquet extends Construct {
98
98
)
99
99
glueSecurityConfiguration . node . addDependency ( roleCrawlerddb )
100
100
101
-
102
101
const s3Destination = new destinationsAlpha . S3Bucket ( firehoseBucket , {
103
102
encryptionKey : kmsKey ,
104
103
bufferingInterval : Duration . seconds ( 60 ) ,
105
104
dataOutputPrefix : `${ ddbChangesPrefix } /` ,
106
- logGroup : new logs . LogGroup ( this , 'firehose-- parquet-s3-log-group' , {
105
+ logGroup : new logs . LogGroup ( this , 'firehose-parquet-s3-log-group' , {
107
106
logGroupName : `${ name } -firehose-parquet-s3-log-group` ,
108
107
removalPolicy : RemovalPolicy . DESTROY ,
109
108
} ) ,
109
+ bufferingSize : Size . mebibytes ( 64 ) ,
110
110
} )
111
111
112
- new DeliveryStreamAlpha ( this , 'Delivery Stream' , {
112
+ const glueTableName = tableName . replace ( / - / g, '_' )
113
+ console . log ( `glueTableName: ${ glueTableName } ` )
114
+ // https://5k-team.trilogy.com/hc/en-us/articles/360015651640-Configuring-Firehose-with-CDK
115
+ // https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-kinesisfirehose-deliverystream.html
116
+
117
+
118
+ const firehoseDeliveryStream = new DeliveryStreamAlpha ( this , 'Delivery Stream' , {
113
119
deliveryStreamName : `${ name } -firehose-parquet` ,
114
120
sourceStream : stream ,
115
121
destinations : [ s3Destination ] ,
116
122
} )
117
123
124
+ const firehoseRole = firehoseDeliveryStream . node . findChild ( 'S3 Destination Role' ) as iam . Role ;
125
+ // firehoseRole.addToPolicy(
126
+ // new iam.PolicyStatement({
127
+ // effect: iam.Effect.ALLOW,
128
+ // resources: [
129
+ // glueDb.databaseArn,
130
+ // `arn:aws:glue:${Stack.of(this).region}:${Stack.of(this).account}:catalog`
131
+ // ],
132
+ // actions: ['glue:GetTable', 'glue:GetTableVersion'],
133
+ // })
134
+ // );
135
+ firehoseRole . addToPolicy (
136
+ new iam . PolicyStatement ( {
137
+ effect : iam . Effect . ALLOW ,
138
+ resources : [ '*' ] ,
139
+ actions : [ '*' ] ,
140
+ } )
141
+ ) ;
142
+
143
+ const firehoseDeliveryStreamCfn = firehoseDeliveryStream . node . defaultChild as CfnDeliveryStream ;
144
+ firehoseDeliveryStreamCfn . addPropertyOverride ( 'ExtendedS3DestinationConfiguration.DataFormatConversionConfiguration' , {
145
+ inputFormatConfiguration : {
146
+ deserializer : {
147
+ // These settings might need to be changed based on the use case
148
+ // These are the default settings when configured through the console
149
+ openXJsonSerDe : {
150
+ caseInsensitive : false ,
151
+ // Add Hive keywords (e.g. timestamp) if they are added to the events schema
152
+ columnToJsonKeyMappings : { } ,
153
+ convertDotsInJsonKeysToUnderscores : false ,
154
+ } ,
155
+ } ,
156
+ } ,
157
+ outputFormatConfiguration : {
158
+ serializer : {
159
+ parquetSerDe : {
160
+ compression : 'SNAPPY' ,
161
+ } ,
162
+ } ,
163
+ } ,
164
+ schemaConfiguration : {
165
+ databaseName : glueDb . databaseName ,
166
+ roleArn : firehoseRole . roleArn ,
167
+ tableName : glueTableName ,
168
+ } ,
169
+ } ) ;
170
+ firehoseDeliveryStreamCfn . node . addDependency ( firehoseRole ) ;
171
+
118
172
}
119
173
}
0 commit comments