Skip to content

Commit e6a749b

Browse files
wip
1 parent f879fc1 commit e6a749b

File tree

2 files changed

+63
-50
lines changed

2 files changed

+63
-50
lines changed

cdk/lib/cdk-stack.ts

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -115,53 +115,12 @@ export class CdkStack extends Stack {
115115
glueSecurityConfiguration: glueSecurityConfiguration,
116116
glueDb: glueDb,
117117
table: table,
118+
tableName: name,
118119
})
119120
break;
120121
default: throw new Error('kinesisFormat not supported');
121122
}
122123

123-
124-
125-
// parquet format
126-
// const s3Destination = new destinationsAlpha.S3Bucket(firehoseBucket, {
127-
// encryptionKey: kmsKey,
128-
// bufferingInterval: Duration.seconds(60),
129-
// processor: lambdaProcessor,
130-
// bufferingSize: Size.mebibytes(64),
131-
// });
132-
133-
134-
135-
// // https://5k-team.trilogy.com/hc/en-us/articles/360015651640-Configuring-Firehose-with-CDK
136-
// // https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-kinesisfirehose-deliverystream.html
137-
// const firehoseDeliveryStreamCfn = firehoseDeliveryStream.node.defaultChild as CfnDeliveryStream;
138-
// firehoseDeliveryStreamCfn.addPropertyOverride('ExtendedS3DestinationConfiguration.DataFormatConversionConfiguration', {
139-
// inputFormatConfiguration: {
140-
// deserializer: {
141-
// // These settings might need to be changed based on the use case
142-
// // This is the default settings when configured through the console
143-
// openXJsonSerDe: {
144-
// caseInsensitive: false,
145-
// // Add hive keywords (e.g. timestamp) if they are added to events schema
146-
// columnToJsonKeyMappings: {},
147-
// convertDotsInJsonKeysToUnderscores: false,
148-
// },
149-
// },
150-
// },
151-
// outputFormatConfiguration: {
152-
// serializer: {
153-
// parquetSerDe: {
154-
// compression: 'SNAPPY',
155-
// },
156-
// },
157-
// },
158-
// schemaConfiguration: {
159-
// databaseName: this.backendStack.glueStack.database.databaseName, // Target Glue database name
160-
// roleArn: this.deliveryStreamRole.roleArn,
161-
// tableName: this.backendStack.glueStack.eventsTable.tableName, // Target Glue table name
162-
// },
163-
// });
164-
165124
const athenaQueryResults = new s3.Bucket(this, 'query-results', {
166125
bucketName: `${name}-query-results`,
167126
encryptionKey: kmsKey,

cdk/lib/firehose/firehose-parquet.ts

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ import {
1111
aws_logs as logs,
1212
RemovalPolicy,
1313
Stack,
14+
Size,
1415
} from 'aws-cdk-lib';
1516
import {
16-
LambdaFunctionProcessor as LambdaFunctionProcessorAlpha,
1717
DeliveryStream as DeliveryStreamAlpha
1818
} from '@aws-cdk/aws-kinesisfirehose-alpha'
1919
import * as glueAlpha from '@aws-cdk/aws-glue-alpha'
20+
import { CfnDeliveryStream } from 'aws-cdk-lib/aws-kinesisfirehose';
2021

2122

2223
export interface FirehoseParquetProps {
@@ -28,13 +29,14 @@ export interface FirehoseParquetProps {
2829
glueSecurityConfiguration: glueAlpha.SecurityConfiguration
2930
glueDb: glueAlpha.Database
3031
table: dynamodb.ITable
32+
tableName: string
3133
}
3234

3335
export class FirehoseParquet extends Construct {
3436
constructor(scope: Construct, id: string, props: FirehoseParquetProps) {
3537
super(scope, id)
3638

37-
const { kmsKey, firehoseBucket, name, ddbChangesPrefix, stream, glueSecurityConfiguration, glueDb, table } = props
39+
const { kmsKey, firehoseBucket, name, ddbChangesPrefix, stream, glueSecurityConfiguration, glueDb, table, tableName } = props
3840
const roleName = `${name}-crawler-ddb-role`;
3941
const roleCrawlerddb = new iam.Role(this, 'roleCrawlerDdb', {
4042
roleName: roleName,
@@ -76,17 +78,15 @@ export class FirehoseParquet extends Construct {
7678
glueDb.catalogArn,
7779
glueDb.databaseArn,
7880
kmsKey.keyArn,
79-
firehoseBucket.bucketArn,
80-
`${firehoseBucket.bucketArn}/*`,
8181
glueCrawlerArn,
8282
table.tableArn,
8383
],
8484
actions: [
8585
'logs:*',
8686
'glue:*',
8787
'kms:Decrypt',
88-
'S3:*',
8988
'dynamodb:DescribeTable',
89+
'dynamodb:Scan',
9090
],
9191
})
9292
)
@@ -98,22 +98,76 @@ export class FirehoseParquet extends Construct {
9898
)
9999
glueSecurityConfiguration.node.addDependency(roleCrawlerddb)
100100

101-
102101
const s3Destination = new destinationsAlpha.S3Bucket(firehoseBucket, {
103102
encryptionKey: kmsKey,
104103
bufferingInterval: Duration.seconds(60),
105104
dataOutputPrefix: `${ddbChangesPrefix}/`,
106-
logGroup: new logs.LogGroup(this, 'firehose--parquet-s3-log-group', {
105+
logGroup: new logs.LogGroup(this, 'firehose-parquet-s3-log-group', {
107106
logGroupName: `${name}-firehose-parquet-s3-log-group`,
108107
removalPolicy: RemovalPolicy.DESTROY,
109108
}),
109+
bufferingSize: Size.mebibytes(64),
110110
})
111111

112-
new DeliveryStreamAlpha(this, 'Delivery Stream', {
112+
const glueTableName = tableName.replace(/-/g, '_')
113+
console.log(`glueTableName: ${glueTableName}`)
114+
// https://5k-team.trilogy.com/hc/en-us/articles/360015651640-Configuring-Firehose-with-CDK
115+
// https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-kinesisfirehose-deliverystream.html
116+
117+
118+
const firehoseDeliveryStream = new DeliveryStreamAlpha(this, 'Delivery Stream', {
113119
deliveryStreamName: `${name}-firehose-parquet`,
114120
sourceStream: stream,
115121
destinations: [s3Destination],
116122
})
117123

124+
const firehoseRole = firehoseDeliveryStream.node.findChild('S3 Destination Role') as iam.Role;
125+
// firehoseRole.addToPolicy(
126+
// new iam.PolicyStatement({
127+
// effect: iam.Effect.ALLOW,
128+
// resources: [
129+
// glueDb.databaseArn,
130+
// `arn:aws:glue:${Stack.of(this).region}:${Stack.of(this).account}:catalog`
131+
// ],
132+
// actions: ['glue:GetTable', 'glue:GetTableVersion'],
133+
// })
134+
// );
135+
firehoseRole.addToPolicy(
136+
new iam.PolicyStatement({
137+
effect: iam.Effect.ALLOW,
138+
resources: ['*'],
139+
actions: ['*'],
140+
})
141+
);
142+
143+
const firehoseDeliveryStreamCfn = firehoseDeliveryStream.node.defaultChild as CfnDeliveryStream;
144+
firehoseDeliveryStreamCfn.addPropertyOverride('ExtendedS3DestinationConfiguration.DataFormatConversionConfiguration', {
145+
inputFormatConfiguration: {
146+
deserializer: {
147+
// These settings might need to be changed based on the use case
148+
// These are the default settings when configured through the console
149+
openXJsonSerDe: {
150+
caseInsensitive: false,
151+
// Add hive keywords (e.g. timestamp) if they are added to events schema
152+
columnToJsonKeyMappings: {},
153+
convertDotsInJsonKeysToUnderscores: false,
154+
},
155+
},
156+
},
157+
outputFormatConfiguration: {
158+
serializer: {
159+
parquetSerDe: {
160+
compression: 'SNAPPY',
161+
},
162+
},
163+
},
164+
schemaConfiguration: {
165+
databaseName: glueDb.databaseName,
166+
roleArn: firehoseRole.roleArn,
167+
tableName: glueTableName,
168+
},
169+
});
170+
firehoseDeliveryStreamCfn.node.addDependency(firehoseRole);
171+
118172
}
119173
}

0 commit comments

Comments
 (0)