@@ -47,6 +47,9 @@ Parameters:
4747 SchedulerExecutionRoleARN :
4848 Type : String
4949 Description : Common role for module Scheduler execution
50+ LambdaManageGlueTableARN :
51+ Type : String
52+ Description : ARN of a Lambda for Managing GlueTable
5053
5154Outputs :
5255 StepFunctionARN :
@@ -103,7 +106,7 @@ Resources:
103106 LambdaFunction :
104107 Type : AWS::Lambda::Function
105108 Properties :
106- FunctionName : !Sub ' ${ResourcePrefix}${CFDataName}-Lambda'
109+ FunctionName : !Sub " ${ResourcePrefix}${CFDataName}-Lambda"
107110 Description : !Sub "Lambda function to retrieve ${CFDataName}"
108111 Runtime : python3.10
109112 Architectures : [x86_64]
@@ -305,17 +308,88 @@ Resources:
305308 Crawler :
306309 Type : AWS::Glue::Crawler
307310 Properties :
308- Name : !Sub ' ${ResourcePrefix}${CFDataName}-Crawler'
311+ Name : !Sub " ${ResourcePrefix}${CFDataName}-Crawler"
309312 Role : !Ref GlueRoleARN
310313 DatabaseName : !Ref DatabaseName
311314 Targets :
312315 S3Targets :
313316 - Path : !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
317+ Configuration : |
318+ {
319+ "Version": 1.0,
320+ "Grouping": {
321+ "TableGroupingPolicy": "CombineCompatibleSchemas"
322+ },
323+ "CrawlerOutput": {
324+ "Tables": {
325+ "TableThreshold": 1
326+ }
327+ }
328+ }
329+
330+ ModuleGlueTable :
331+ Type : Custom::ManageGlueTable
332+ Properties :
333+ ServiceToken : !Ref LambdaManageGlueTableARN
334+ TableInput :
335+ Name : cost_anomaly_data
336+ TableType : EXTERNAL_TABLE
337+ Parameters :
338+ classification : json
339+ compressionType : none
340+ PartitionKeys :
341+ - Name : payer_id
342+ Type : string
343+ - Name : year
344+ Type : string
345+ - Name : month
346+ Type : string
347+ - Name : day
348+ Type : string
349+ StorageDescriptor :
350+ Columns :
351+ - Name : anomalyid
352+ Type : string
353+ - Name : anomalystartdate
354+ Type : string
355+ - Name : anomalyenddate
356+ Type : string
357+ - Name : dimensionvalue
358+ Type : string
359+ - Name : maximpact
360+ Type : double
361+ - Name : totalactualspend
362+ Type : double
363+ - Name : totalexpectedspend
364+ Type : double
365+ - Name : totalimpact
366+ Type : double
367+ - Name : totalimpactpercentage
368+ Type : double
369+ - Name : monitorarn
370+ Type : string
371+ - Name : linkedaccount
372+ Type : string
373+ - Name : linkedaccountname
374+ Type : string
375+ - Name : region
376+ Type : string
377+ - Name : service
378+ Type : string
379+ - Name : usagetype
380+ Type : string
381+ InputFormat : org.apache.hadoop.mapred.TextInputFormat
382+ Location : !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
383+ OutputFormat : org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
384+ SerdeInfo :
385+ Parameters :
386+ paths : anomalyid,anomalystartdate,anomalyenddate,dimensionvalue,maximpact,totalactualspend,totalexpectedspend,totalimpact,totalimpactpercentage,monitorarn,linkedaccount,linkedaccountname,region,service,usagetype
387+ SerializationLibrary : org.openx.data.jsonserde.JsonSerDe
314388
315389 ModuleStepFunction :
316390 Type : AWS::StepFunctions::StateMachine
317391 Properties :
318- StateMachineName : !Sub ' ${ResourcePrefix}${CFDataName}-StateMachine'
392+ StateMachineName : !Sub " ${ResourcePrefix}${CFDataName}-StateMachine"
319393 StateMachineType : STANDARD
320394 RoleArn : !Ref StepFunctionExecutionRoleARN
321395 DefinitionS3Location :
@@ -326,22 +400,22 @@ Resources:
326400 ModuleLambdaARN : !GetAtt LambdaFunction.Arn
327401 Crawlers : !Sub '["${ResourcePrefix}${CFDataName}-Crawler"]'
328402 CollectionType : " Payers"
329- Params : ' '
403+ Params : " "
330404 Module : !Ref CFDataName
331405 DeployRegion : !Ref AWS::Region
332406 Account : !Ref AWS::AccountId
333407 Prefix : !Ref ResourcePrefix
334408
335409 ModuleRefreshSchedule :
336- Type : ' AWS::Scheduler::Schedule'
410+ Type : " AWS::Scheduler::Schedule"
337411 Properties :
338- Description : !Sub ' Scheduler for the ODC ${CFDataName} module'
339- Name : !Sub ' ${ResourcePrefix}${CFDataName}-RefreshSchedule'
412+ Description : !Sub " Scheduler for the ODC ${CFDataName} module"
413+ Name : !Sub " ${ResourcePrefix}${CFDataName}-RefreshSchedule"
340414 ScheduleExpression : !Ref Schedule
341415 State : ENABLED
342416 FlexibleTimeWindow :
343417 MaximumWindowInMinutes : 30
344- Mode : ' FLEXIBLE'
418+ Mode : " FLEXIBLE"
345419 Target :
346420 Arn : !GetAtt ModuleStepFunction.Arn
347421 RoleArn : !Ref SchedulerExecutionRoleARN
0 commit comments