Skip to content

Commit f4dbfd0

Browse files
authored
Create cost_anomaly_data table when module is deployed (#194)
1 parent 4b4907c commit f4dbfd0

File tree

2 files changed

+83
-8
lines changed

2 files changed

+83
-8
lines changed

data-collection/deploy/deploy-data-collection.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -901,6 +901,7 @@ Resources:
901901
CodeBucket: !If [ ProdCFNTemplateUsed, !FindInMap [RegionMap, !Ref "AWS::Region", CodeBucket], !Ref CFNSourceBucket ]
902902
StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v2, TemplatePath]
903903
StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn
904+
LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn
904905
SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn
905906

906907
BackupModule:

data-collection/deploy/module-cost-anomaly.yaml

Lines changed: 82 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,9 @@ Parameters:
4747
SchedulerExecutionRoleARN:
4848
Type: String
4949
Description: Common role for module Scheduler execution
50+
LambdaManageGlueTableARN:
51+
Type: String
52+
Description: ARN of a Lambda for Managing GlueTable
5053

5154
Outputs:
5255
StepFunctionARN:
@@ -103,7 +106,7 @@ Resources:
103106
LambdaFunction:
104107
Type: AWS::Lambda::Function
105108
Properties:
106-
FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda'
109+
FunctionName: !Sub "${ResourcePrefix}${CFDataName}-Lambda"
107110
Description: !Sub "Lambda function to retrieve ${CFDataName}"
108111
Runtime: python3.10
109112
Architectures: [x86_64]
@@ -305,17 +308,88 @@ Resources:
305308
Crawler:
306309
Type: AWS::Glue::Crawler
307310
Properties:
308-
Name: !Sub '${ResourcePrefix}${CFDataName}-Crawler'
311+
Name: !Sub "${ResourcePrefix}${CFDataName}-Crawler"
309312
Role: !Ref GlueRoleARN
310313
DatabaseName: !Ref DatabaseName
311314
Targets:
312315
S3Targets:
313316
- Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
317+
Configuration: |
318+
{
319+
"Version": 1.0,
320+
"Grouping": {
321+
"TableGroupingPolicy": "CombineCompatibleSchemas"
322+
},
323+
"CrawlerOutput": {
324+
"Tables": {
325+
"TableThreshold": 1
326+
}
327+
}
328+
}
329+
330+
ModuleGlueTable:
331+
Type: Custom::ManageGlueTable
332+
Properties:
333+
ServiceToken: !Ref LambdaManageGlueTableARN
334+
TableInput:
335+
Name: cost_anomaly_data
336+
TableType: EXTERNAL_TABLE
337+
Parameters:
338+
classification: json
339+
compressionType: none
340+
PartitionKeys:
341+
- Name: payer_id
342+
Type: string
343+
- Name: year
344+
Type: string
345+
- Name: month
346+
Type: string
347+
- Name: day
348+
Type: string
349+
StorageDescriptor:
350+
Columns:
351+
- Name: anomalyid
352+
Type: string
353+
- Name: anomalystartdate
354+
Type: string
355+
- Name: anomalyenddate
356+
Type: string
357+
- Name: dimensionvalue
358+
Type: string
359+
- Name: maximpact
360+
Type: double
361+
- Name: totalactualspend
362+
Type: double
363+
- Name: totalexpectedspend
364+
Type: double
365+
- Name: totalimpact
366+
Type: double
367+
- Name: totalimpactpercentage
368+
Type: double
369+
- Name: monitorarn
370+
Type: string
371+
- Name: linkedaccount
372+
Type: string
373+
- Name: linkedaccountname
374+
Type: string
375+
- Name: region
376+
Type: string
377+
- Name: service
378+
Type: string
379+
- Name: usagetype
380+
Type: string
381+
InputFormat: org.apache.hadoop.mapred.TextInputFormat
382+
Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
383+
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
384+
SerdeInfo:
385+
Parameters:
386+
paths: anomalyid,anomalystartdate,anomalyenddate,dimensionvalue,maximpact,totalactualspend,totalexpectedspend,totalimpact,totalimpactpercentage,monitorarn,linkedaccount,linkedaccountname,region,service,usagetype
387+
SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
314388

315389
ModuleStepFunction:
316390
Type: AWS::StepFunctions::StateMachine
317391
Properties:
318-
StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine'
392+
StateMachineName: !Sub "${ResourcePrefix}${CFDataName}-StateMachine"
319393
StateMachineType: STANDARD
320394
RoleArn: !Ref StepFunctionExecutionRoleARN
321395
DefinitionS3Location:
@@ -326,22 +400,22 @@ Resources:
326400
ModuleLambdaARN: !GetAtt LambdaFunction.Arn
327401
Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-Crawler"]'
328402
CollectionType: "Payers"
329-
Params: ''
403+
Params: ""
330404
Module: !Ref CFDataName
331405
DeployRegion: !Ref AWS::Region
332406
Account: !Ref AWS::AccountId
333407
Prefix: !Ref ResourcePrefix
334408

335409
ModuleRefreshSchedule:
336-
Type: 'AWS::Scheduler::Schedule'
410+
Type: "AWS::Scheduler::Schedule"
337411
Properties:
338-
Description: !Sub 'Scheduler for the ODC ${CFDataName} module'
339-
Name: !Sub '${ResourcePrefix}${CFDataName}-RefreshSchedule'
412+
Description: !Sub "Scheduler for the ODC ${CFDataName} module"
413+
Name: !Sub "${ResourcePrefix}${CFDataName}-RefreshSchedule"
340414
ScheduleExpression: !Ref Schedule
341415
State: ENABLED
342416
FlexibleTimeWindow:
343417
MaximumWindowInMinutes: 30
344-
Mode: 'FLEXIBLE'
418+
Mode: "FLEXIBLE"
345419
Target:
346420
Arn: !GetAtt ModuleStepFunction.Arn
347421
RoleArn: !Ref SchedulerExecutionRoleARN

0 commit comments

Comments (0)