Skip to content

Commit 7746a4d

Browse files
esc1144 and iakov-aws authored
Instrumentation phase 1 (#275)
* Data Collection logging phase 1
* codify new SF version name
* tweak account collector logging
* Sync account collector with fix for manual account list
* Add backoff/retry to standalone state machine
* add more retry buffer
* Misc cleanup
* Temporarily re-indent the over indent to make the diff easier to read
* Switch to pre-defined table
* Fix merge issue
* Merge main, normalize linked sf code file names
* Interim commit
* Checkpoint
* beta
* Pre-main merge instrumentation
* Post merge main and cleanup
* Fix misplaced space insertion
* Refine Health detail execution logging
* Add policy condition for CW logging
* Update data-collection/deploy/deploy-data-collection.yaml
* Tweak to output error message
* Scale back Lambda-based monitoring
* Re-enable testing deploy of CaseSummary
* Misc cleanup for easier merge
* Misc cleanup for easier merge, 2
* Health SF correction from reversion of code
---------
Co-authored-by: Iakov Gan <[email protected]>
Co-authored-by: Iakov GAN <[email protected]>
1 parent 9e5ff48 commit 7746a4d

28 files changed

+1542
-136
lines changed

data-collection/deploy/account-collector.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ Resources:
128128
ZipFile: |
129129
import os
130130
import json
131+
import uuid
131132
import logging
133+
from datetime import datetime
132134
from functools import partial
133135
134136
import boto3
@@ -141,6 +143,7 @@ Resources:
141143
EXCLUDED_ACCOUNT_LIST_KEY = os.environ.get('EXCLUDED_ACCOUNT_LIST_KEY')
142144
EUC_ACCOUNTS = os.environ.get('EUC_ACCOUNT_IDS', '').strip()
143145
TMP_FILE = "/tmp/data.json"
146+
START_TIME = str(datetime.now().isoformat())
144147
145148
logger = logging.getLogger(__name__)
146149
logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
@@ -164,7 +167,7 @@ Resources:
164167
'compute-optimizer': partial(iterate_admins_accounts, 'compute-optimizer'),
165168
'backup': partial(iterate_admins_accounts, 'backup'),
166169
}
167-
account_type = event.get("Type", '').lower()
170+
account_type = event.get("type", '').lower()
168171
if account_type not in functions:
169172
raise Exception(f"Lambda event must have 'Type' parameter with value = ({list(functions.keys())})") #pylint: disable=broad-exception-raised
170173
@@ -173,6 +176,14 @@ Resources:
173176
count = 0
174177
f.write("[\n")
175178
for account in account_iterator():
179+
account['main_exe_uuid'] = event.get("main_exe_uuid", str(uuid.uuid4()))
180+
account['module'] = event.get("module", '').lower()
181+
account['bucket'] = BUCKET
182+
account['dc_account'] = boto3.client('sts').get_caller_identity()['Account']
183+
account['dc_region'] = boto3.session.Session().region_name
184+
account['params'] = event.get("params", '')
185+
account['prefix'] = RESOURCE_PREFIX
186+
account['stack_version'] = event.get("stack_version", '')
176187
if count > 0:
177188
f.write(",\n")
178189
f.write(json.dumps(account))
@@ -285,6 +296,7 @@ Resources:
285296
aws_secret_access_key=credentials['SecretAccessKey'],
286297
aws_session_token=credentials['SessionToken'],
287298
)
299+
288300
Handler: 'index.lambda_handler'
289301
MemorySize: 2688
290302
Timeout: 600
@@ -298,6 +310,7 @@ Resources:
298310
PREDEF_ACCOUNT_LIST_KEY: "account-list/account-list"
299311
EXCLUDED_ACCOUNT_LIST_KEY: "account-list/excluded-linked-account-list.csv"
300312
EUC_ACCOUNT_IDS: !Ref EUCAccountIDs
313+
301314
Metadata:
302315
cfn_nag:
303316
rules_to_suppress:
@@ -310,4 +323,4 @@ Resources:
310323
Type: AWS::Logs::LogGroup
311324
Properties:
312325
LogGroupName: !Sub "/aws/lambda/${LambdaFunction}"
313-
RetentionInDays: 60
326+
RetentionInDays: 60

data-collection/deploy/deploy-data-collection.yaml

Lines changed: 97 additions & 27 deletions
Large diffs are not rendered by default.

data-collection/deploy/module-aws-feeds.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -932,4 +932,4 @@ Resources:
932932
Type: Custom::LambdaAnalyticsExecutor
933933
Properties:
934934
ServiceToken: !Ref LambdaAnalyticsARN
935-
Name: !Ref CFDataName
935+
Name: !Ref CFDataName

data-collection/deploy/module-backup.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ Resources:
320320
DeployRegion: !Ref AWS::Region
321321
Account: !Ref AWS::AccountId
322322
Prefix: !Ref ResourcePrefix
323+
Bucket: !Ref DestinationBucket
323324
'RefreshSchedule${AwsObject}':
324325
Type: AWS::Scheduler::Schedule
325326
Properties:
@@ -338,4 +339,4 @@ Resources:
338339
Type: Custom::LambdaAnalyticsExecutor
339340
Properties:
340341
ServiceToken: !Ref LambdaAnalyticsARN
341-
Name: !Ref CFDataName
342+
Name: !Ref CFDataName

data-collection/deploy/module-budgets.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ Resources:
290290
DeployRegion: !Ref AWS::Region
291291
Account: !Ref AWS::AccountId
292292
Prefix: !Ref ResourcePrefix
293+
Bucket: !Ref DestinationBucket
293294

294295
ModuleRefreshSchedule:
295296
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-compute-optimizer.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ Resources:
501501
DeployRegion: !Ref AWS::Region
502502
Account: !Ref AWS::AccountId
503503
Prefix: !Ref ResourcePrefix
504+
Bucket: !Ref DestinationBucket
504505

505506
ModuleRefreshSchedule:
506507
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-cost-anomaly.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Resources:
425425
DeployRegion: !Ref AWS::Region
426426
Account: !Ref AWS::AccountId
427427
Prefix: !Ref ResourcePrefix
428+
Bucket: !Ref DestinationBucket
428429

429430
ModuleRefreshSchedule:
430431
Type: "AWS::Scheduler::Schedule"

data-collection/deploy/module-cost-explorer-rightsizing.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ Resources:
262262
DeployRegion: !Ref AWS::Region
263263
Account: !Ref AWS::AccountId
264264
Prefix: !Ref ResourcePrefix
265+
Bucket: !Ref DestinationBucket
265266

266267
ModuleRefreshSchedule:
267268
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-ecs-chargeback.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ Resources:
274274
DeployRegion: !Ref AWS::Region
275275
Account: !Ref AWS::AccountId
276276
Prefix: !Ref ResourcePrefix
277+
Bucket: !Ref DestinationBucket
277278

278279
ModuleRefreshSchedule:
279280
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-health-events.yaml

Lines changed: 10 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ Parameters:
5151
Type: String
5252
Description: "ARNs of KMS Keys for data buckets and/or Glue Catalog. Comma separated list, no spaces. Keep empty if data Buckets and Glue Catalog are not Encrypted with KMS. You can also set it to '*' to grant decrypt permission for all the keys."
5353
Default: ""
54+
DetailStepFunctionTemplate:
55+
Type: String
56+
Description: JSON representation of the detail retrieval StepFunction template
5457

5558
Conditions:
5659
NeedDataBucketsKms: !Not [ !Equals [ !Ref DataBucketsKmsKeysArns, "" ] ]
@@ -479,6 +482,7 @@ Resources:
479482
DeployRegion: !Ref AWS::Region
480483
Account: !Ref AWS::AccountId
481484
Prefix: !Ref ResourcePrefix
485+
Bucket: !Ref DestinationBucket
482486

483487
ModuleRefreshSchedule:
484488
Type: 'AWS::Scheduler::Schedule'
@@ -497,84 +501,12 @@ Resources:
497501
StepFunctionDetail:
498502
Type: AWS::StepFunctions::StateMachine
499503
Properties:
500-
StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-detail-StateMachine'
504+
StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-StateMachine'
501505
StateMachineType: STANDARD
502506
RoleArn: !Ref StepFunctionExecutionRoleARN
503-
DefinitionString: |
504-
{
505-
"Comment": "Collects Health Events",
506-
"StartAt": "DetailProcessor Map",
507-
"States": {
508-
"DetailProcessor Map": {
509-
"Type": "Map",
510-
"ItemProcessor": {
511-
"ProcessorConfig": {
512-
"Mode": "DISTRIBUTED",
513-
"ExecutionType": "STANDARD"
514-
},
515-
"StartAt": "DetailLambda Invoke",
516-
"States": {
517-
"DetailLambda Invoke": {
518-
"Type": "Task",
519-
"Resource": "arn:${Partition}:states:::lambda:invoke",
520-
"OutputPath": "$.Payload",
521-
"Parameters": {
522-
"Payload.$": "$",
523-
"FunctionName": "${ModuleLambdaARN}"
524-
},
525-
"Retry": [
526-
{
527-
"ErrorEquals": [
528-
"Lambda.ServiceException",
529-
"Lambda.AWSLambdaException",
530-
"Lambda.SdkClientException",
531-
"Lambda.TooManyRequestsException"
532-
],
533-
"IntervalSeconds": 1,
534-
"MaxAttempts": 3,
535-
"BackoffRate": 2
536-
}
537-
],
538-
"End": true
539-
}
540-
}
541-
},
542-
"Label": "DetailProcessorMap",
543-
"MaxConcurrency": ${MaxConcurrentBatches},
544-
"ItemReader": {
545-
"Resource": "arn:${Partition}:states:::s3:getObject",
546-
"ReaderConfig": {
547-
"InputType": "CSV",
548-
"CSVHeaderLocation": "FIRST_ROW"
549-
},
550-
"Parameters": {
551-
"Bucket.$": "$.bucket",
552-
"Key.$": "$.file"
553-
}
554-
},
555-
"ItemBatcher": {
556-
"MaxItemsPerBatch": ${ItemsPerBatch},
557-
"BatchInput": {
558-
"account.$": "$.account",
559-
"ingestion_time.$": "$.ingestion_time"
560-
}
561-
},
562-
"Next": "CrawlerStepFunctionStartExecution"
563-
},
564-
"CrawlerStepFunctionStartExecution": {
565-
"Type": "Task",
566-
"Resource": "arn:${Partition}:states:::states:startExecution.sync:2",
567-
"Parameters": {
568-
"StateMachineArn": "arn:${Partition}:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine",
569-
"Input": {
570-
"crawlers": ${Crawlers}
571-
}
572-
},
573-
"End": true
574-
}
575-
},
576-
"TimeoutSeconds": 14400
577-
}
507+
DefinitionS3Location:
508+
Bucket: !Ref CodeBucket
509+
Key: !Ref DetailStepFunctionTemplate
578510
DefinitionSubstitutions:
579511
ModuleLambdaARN: !GetAtt LambdaFunction.Arn
580512
Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-detail-Crawler"]'
@@ -587,6 +519,7 @@ Resources:
587519
ItemsPerBatch: 50
588520
MaxConcurrentBatches: 1
589521
Partition: !Ref AWS::Partition
522+
Bucket: !Ref DestinationBucket
590523
Metadata:
591524
cfn-lint:
592525
config:
@@ -596,4 +529,4 @@ Resources:
596529
Type: Custom::LambdaAnalyticsExecutor
597530
Properties:
598531
ServiceToken: !Ref LambdaAnalyticsARN
599-
Name: !Ref CFDataName
532+
Name: !Ref CFDataName

0 commit comments

Comments
 (0)