Skip to content

Commit f0d1a92

Browse files
authored
Added TA Priority data collection and table within TA module (#308)
1 parent 1d6678a commit f0d1a92

File tree

2 files changed

+187
-50
lines changed

2 files changed

+187
-50
lines changed

data-collection/deploy/deploy-in-linked-account.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ Resources:
164164
Action:
165165
- "support:DescribeTrustedAdvisorChecks"
166166
- "support:DescribeTrustedAdvisorCheckResult"
167+
- "trustedadvisor:ListRecommendations"
168+
- "trustedadvisor:ListRecommendationResources"
169+
- "trustedadvisor:GetRecommendation"
167170
Resource: "*" # Wildcard required as actions do not support resource-level permissions
168171
Roles:
169172
- Ref: LambdaRole

data-collection/deploy/module-trusted-advisor.yaml

Lines changed: 184 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -121,34 +121,32 @@ Resources:
121121
ZipFile: |
122122
import os
123123
import json
124+
import logging
124125
from datetime import date, datetime
125126
from json import JSONEncoder
126-
127127
import boto3
128128
from botocore.client import Config
129-
from botocore.exceptions import ClientError
130-
import logging
131129
132130
# Deployment settings read from the environment. The three indexed lookups
# raise KeyError when the variable is not set.
PREFIX = os.environ["PREFIX"]
BUCKET = os.environ["BUCKET_NAME"]
ROLE_NAME = os.environ["ROLENAME"]
# Only the value "yes" (in any letter case) switches cost-only mode on.
COSTONLY = os.environ.get("COSTONLY", "no").lower() == "yes"

# Scratch files the collectors write before upload.
TMP_FILE = "/tmp/data.json"
TMP_FILE_Priority = "/tmp/data_priority.json"
REGIONS = ["us-east-1"]

# Retry settings meant to avoid ThrottlingException.
config = Config(retries=dict(max_attempts=10, mode="standard"))

logger = logging.getLogger(__name__)
# LOG_LEVEL names a logging level; anything that is not an attribute of the
# logging module falls back to INFO.
logger.setLevel(
    getattr(
        logging,
        os.environ.get("LOG_LEVEL", "INFO").upper(),
        logging.INFO,
    )
)
149148
150-
def lambda_handler(event, context):
151-
collection_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
149+
def lambda_handler(event, context): #pylint: disable=unused-argument
152150
if 'account' not in event:
153151
raise ValueError(
154152
"Please do not trigger this Lambda manually."
@@ -159,25 +157,31 @@ Resources:
159157
account_id = account["account_id"]
160158
account_name = account["account_name"]
161159
payer_id = account["payer_id"]
162-
logger.info(f"Collecting data for account: {account_id}")
163-
read_ta(account_id, account_name)
164-
upload_to_s3(account_id, payer_id)
165-
except Exception as e:
160+
161+
logger.info(f"Collecting TA for account: {account_id}")
162+
filename = read_ta(account_id, account_name)
163+
upload_to_s3(account_id, payer_id, "data", filename)
164+
165+
logger.info(f"Collecting TA Priority for account: {account_id}")
166+
filename = read_ta_priority(account_id, account_name)
167+
upload_to_s3(account_id, payer_id, "priority-data", filename)
168+
169+
except Exception as e: #pylint: disable=broad-exception-caught
166170
logging.warning(e)
167171
168-
def upload_to_s3(account_id, payer_id):
169-
if os.path.getsize(TMP_FILE) == 0:
170-
print(f"No data in file for {PREFIX}")
172+
def upload_to_s3(account_id, payer_id, suffix, tmp_file):
    """Upload a collected data file to the destination bucket.

    The object key has the form
    ``{PREFIX}/{PREFIX}-{suffix}/payer_id=.../year=YYYY/month=MM/
    {PREFIX}-{account_id}-DDMMYYYY-HHMMSS.json``, using the current local
    time. An empty file is not uploaded. Errors raised by the upload are
    printed, not propagated.

    Args:
        account_id: Account the data belongs to; part of the object name.
        payer_id: Value of the ``payer_id=`` partition in the key.
        suffix: Data-set folder suffix appended to PREFIX.
        tmp_file: Path of the local file to upload.
    """
    if os.path.getsize(tmp_file) == 0:
        print(f"No data in file for {tmp_file}")
        return

    # Only the date fields go through strftime-style formatting. Passing the
    # whole key through strftime would treat any "%" inside PREFIX, suffix or
    # the ids as a date directive. A single timestamp keeps all parts in sync.
    now = datetime.now()
    key = (
        f"{PREFIX}/{PREFIX}-{suffix}/payer_id={payer_id}"
        f"/year={now:%Y}/month={now:%m}"
        f"/{PREFIX}-{account_id}-{now:%d%m%Y-%H%M%S}.json"
    )

    try:
        boto3.client("s3").upload_file(tmp_file, BUCKET, key)
        print(f"Data for {account_id} in s3 - {key}")
    except Exception as e:  # pylint: disable=broad-exception-caught
        # Best effort: report the failure and let the caller continue.
        print(f"{type(e)}: {e}")
182186
183187
def assume_role(account_id, service, region, role):
@@ -195,35 +199,103 @@ Resources:
195199
)
196200
197201
def _json_serial(self, obj):
198-
if isinstance(obj, (datetime, date)): return obj.isoformat()
199-
return JSONEncoder.default(self, obj)
202+
return obj.isoformat() if isinstance(obj, (datetime, date)) else JSONEncoder.default(self, obj)
200203
201204
def read_ta(account_id, account_name):
    """Collect Trusted Advisor check results for one account into TMP_FILE.

    Each flagged resource of each available check is written as one JSON
    object per line, with every key lower-cased. When COSTONLY is set, only
    checks whose category is "cost_optimizing" are collected. A failure while
    processing one check is printed and the remaining checks are still
    processed.

    Args:
        account_id: Account to collect from; passed to assume_role and
            written into every record.
        account_name: Account name written into every record.

    Returns:
        The path of the file that was written (TMP_FILE).
    """
    # Function-scope import: the file's import block only brings in
    # date and datetime.
    from datetime import timezone

    def _default(value):
        # json.dumps calls its default hook with the value only, while
        # _json_serial is declared as (self, obj).
        return _json_serial(None, value)

    with open(TMP_FILE, "w", encoding='utf-8') as f:
        support = assume_role(account_id, "support", REGIONS[0], ROLE_NAME)
        checks = support.describe_trusted_advisor_checks(language="en")["checks"]
        for check in checks:
            if COSTONLY and check.get("category") != "cost_optimizing":
                continue
            try:
                result = support.describe_trusted_advisor_check_result(
                    checkId=check["id"], language="en"
                )['result']
                if result.get("status") == "not_available":
                    continue
                dt = result['timestamp']
                # The trailing "Z" marks UTC, so compute the epoch seconds
                # explicitly in UTC. strftime('%s') is platform-specific and
                # would treat the naive value as local time.
                parsed = datetime.strptime(dt, '%Y-%m-%dT%H:%M:%SZ')
                ts = str(int(parsed.replace(tzinfo=timezone.utc).timestamp()))
                for resource in result["flaggedResources"]:
                    output = {}
                    if "metadata" in resource:
                        # Pair the check's metadata column names with this
                        # resource's metadata values.
                        output.update(dict(zip(check["metadata"], resource["metadata"])))
                        del resource['metadata']
                    resource["Region"] = resource.pop("region") if "region" in resource else '-'
                    resource["Status"] = resource.pop("status") if "status" in resource else '-'
                    output.update({
                        "AccountId": account_id,
                        "AccountName": account_name,
                        "Category": check["category"],
                        'DateTime': dt,
                        'Timestamp': ts,
                        "CheckName": check["name"],
                        "CheckId": check["id"],
                    })
                    output.update(resource)
                    output = {k.lower(): v for k, v in output.items()}
                    f.write(json.dumps(output, default=_default) + "\n")
            except Exception as e:  # pylint: disable=broad-exception-caught
                # Best effort per check: report and move on to the next one.
                print(f'{type(e)}: {e}')
    return TMP_FILE
233+
234+
def _isoformat(date_value, default='N/A'):
235+
""" Converts a datetime value to ISO format string.
236+
"""
237+
return date_value.isoformat() if isinstance(date_value, datetime) else default
238+
239+
def read_ta_priority(account_id, account_name):
    """Collect Trusted Advisor Priority recommendations into TMP_FILE_Priority.

    One JSON object per line is written for every resource of every
    recommendation of type "priority". A recommendation with no resources
    (or whose resource listing failed) still produces one line, with the
    resource fields set to 'N/A'.

    Args:
        account_id: Account to collect from; passed to assume_role and
            written into every record.
        account_name: Account name written into every record.

    Returns:
        The path of the file that was written (TMP_FILE_Priority).

    Raises:
        Exception: Any error other than a ClientError from the resource
            listing is printed and re-raised.
    """
    trustedadvisor = assume_role(account_id, "trustedadvisor", REGIONS[0], ROLE_NAME)
    try:
        recommendations = (
            trustedadvisor
            .get_paginator('list_recommendations')
            .paginate(type='priority')
            .search('recommendationSummaries[]')
        )
        with open(TMP_FILE_Priority, 'w', encoding='utf-8') as jsonfile:
            for recommendation in recommendations:
                arn = recommendation['arn']
                # Details carry the description and the resolved date.
                details = (
                    trustedadvisor
                    .get_recommendation(recommendationIdentifier=arn)
                    .get('recommendation', {})
                )
                # Resources attached to this recommendation.
                try:
                    resources = list(
                        trustedadvisor
                        .get_paginator('list_recommendation_resources')
                        .paginate(recommendationIdentifier=arn)
                        .search('recommendationResourceSummaries[]')
                    )
                except trustedadvisor.exceptions.ClientError as e:
                    print(f"Error getting resources for recommendation {arn}: {str(e)}")
                    resources = []

                # Fields that may be missing from a response (for example
                # resolvedAt on a recommendation that is not resolved) are
                # read with .get so one such record cannot abort the whole
                # collection. _isoformat turns a missing date into 'N/A'.
                rec_data = {
                    'recommendationArn': arn,
                    'name': recommendation['name'],
                    'description': details.get('description', 'N/A'),
                    'awsServices': recommendation.get('awsServices', 'N/A'),
                    'createdAt': _isoformat(recommendation.get('createdAt')),
                    'resolvedAt': _isoformat(details.get('resolvedAt')),
                    'lastUpdatedAt': _isoformat(recommendation.get('lastUpdatedAt')),
                    'lifecycleStage': recommendation.get('lifecycleStage', 'N/A'),
                    'recommendationStatus': recommendation['status'],
                    'pillars': recommendation['pillars'],
                    'source': recommendation['source'],
                    'accountID': account_id,
                    'accountName': account_name,
                }
                # An empty resource list still yields one placeholder row.
                for resource in (resources or [{}]):
                    resource_data = rec_data.copy()
                    resource_data.update({
                        'awsResourceId': resource.get('awsResourceId', 'N/A'),
                        'exclusionStatus': resource.get('exclusionStatus', 'N/A'),
                        'regionCode': resource.get('regionCode', 'N/A'),
                        'resourceStatus': resource.get('status', 'N/A'),
                    })
                    jsonfile.write(json.dumps(resource_data) + '\n')

    except Exception as e:  # pylint: disable=broad-exception-caught
        print(f"Error processing TA-Priority: {str(e)}")
        raise
    return TMP_FILE_Priority
297+
298+
227299
Handler: 'index.lambda_handler'
228300
MemorySize: 2688
229301
Timeout: 300
@@ -257,8 +329,70 @@ Resources:
257329
Targets:
258330
S3Targets:
259331
- Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-data/"
332+
- Path: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-priority-data/"
260333
Configuration: "{\"Version\":1.0,\"Grouping\":{\"TableGroupingPolicy\":\"CombineCompatibleSchemas\"}}"
261334

335+
# Glue table for Trusted Advisor Priority data
336+
TAPriorityTable:
337+
Type: AWS::Glue::Table
338+
Properties:
339+
DatabaseName: !Ref DatabaseName
340+
CatalogId: !Ref AWS::AccountId
341+
TableInput:
342+
Name: trusted_advisor_priority_data
343+
TableType: EXTERNAL_TABLE
344+
StorageDescriptor:
345+
Columns:
346+
- Name: recommendationArn
347+
Type: string
348+
- Name: name
349+
Type: string
350+
- Name: description
351+
Type: string
352+
- Name: awsServices
353+
Type: string
354+
- Name: createdAt
355+
Type: string
356+
- Name: resolvedAt
357+
Type: string
358+
- Name: lastUpdatedAt
359+
Type: string
360+
- Name: lifecycleStage
361+
Type: string
362+
- Name: recommendationStatus
363+
Type: string
364+
- Name: pillars
365+
Type: string
366+
- Name: source
367+
Type: string
368+
- Name: accountID
369+
Type: string
370+
- Name: accountName
371+
Type: string
372+
- Name: awsResourceId
373+
Type: string
374+
- Name: exclusionStatus
375+
Type: string
376+
- Name: regionCode
377+
Type: string
378+
- Name: resourceStatus
379+
Type: string
380+
Location: !Sub "s3://${DestinationBucket}/${CFDataName}/${CFDataName}-priority-data/"
381+
InputFormat: org.apache.hadoop.mapred.TextInputFormat
382+
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
383+
Parameters:
384+
UPDATED_BY_CRAWLER: !Sub '${ResourcePrefix}${CFDataName}-Crawler'
385+
SerdeInfo:
386+
SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
387+
Parameters:
388+
paths: recommendationArn,name,description,awsServices,createdAt,resolvedAt,lastUpdatedAt,lifecycleStage,recommendationStatus,pillars,source,accountID,accountName,awsResourceId,exclusionStatus,regionCode,resourceStatus
389+
PartitionKeys:
390+
- Name: payer_id
391+
Type: string
392+
- Name: year
393+
Type: string
394+
- Name: month
395+
Type: string
262396

263397
ModuleStepFunction:
264398
Type: AWS::StepFunctions::StateMachine

0 commit comments

Comments
 (0)