Skip to content

Commit aa76aa8

Browse files
authored
Merge branch 'main' into feature/multi-language-support
2 parents fbca17a + 5377959 commit aa76aa8

39 files changed

+2554
-422
lines changed

data-collection/deploy/account-collector.yaml

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ Resources:
128128
ZipFile: |
129129
import os
130130
import json
131+
import uuid
131132
import logging
133+
from datetime import datetime
132134
from functools import partial
133135
134136
import boto3
@@ -141,6 +143,9 @@ Resources:
141143
EXCLUDED_ACCOUNT_LIST_KEY = os.environ.get('EXCLUDED_ACCOUNT_LIST_KEY')
142144
EUC_ACCOUNTS = os.environ.get('EUC_ACCOUNT_IDS', '').strip()
143145
TMP_FILE = "/tmp/data.json"
146+
START_TIME = str(datetime.now().isoformat())
147+
LINKED_ACCOUNT_LIST_KEY = os.environ.get('LINKED_ACCOUNT_LIST_KEY', 'linked-account-list.json')
148+
PAYER_ACCOUNT_LIST_KEY = os.environ.get('PAYER_ACCOUNT_LIST_KEY', 'payer-account-list.json')
144149
145150
logger = logging.getLogger(__name__)
146151
logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
@@ -156,6 +161,8 @@ Resources:
156161
logger.error(message)
157162
raise Exception(message) #pylint: disable=broad-exception-raised
158163
164+
module = event.get("module", '').lower()
165+
params = event.get("params", '')
159166
functions = { # keep keys same as boto3 services
160167
'linked': iterate_linked_accounts,
161168
'euc': partial(iterate_accounts_with_filter, EUC_ACCOUNTS),
@@ -164,7 +171,7 @@ Resources:
164171
'compute-optimizer': partial(iterate_admins_accounts, 'compute-optimizer'),
165172
'backup': partial(iterate_admins_accounts, 'backup'),
166173
}
167-
account_type = event.get("Type", '').lower()
174+
account_type = event.get("type", '').lower()
168175
if account_type not in functions:
169176
raise Exception(f"Lambda event must have 'Type' parameter with value = ({list(functions.keys())})") #pylint: disable=broad-exception-raised
170177
@@ -173,6 +180,14 @@ Resources:
173180
count = 0
174181
f.write("[\n")
175182
for account in account_iterator():
183+
account['main_exe_uuid'] = event.get("main_exe_uuid", str(uuid.uuid4()))
184+
account['module'] = module
185+
account['bucket'] = BUCKET
186+
account['dc_account'] = boto3.client('sts').get_caller_identity()['Account']
187+
account['dc_region'] = boto3.session.Session().region_name
188+
account['params'] = params
189+
account['prefix'] = RESOURCE_PREFIX
190+
account['stack_version'] = event.get("stack_version", '')
176191
if count > 0:
177192
f.write(",\n")
178193
f.write(json.dumps(account))
@@ -183,6 +198,7 @@ Resources:
183198
raise Exception('No accounts found. Check the log.') #pylint: disable=broad-exception-raised
184199
185200
key = f"account-list/{account_type}-account-list.json"
201+
key = f"account-collector/{module+'-'+(params+'-' if params else '')+(LINKED_ACCOUNT_LIST_KEY if account_type == 'linked' else PAYER_ACCOUNT_LIST_KEY)}"
186202
s3 = boto3.client('s3')
187203
s3.upload_file(TMP_FILE, Bucket=BUCKET, Key=key)
188204
@@ -285,6 +301,7 @@ Resources:
285301
aws_secret_access_key=credentials['SecretAccessKey'],
286302
aws_session_token=credentials['SessionToken'],
287303
)
304+
288305
Handler: 'index.lambda_handler'
289306
MemorySize: 2688
290307
Timeout: 600
@@ -298,6 +315,7 @@ Resources:
298315
PREDEF_ACCOUNT_LIST_KEY: "account-list/account-list"
299316
EXCLUDED_ACCOUNT_LIST_KEY: "account-list/excluded-linked-account-list.csv"
300317
EUC_ACCOUNT_IDS: !Ref EUCAccountIDs
318+
301319
Metadata:
302320
cfn_nag:
303321
rules_to_suppress:
@@ -310,4 +328,4 @@ Resources:
310328
Type: AWS::Logs::LogGroup
311329
Properties:
312330
LogGroupName: !Sub "/aws/lambda/${LambdaFunction}"
313-
RetentionInDays: 60
331+
RetentionInDays: 60

data-collection/deploy/deploy-data-collection.yaml

Lines changed: 162 additions & 88 deletions
Large diffs are not rendered by default.

data-collection/deploy/deploy-data-read-permissions.yaml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# https://github.com/awslabs/cid-data-collection-framework/blob/main/data-collection/deploy/deploy-data-read-permissions.yaml
1+
# https://github.com/awslabs/cid-data-collection-framework/blob/main/data-collection/v3.11.0/deploy/deploy-data-read-permissions.yaml
22
AWSTemplateFormatVersion: '2010-09-09'
3-
Description: CID Data Collection - All-in-One for Management Account v3.9.1 - AWS Solution SO9011
3+
Description: CID Data Collection - All-in-One for Management Account v3.11.0 - AWS Solution SO9011
44
Metadata:
55
AWS::CloudFormation::Interface:
66
ParameterGroups:
@@ -105,7 +105,7 @@ Parameters:
105105
Default: "CID-DC-"
106106
CFNSourceBucket:
107107
Type: String
108-
Description: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. Must be allways 'aws-managed-cost-intelligence-dashboards'"
108+
Description: "DO NOT CHANGE - A bucket that contains WA-Labs CloudFormation templates. Must be always 'aws-managed-cost-intelligence-dashboards'"
109109
Default: "aws-managed-cost-intelligence-dashboards"
110110
IncludeBudgetsModule:
111111
Type: String
@@ -190,7 +190,7 @@ Resources:
190190
DataCollectorMgmtAccountReadStack:
191191
Type: AWS::CloudFormation::Stack
192192
Properties:
193-
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/deploy-in-management-account.yaml"
193+
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-management-account.yaml"
194194
Parameters:
195195
DataCollectionAccountID: !Ref DataCollectionAccountID
196196
ManagementAccountRole: !Ref ManagementAccountRole
@@ -206,7 +206,7 @@ Resources:
206206
Type: AWS::CloudFormation::Stack
207207
Condition: DeployModuleReadInMgmt
208208
Properties:
209-
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/deploy-in-linked-account.yaml"
209+
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml"
210210
Parameters:
211211
DataCollectionAccountID: !Ref DataCollectionAccountID
212212
MultiAccountRoleName: !Ref MultiAccountRoleName
@@ -223,7 +223,7 @@ Resources:
223223
DataCollectorOrgAccountModulesReadStackSet:
224224
Type: AWS::CloudFormation::StackSet
225225
Properties:
226-
Description: "StackSet in charge of deploying read roles across organization accounts v3.9.1"
226+
Description: "StackSet in charge of deploying read roles across organization accounts v3.11.0"
227227
PermissionModel: SERVICE_MANAGED
228228
AutoDeployment:
229229
Enabled: true
@@ -268,4 +268,4 @@ Resources:
268268
- CAPABILITY_IAM
269269
- CAPABILITY_NAMED_IAM
270270
StackSetName: !Sub "StackSet-${AWS::AccountId}-OptimizationDataRole"
271-
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/deploy-in-linked-account.yaml"
271+
TemplateURL: !Sub "https://${CFNSourceBucket}.s3.${AWS::URLSuffix}/cfn/data-collection/v3.11.0/deploy-in-linked-account.yaml"

data-collection/deploy/deploy-in-linked-account.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: '2010-09-09'
2-
Description: CID Data Collection - Role for Linked Account v3.9.1
2+
Description: CID Data Collection - Role for Linked Account v3.11.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:
@@ -262,6 +262,7 @@ Resources:
262262
- "ec2:DescribeInstances"
263263
- "ec2:DescribeVpcs"
264264
- "ec2:DescribeRegions"
265+
- "ec2:DescribeNetworkInterfaces"
265266
- "es:ListDomainNames"
266267
- "eks:ListClusters"
267268
- "eks:ListNodegroups"

data-collection/deploy/deploy-in-management-account.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: '2010-09-09'
2-
Description: CID Data Collection - Role for Management Account v3.9.1
2+
Description: CID Data Collection - Role for Management Account v3.11.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:

data-collection/deploy/module-aws-feeds.yaml

Lines changed: 79 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,18 @@ Resources:
111111
from dateutil.parser import parse
112112
import boto3
113113
114+
FEEDS_MAP = {
115+
"aws": {
116+
"path": "aws-feeds/aws-feeds-whats-new",
117+
"feed_url": "https://aws.amazon.com/about-aws/whats-new/recent/feed/"
118+
},
119+
"aws-cid": {
120+
"path": "aws-feeds/aws-cid-feeds-whats-new",
121+
"feed_url": "https://cid.workshops.aws.dev/feed/cloud-intelligence-dashboards.rss",
122+
"default_services": ["aws-cid"],
123+
}
124+
}
125+
114126
def clean_html(html_content):
115127
class MyParser(HTMLParser):
116128
def __init__(self):
@@ -136,81 +148,82 @@ Resources:
136148
return parser.text.strip() + '\n\n' + '\n'.join([f"[{index}]: {url}" for index, url in parser.ref.items()])
137149
138150
def lambda_handler(event, context):
139-
feed_url = os.environ['FEED_URL']
151+
feeds_list = os.environ['FEEDS_LIST'].split(',')
140152
bucket_name = os.environ['BUCKET_NAME']
141-
bucket_path = os.environ.get('BUCKET_PATH', '')
142153
143154
try:
144-
with urllib.request.urlopen(feed_url, timeout=10) as response: # nosec
145-
feed_data = response.read().decode('utf-8')
146-
147-
malicious_strings = ['!ENTITY', ':include']
148-
for string in malicious_strings:
149-
if string in feed_data:
150-
return {
151-
'statusCode': 400,
152-
'body': f'Malicious content detected in the XML feed: {string}'
153-
}
154-
155-
s3 = boto3.client('s3')
156-
root = ET.fromstring(feed_data) # nosec
157-
158-
date_grouped_records = {}
159-
160-
for item in root.findall('.//item'):
161-
try:
162-
link = item.find('link').text
163-
title = item.find('title').text
164-
description = item.find('description').text or ''
165-
pubDate = item.find('pubDate').text
166-
category = item.find('category').text or ''
155+
for entry in feeds_list:
156+
feed_url = FEEDS_MAP[entry]['feed_url']
157+
bucket_path = FEEDS_MAP[entry]['path']
158+
with urllib.request.urlopen(feed_url, timeout=10) as response: # nosec
159+
feed_data = response.read().decode('utf-8')
160+
161+
malicious_strings = ['!ENTITY', ':include']
162+
for string in malicious_strings:
163+
if string in feed_data:
164+
return {
165+
'statusCode': 400,
166+
'body': f'Malicious content detected in the XML feed: {string}'
167+
}
167168
168-
# Parsing and formatting pubDate to ISO 8601 format
169-
pubDate_datetime = parse(pubDate)
170-
formatted_date = pubDate_datetime.strftime('%Y-%m-%dT%H:%M:%SZ')
169+
s3 = boto3.client('s3')
170+
root = ET.fromstring(feed_data) # nosec
171171
172-
year, month, day = formatted_date[:10].split('-')
173-
date_key = f"{year}-{month}-{day}"
174-
description_cleaned = clean_html(description)
172+
date_grouped_records = {}
175173
176-
categories = category.split(',')
177-
services = []
178-
category_values = []
179-
180-
for cat in categories:
181-
if cat.startswith('general:products/'):
182-
services.append(cat.replace('general:products/', ''))
183-
elif cat.startswith('marketing:marchitecture/'):
184-
category_values.append(cat.replace('marketing:marchitecture/', ''))
185-
186-
for service in services:
187-
for category_value in category_values:
188-
json_record = {
189-
'link': link,
190-
'title': title,
191-
'description': description_cleaned,
192-
'date': formatted_date,
193-
'service': service,
194-
'category': category_value
195-
}
196-
if date_key not in date_grouped_records:
197-
date_grouped_records[date_key] = []
198-
date_grouped_records[date_key].append(json_record)
199-
200-
except Exception as e:
201-
print(f"Error processing item: {ET.tostring(item, encoding='unicode')}. Exception: {str(e)}")
174+
for item in root.findall('.//item'):
175+
try:
176+
link = item.find('link').text
177+
title = item.find('title').text
178+
description = item.find('description').text or ''
179+
pubDate = item.find('pubDate').text
180+
category = item.find('category').text or ''
181+
# Parsing and formatting pubDate to ISO 8601 format
182+
pubDate_datetime = parse(pubDate)
183+
formatted_date = pubDate_datetime.strftime('%Y-%m-%dT%H:%M:%SZ')
202184
203-
for date_key, records in date_grouped_records.items():
204-
year, month, day = date_key.split('-')
205-
json_lines = '\n'.join(json.dumps(record) for record in records)
206-
s3_key = f'{bucket_path}/year={year}/month={month}/day={day}/whats_new.jsonl'
207-
s3.put_object(Body=json_lines, Bucket=bucket_name, Key=s3_key)
185+
year, month, day = formatted_date[:10].split('-')
186+
date_key = f"{year}-{month}-{day}"
187+
description_cleaned = clean_html(description)
188+
189+
categories = category.split(',')
190+
services = FEEDS_MAP[entry].get('default_services', [])
191+
category_values = []
192+
193+
for cat in categories:
194+
if cat.startswith('general:products/'):
195+
services.append(cat.replace('general:products/', ''))
196+
elif cat.startswith('marketing:marchitecture/'):
197+
category_values.append(cat.replace('marketing:marchitecture/', ''))
198+
else:
199+
category_values = categories
200+
for service in services:
201+
for category_value in category_values:
202+
json_record = {
203+
'link': link,
204+
'title': title,
205+
'description': description_cleaned,
206+
'date': formatted_date,
207+
'service': service,
208+
'category': category_value
209+
}
210+
if date_key not in date_grouped_records:
211+
date_grouped_records[date_key] = []
212+
date_grouped_records[date_key].append(json_record)
213+
except Exception as e:
214+
print(f"Error processing item: {ET.tostring(item, encoding='unicode')}. Exception: {str(e)}")
215+
216+
for date_key, records in date_grouped_records.items():
217+
year, month, day = date_key.split('-')
218+
json_lines = '\n'.join(json.dumps(record) for record in records)
219+
s3_key = f'{bucket_path}/year={year}/month={month}/day={day}/whats_new.jsonl'
220+
s3.put_object(Body=json_lines, Bucket=bucket_name, Key=s3_key)
208221
209222
return {
210223
'statusCode': 200,
211224
'body': f'Feed downloaded and grouped by date then uploaded to S3 bucket {bucket_name}'
212225
}
213-
226+
214227
except urllib.error.URLError as e:
215228
return {
216229
'statusCode': 500,
@@ -231,15 +244,15 @@ Resources:
231244
'statusCode': 500,
232245
'body': f'Error processing feed: {str(e)}'
233246
}
247+
234248
Handler: 'index.lambda_handler'
235249
MemorySize: 256
236250
Timeout: 60
237251
Role: !GetAtt LambdaRole.Arn
238252
Environment:
239253
Variables:
240254
BUCKET_NAME: !Ref DestinationBucket
241-
BUCKET_PATH: "aws-feeds/aws-feeds-whats-new"
242-
FEED_URL: "https://aws.amazon.com/about-aws/whats-new/recent/feed/"
255+
FEEDS_LIST: "aws,aws-cid"
243256
Metadata:
244257
cfn_nag:
245258
rules_to_suppress:
@@ -702,6 +715,7 @@ Resources:
702715
Targets:
703716
S3Targets:
704717
- Path: !Sub "s3://${DestinationBucket}/aws-feeds/aws-feeds-whats-new/"
718+
- Path: !Sub "s3://${DestinationBucket}/aws-feeds/aws-cid-feeds-whats-new/"
705719
Configuration: "{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"}}}"
706720

707721
CrawlerBlogPost:

data-collection/deploy/module-backup.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ Resources:
320320
DeployRegion: !Ref AWS::Region
321321
Account: !Ref AWS::AccountId
322322
Prefix: !Ref ResourcePrefix
323+
Bucket: !Ref DestinationBucket
323324
'RefreshSchedule${AwsObject}':
324325
Type: AWS::Scheduler::Schedule
325326
Properties:
@@ -338,4 +339,4 @@ Resources:
338339
Type: Custom::LambdaAnalyticsExecutor
339340
Properties:
340341
ServiceToken: !Ref LambdaAnalyticsARN
341-
Name: !Ref CFDataName
342+
Name: !Ref CFDataName

data-collection/deploy/module-budgets.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ Resources:
290290
DeployRegion: !Ref AWS::Region
291291
Account: !Ref AWS::AccountId
292292
Prefix: !Ref ResourcePrefix
293+
Bucket: !Ref DestinationBucket
293294

294295
ModuleRefreshSchedule:
295296
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-compute-optimizer.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,11 @@ Resources:
329329
Status: Enabled
330330
NoncurrentVersionExpiration:
331331
NoncurrentDays: 1
332+
- Id: DeleteIncompleteMultipartUploadsAndExpiredDeleteMarkers
333+
Status: Enabled
334+
AbortIncompleteMultipartUpload:
335+
DaysAfterInitiation: 7
336+
ExpiredObjectDeleteMarker: true
332337
Tags: # Hacky way to manage dependencies
333338
- Key: IgnoreMeIamOnlyWorkaround
334339
Value: !GetAtt StackSetExecutionRole.Arn
@@ -496,6 +501,7 @@ Resources:
496501
DeployRegion: !Ref AWS::Region
497502
Account: !Ref AWS::AccountId
498503
Prefix: !Ref ResourcePrefix
504+
Bucket: !Ref DestinationBucket
499505

500506
ModuleRefreshSchedule:
501507
Type: 'AWS::Scheduler::Schedule'

data-collection/deploy/module-cost-anomaly.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ Resources:
425425
DeployRegion: !Ref AWS::Region
426426
Account: !Ref AWS::AccountId
427427
Prefix: !Ref ResourcePrefix
428+
Bucket: !Ref DestinationBucket
428429

429430
ModuleRefreshSchedule:
430431
Type: "AWS::Scheduler::Schedule"

0 commit comments

Comments
 (0)