@@ -111,6 +111,18 @@ Resources:
111111 from dateutil.parser import parse
112112 import boto3
113113
# Registry of RSS feeds this Lambda can ingest, keyed by the short names
# passed in the FEEDS_LIST environment variable (comma-separated).
#   path             - S3 prefix (under BUCKET_NAME) where the feed's
#                      partitioned JSONL output is written
#   feed_url         - RSS endpoint to download
#   default_services - optional seed values for the 'service' field, used by
#                      feeds whose items carry no 'general:products/...'
#                      categories (e.g. the CID feed)
FEEDS_MAP = {
    "aws": {
        "path": "aws-feeds/aws-feeds-whats-new",
        "feed_url": "https://aws.amazon.com/about-aws/whats-new/recent/feed/"
    },
    "aws-cid": {
        "path": "aws-feeds/aws-cid-feeds-whats-new",
        "feed_url": "https://cid.workshops.aws.dev/feed/cloud-intelligence-dashboards.rss",
        "default_services": ["aws-cid"],
    }
}
125+
114126 def clean_html(html_content):
115127 class MyParser(HTMLParser):
116128 def __init__(self):
@@ -136,81 +148,82 @@ Resources:
136148 return parser.text.strip() + '\n\n' + '\n'.join([f"[{index}]: {url}" for index, url in parser.ref.items()])
137149
def lambda_handler(event, context):
    """Download each configured RSS feed, group its items by publication
    date, and upload one JSONL object per date to S3 under
    ``<path>/year=YYYY/month=MM/day=DD/whats_new.jsonl``.

    Environment:
        FEEDS_LIST  comma-separated keys into FEEDS_MAP
        BUCKET_NAME destination S3 bucket

    Returns:
        dict with 'statusCode' and 'body' (API-Gateway-style response).
    """
    feeds_list = os.environ['FEEDS_LIST'].split(',')
    bucket_name = os.environ['BUCKET_NAME']

    try:
        # One client for all feeds (was re-created inside the feed loop).
        s3 = boto3.client('s3')

        for entry in feeds_list:
            feed_url = FEEDS_MAP[entry]['feed_url']
            bucket_path = FEEDS_MAP[entry]['path']
            with urllib.request.urlopen(feed_url, timeout=10) as response:  # nosec
                feed_data = response.read().decode('utf-8')

            # Reject XML that could carry entity-expansion / XInclude payloads
            # before handing it to the parser.
            for marker in ('!ENTITY', ':include'):
                if marker in feed_data:
                    return {
                        'statusCode': 400,
                        'body': f'Malicious content detected in the XML feed: {marker}'
                    }

            root = ET.fromstring(feed_data)  # nosec

            # date_key ('YYYY-MM-DD') -> list of JSON records for that day.
            date_grouped_records = {}

            for item in root.findall('.//item'):
                try:
                    link = item.find('link').text
                    title = item.find('title').text
                    description = item.find('description').text or ''
                    pubDate = item.find('pubDate').text
                    category = item.find('category').text or ''

                    # Normalise pubDate to ISO 8601 with a 'Z' suffix.
                    formatted_date = parse(pubDate).strftime('%Y-%m-%dT%H:%M:%SZ')
                    date_key = formatted_date[:10]  # YYYY-MM-DD
                    description_cleaned = clean_html(description)

                    categories = category.split(',')
                    # BUG FIX: copy the default list. The original bound
                    # `services` directly to FEEDS_MAP[entry]['default_services']
                    # and then append()ed to it, mutating the shared module
                    # constant — services leaked across items and across warm
                    # Lambda invocations.
                    services = list(FEEDS_MAP[entry].get('default_services', []))
                    category_values = []

                    for cat in categories:
                        if cat.startswith('general:products/'):
                            services.append(cat.replace('general:products/', ''))
                        elif cat.startswith('marketing:marchitecture/'):
                            category_values.append(cat.replace('marketing:marchitecture/', ''))
                    # BUG FIX: the original reset category_values to the raw
                    # category list from an else-branch *inside* the loop,
                    # clobbering marchitecture values already collected
                    # whenever any single unrecognised category appeared.
                    # Fall back to the raw categories only when none matched.
                    if not category_values:
                        category_values = categories

                    for service in services:
                        for category_value in category_values:
                            json_record = {
                                'link': link,
                                'title': title,
                                'description': description_cleaned,
                                'date': formatted_date,
                                'service': service,
                                'category': category_value
                            }
                            date_grouped_records.setdefault(date_key, []).append(json_record)
                except Exception as e:
                    # Best effort: one malformed <item> must not abort the feed.
                    print(f"Error processing item: {ET.tostring(item, encoding='unicode')}. Exception: {str(e)}")

            for date_key, records in date_grouped_records.items():
                year, month, day = date_key.split('-')
                json_lines = '\n'.join(json.dumps(record) for record in records)
                s3_key = f'{bucket_path}/year={year}/month={month}/day={day}/whats_new.jsonl'
                s3.put_object(Body=json_lines, Bucket=bucket_name, Key=s3_key)

        return {
            'statusCode': 200,
            'body': f'Feed downloaded and grouped by date then uploaded to S3 bucket {bucket_name}'
        }

    except urllib.error.URLError as e:
        # NOTE(review): the diff hunk cuts off here (new lines 230-243 are
        # not visible); the URLError message below and any intermediate
        # except clauses must be reconciled against the full file.
        return {
            'statusCode': 500,
            'body': f'Error downloading feed: {str(e)}'
        }
    except Exception as e:
        return {
            'statusCode': 500,
            'body': f'Error processing feed: {str(e)}'
        }
247+
234248 Handler : ' index.lambda_handler'
235249 MemorySize : 256
236250 Timeout : 60
237251 Role : !GetAtt LambdaRole.Arn
238252 Environment :
239253 Variables :
240254 BUCKET_NAME : !Ref DestinationBucket
241- BUCKET_PATH : " aws-feeds/aws-feeds-whats-new"
242- FEED_URL : " https://aws.amazon.com/about-aws/whats-new/recent/feed/"
255+ FEEDS_LIST : " aws,aws-cid"
243256 Metadata :
244257 cfn_nag :
245258 rules_to_suppress :
@@ -702,6 +715,7 @@ Resources:
702715 Targets :
703716 S3Targets :
704717 - Path : !Sub "s3://${DestinationBucket}/aws-feeds/aws-feeds-whats-new/"
718+ - Path : !Sub "s3://${DestinationBucket}/aws-feeds/aws-cid-feeds-whats-new/"
705719 Configuration : " {\" Version\" :1.0,\" CrawlerOutput\" :{\" Partitions\" :{\" AddOrUpdateBehavior\" :\" InheritFromTable\" }}}"
706720
707721 CrawlerBlogPost :
0 commit comments