diff --git a/data-collection/deploy/deploy-data-collection.yaml b/data-collection/deploy/deploy-data-collection.yaml
index 41f82939..73d7a37d 100644
--- a/data-collection/deploy/deploy-data-collection.yaml
+++ b/data-collection/deploy/deploy-data-collection.yaml
@@ -109,9 +109,10 @@ Mappings:
     us-west-1: {CodeBucket: aws-managed-cost-intelligence-dashboards-us-west-1 }
     us-west-2: {CodeBucket: aws-managed-cost-intelligence-dashboards-us-west-2 }
   StepFunctionCode:
-    main-v3: {TemplatePath: cfn/data-collection/source/step-functions/main-state-machine-v3.json}
-    crawler-v1: {TemplatePath: cfn/data-collection/source/step-functions/crawler-state-machine-v1.json}
-    standalone-v1: {TemplatePath: cfn/data-collection/source/step-functions/awsfeeds-state-machine-v1.json}
+    main-v3: {TemplatePath: cfn/data-collection/source/step-functions/main-state-machine-v3.json}
+    crawler-v1: {TemplatePath: cfn/data-collection/source/step-functions/crawler-state-machine-v1.json}
+    standalone-v1: {TemplatePath: cfn/data-collection/source/step-functions/awsfeeds-state-machine-v1.json}
+    generalDatasets-v1: {TemplatePath: cfn/data-collection/source/step-functions/general-datasets-state-machine-v1.json}
 
 Parameters:
   DestinationBucket:
@@ -995,6 +996,7 @@ Resources:
         AccountCollectorLambdaARN: !Sub "${AccountCollector.Outputs.LambdaFunctionARN}"
         CodeBucket: !If [ ProdCFNTemplateUsed, !FindInMap [RegionMap, !Ref "AWS::Region", CodeBucket], !Ref CFNSourceBucket ]
         StepFunctionTemplate: !FindInMap [StepFunctionCode, main-v3, TemplatePath]
+        GeneralDatasetsStepFunctionTemplate: !FindInMap [StepFunctionCode, generalDatasets-v1, TemplatePath]
         StepFunctionExecutionRoleARN: !GetAtt StepFunctionExecutionRole.Arn
         LambdaManageGlueTableARN: !GetAtt LambdaManageGlueTable.Arn
         SchedulerExecutionRoleARN: !GetAtt SchedulerExecutionRole.Arn
diff --git a/data-collection/deploy/deploy-in-linked-account.yaml b/data-collection/deploy/deploy-in-linked-account.yaml
index 56f86021..4fd59b3b 100644
--- a/data-collection/deploy/deploy-in-linked-account.yaml
+++ b/data-collection/deploy/deploy-in-linked-account.yaml
@@ -212,6 +212,7 @@ Resources:
               - Effect: "Allow"
                 Action:
                   - "ec2:DescribeImages"
+                  - "ec2:DescribeInstanceTypes"
                   - "ec2:DescribeVolumes"
                   - "ec2:DescribeSnapshots"
                   - "ec2:DescribeRegions"
diff --git a/data-collection/deploy/module-inventory.yaml b/data-collection/deploy/module-inventory.yaml
index 2ba34409..c122ec8f 100644
--- a/data-collection/deploy/module-inventory.yaml
+++ b/data-collection/deploy/module-inventory.yaml
@@ -45,6 +45,9 @@ Parameters:
   StepFunctionTemplate:
     Type: String
     Description: S3 key to the JSON template for the StepFunction
+  GeneralDatasetsStepFunctionTemplate:
+    Type: String
+    Description: S3 key to the JSON template for the General Datasets StepFunction
   StepFunctionExecutionRoleARN:
     Type: String
     Description: Common role for Step Function execution
@@ -58,6 +61,10 @@
     Type: CommaDelimitedList
     Default: OpensearchDomains, ElasticacheClusters, RdsDbInstances, EBS, AMI, Snapshot, Ec2Instances, VpcInstances, RdsDbSnapshots, EKSClusters, LambdaFunctions, RdsDbClusters
     Description: Services for pulling price data
+  AwsGeneralDatasets:
+    Type: CommaDelimitedList
+    Default: EbsOptimizedInstances
+    Description: General datasets used by inventory-based dashboards or predefined queries for inventory optimisation
 
 Mappings:
   ServicesMap:
@@ -1003,6 +1010,37 @@ Mappings:
                 paths: functionname, functionarn, runtime, role, handler, codesize, dedscription, timeout, memorysize, lastmodified, codesha256, version, tracingconfig, revisionid, packagetype, architectures, ephemeralstorage, snapstart, loggingconfig, accountid, collection_date, region, layers, vpcconfig
               SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
           TableType: EXTERNAL_TABLE
+
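+  # Glue table definitions for the general datasets, keyed by the names listed in
+  # the AwsGeneralDatasets parameter; "path" is the S3 suffix the collector writes to.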
+  GeneralDatasetsMap:
+    EbsOptimizedInstances:
+      path: ebs-optimized-instances
+      table:
+        - Name: inventory_ebs_optimized_instances_data
+          Parameters: { "classification" : "json", "compressionType": "none" }
+          StorageDescriptor:
+            Columns:
+              - Name: instance_type
+                Type: string
+              - Name: max_bandwidth
+                Type: string
+              - Name: max_iops
+                Type: string
+              - Name: max_throughput
+                Type: string
+              - Name: region
+                Type: string
+              - Name: collection_date
+                Type: string
+            InputFormat: org.apache.hadoop.mapred.TextInputFormat
+            Location: !Sub s3://${DestinationBucket}/inventory/inventory-ebs-optimized-instances-data/
+            OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            SerdeInfo:
+              Parameters:
+                paths: instanceType, maxBandwidth, maxIops, maxThroughput, region, collection_date
+              SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
+          TableType: EXTERNAL_TABLE
 
 Resources:
   LambdaRole:
@@ -1361,6 +1399,211 @@ Resources:
       Target:
         Arn: !GetAtt [!Sub 'StepFunction${AwsObject}', Arn]
         RoleArn: !Ref SchedulerExecutionRoleARN
+
+  GeneralDatasetsLambdaFunction:
+    Type: AWS::Lambda::Function
+    Properties:
+      FunctionName: !Sub '${ResourcePrefix}${CFDataName}-GeneralDatasets-Lambda'
+      Description: !Sub "Lambda Function to retrieve ${CFDataName}"
+      Runtime: python3.12
+      Architectures: [x86_64]
+      Code:
+        ZipFile: |
+          """ Retrieve general datasets and store info to s3 bucket
+          """
+          import os
+          import json
+          import logging
+          from functools import lru_cache
+          from datetime import datetime, date
+
+          import boto3
+          from botocore.client import Config
+
+          TMP_FILE = "/tmp/data.json"
+          PREFIX = os.environ['PREFIX']
+          BUCKET = os.environ["BUCKET_NAME"]
+          ROLENAME = os.environ['ROLENAME']
+          TRACKING_TAGS = os.environ.get("TRACKING_TAGS")
+          TAG_LIST = TRACKING_TAGS.split(",") if TRACKING_TAGS else []
+
+          logger = logging.getLogger(__name__)
+          logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
+
+          def to_json(obj):
+              """json helper for date time data"""
+              return json.dumps(
+                  obj,
+                  default=lambda x:
+                      x.isoformat() if isinstance(x, (date, datetime)) else None
+              )
+
+          @lru_cache(maxsize=1)
+          def assume_session(region):
+              """assume role in data collection account"""
+              sts_client = boto3.client('sts', region_name=region)
+              account_id = sts_client.get_caller_identity().get("Account")
+              partition = boto3.session.Session().get_partition_for_region(region_name=region)
+              credentials = sts_client.assume_role(
+                  RoleArn=f"arn:{partition}:iam::{account_id}:role/{ROLENAME}",
+                  RoleSessionName="data_collection"
+              )['Credentials']
+              return boto3.session.Session(
+                  aws_access_key_id=credentials['AccessKeyId'],
+                  aws_secret_access_key=credentials['SecretAccessKey'],
+                  aws_session_token=credentials['SessionToken']
+              )
+
+          def fetch_ebs_optimized_instances():
+              session = assume_session("us-east-1")
+              ec2_client = session.client('ec2')
+              paginator = ec2_client.get_paginator('describe_instance_types')
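+              # Filter to instance types that are EBS-optimized by default; these all
+              # expose the EbsOptimizedInfo limits (bandwidth/IOPS/throughput) read below.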
instance_type["EbsInfo"]["EbsOptimizedInfo"]["MaximumBandwidthInMbps"], + "maxIops": instance_type["EbsInfo"]["EbsOptimizedInfo"]["MaximumIops"], + "maxThroughput": instance_type["EbsInfo"]["EbsOptimizedInfo"]["MaximumThroughputInMBps"], + "region": "us-east-1" + }) + return instance_types + + def lambda_handler(event, context): #pylint: disable=unused-argument + """ this lambda collects ami, snapshots and volumes from linked accounts + and must be called from the corresponding Step Function to orchestrate + """ + logger.info(f"Event data: {event}") + if 'params' not in event : + raise ValueError( + "Please do not trigger this Lambda manually." + "Find the corresponding state machine in Step Functions and Trigger from there." + ) + params = [p for p in event.get('params', '').split() if p] + name = params[0] + + sub_modules = { + 'ebs-optimized-instances': fetch_ebs_optimized_instances + } + + func = sub_modules[name] + counter = 0 + logger.info(f"Collecting {name}") + collection_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + try: + with open(TMP_FILE, "w", encoding='utf-8') as file_: + for counter, obj in enumerate(func(), start=counter + 1): + if len(TAG_LIST) > 0 and "Tags" in obj: + logger.debug(f"Tags enabled and found tags {obj['Tags']}") + for tag in obj["Tags"]: + if tag["Key"] in TAG_LIST: + obj[f"tag_{tag['Key']}"] = tag["Value"] + obj['collection_date'] = collection_date + if 'Environment' in obj and name == 'lambda-functions': + obj['Environment'] = to_json(obj['Environment']) # this property breaks crawler as it has a different key structure + file_.write(to_json(obj) + "\n") + logger.info(f"Collected {counter} total {name} instances") + upload_to_s3(name) + except Exception as exc: #pylint: disable=broad-exception-caught + logger.info(f"{name}: {type(exc)} - {exc}" ) + + def upload_to_s3(name): + """upload""" + if os.path.getsize(TMP_FILE) == 0: + logger.info(f"No data in file for {name}") + return + key = f"{PREFIX}/{PREFIX}-{name}-data/data.json" + s3client = boto3.client("s3", config=Config(s3={"addressing_style": "path"})) + try: + s3client.upload_file(TMP_FILE, BUCKET, key) + logger.info(f"Data in s3 - {BUCKET}/{key}") + except Exception as exc: #pylint: disable=broad-exception-caught + logger.info(exc) + + Handler: 'index.lambda_handler' + MemorySize: 5376 + Timeout: 300 + Role: !GetAtt LambdaRole.Arn + Environment: + Variables: + BUCKET_NAME: !Ref DestinationBucket + PREFIX: !Ref CFDataName + ROLENAME: !Ref MultiAccountRoleName + + GeneralDatasetsLogGroup: + Type: AWS::Logs::LogGroup + Properties: + LogGroupName: !Sub "/aws/lambda/${GeneralDatasetsLambdaFunction}" + RetentionInDays: 60 + + 'Fn::ForEach::GeneralDatasets': + - AwsGeneralDataset + - !Ref AwsGeneralDatasets + - 'Crawler${AwsGeneralDataset}': + Type: AWS::Glue::Crawler + Properties: + Name: !Sub '${ResourcePrefix}${CFDataName}-GD-${AwsGeneralDataset}-Crawler' + Role: !Ref GlueRoleARN + DatabaseName: !Ref DatabaseName + Targets: + S3Targets: + - Path: + Fn::Sub: + - "s3://${DestinationBucket}/inventory/inventory-${path}-data/" + - path: !FindInMap [GeneralDatasetsMap, !Ref AwsGeneralDataset, path] + Configuration: | + { + "Version": 1.0, + "Grouping": { + "TableGroupingPolicy": "CombineCompatibleSchemas" + }, + "CrawlerOutput": { + "Tables": { + "TableThreshold": 1 + } + } + } + 'Table${AwsGeneralDataset}': + Type: Custom::ManageGlueTable + Properties: + ServiceToken: !Ref LambdaManageGlueTableARN + TableInput: !Select [0, !FindInMap [GeneralDatasetsMap, !Ref AwsGeneralDataset, table]] + + 
+      'StepFunction${AwsGeneralDataset}':
+        Type: AWS::StepFunctions::StateMachine
+        Properties:
+          StateMachineName: !Sub '${ResourcePrefix}${CFDataName}-GD-${AwsGeneralDataset}-StateMachine'
+          StateMachineType: STANDARD
+          RoleArn: !Ref StepFunctionExecutionRoleARN
+          DefinitionS3Location:
+            Bucket: !Ref CodeBucket
+            Key: !Ref GeneralDatasetsStepFunctionTemplate
+          DefinitionSubstitutions:
+            ModuleLambdaARN: !GetAtt GeneralDatasetsLambdaFunction.Arn
+            Crawlers: !Sub '["${ResourcePrefix}${CFDataName}-GD-${AwsGeneralDataset}-Crawler"]'
+            Params: !FindInMap [GeneralDatasetsMap, !Ref AwsGeneralDataset, path]
+            Module: !Ref CFDataName
+            DeployRegion: !Ref AWS::Region
+            Account: !Ref AWS::AccountId
+            Prefix: !Ref ResourcePrefix
+      'RefreshSchedule${AwsGeneralDataset}':
+        Type: AWS::Scheduler::Schedule
+        Properties:
+          Description: !Sub 'Scheduler for the ODC ${CFDataName} ${AwsGeneralDataset} module'
+          Name: !Sub '${ResourcePrefix}${CFDataName}-GD-${AwsGeneralDataset}-RefreshSchedule'
+          ScheduleExpression: !Ref Schedule
+          State: ENABLED
+          FlexibleTimeWindow:
+            MaximumWindowInMinutes: 30
+            Mode: 'FLEXIBLE'
+          Target:
+            Arn: !GetAtt [!Sub 'StepFunction${AwsGeneralDataset}', Arn]
+            RoleArn: !Ref SchedulerExecutionRoleARN
 
   AnalyticsExecutor:
     Type: Custom::LambdaAnalyticsExecutor
@@ -1779,3 +2022,56 @@
       Name: backward_compat_pricing_region_names
       QueryString: !Sub |
         CREATE OR REPLACE VIEW pricing_region_names AS SELECT * FROM ${DatabaseName}.pricing_regionnames_data
+
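+  # Flags volumes whose provisioned IOPS exceed the attached instance type's
+  # EBS-optimized maximum, using the limits collected by the Lambda above.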
+  AthenaEC2OverProvisionedIOPS:
+    Type: AWS::Athena::NamedQuery
+    Properties:
+      Database: !Ref DatabaseName
+      Description: Identifies EBS volumes attached to EC2 instances that have provisioned IOPS higher than the instance's maximum
+      Name: inventory_ec2_overprovisioned_iops
+      QueryString: !Sub |
+        WITH instances_and_volumes AS (
+            SELECT ebs.accountid,
+                ebs.volumeid,
+                ebs.attachments[1].instanceid AS instanceid,
+                instances.instancetype AS instancetype,
+                ebs.volumetype,
+                ebs.iops AS provisionediops,
+                ebs.throughput AS provisionedthroughput
+            FROM ${DatabaseName}.inventory_ebs_data AS ebs
+            LEFT OUTER JOIN ${DatabaseName}.inventory_ec2_instances_data AS instances ON ebs.attachments[1].instanceid = instances.instanceid
+            WHERE cardinality(ebs.attachments) = 1
+        )
+        SELECT accountid,
+            volumeid,
+            instanceid,
+            instances_and_volumes.instancetype AS instancetype,
+            volumetype,
+            provisionediops,
+            maxiops,
+            provisionedthroughput,
+            maxthroughput
+        FROM instances_and_volumes
+        LEFT OUTER JOIN ${DatabaseName}.inventory_ebs_optimized_instances_data ON instances_and_volumes.instancetype = ${DatabaseName}.inventory_ebs_optimized_instances_data.instancetype
+        WHERE TRY_CAST(provisionediops AS BIGINT) > TRY_CAST(maxiops AS BIGINT)
+
+  AthenaRDSOverProvisionedIOPS:
+    Type: AWS::Athena::NamedQuery
+    Properties:
+      Database: !Ref DatabaseName
+      Description: Identifies EBS volumes attached to RDS DB instances that have provisioned IOPS higher than the instance's maximum
+      Name: inventory_rds_overprovisioned_iops
+      QueryString: !Sub |
+        SELECT rds.accountid,
+            rds.dbinstanceidentifier,
+            rds.dbinstanceclass,
+            rds.storagetype,
+            rds.storagethroughput AS provisioned_storagethroughput,
+            rds.iops AS provisioned_iops,
+            ebs.maxiops
+        FROM ${DatabaseName}.inventory_rds_db_instances_data AS rds
+        LEFT OUTER JOIN ${DatabaseName}.inventory_ebs_optimized_instances_data AS ebs ON rds.dbinstanceclass = CONCAT('db.', ebs.instancetype)
+        WHERE rds.storagetype IN ('gp3', 'io1', 'io2')
+        AND TRY_CAST(rds.iops AS BIGINT) > TRY_CAST(ebs.maxiops AS BIGINT)
\ No newline at end of file
diff --git a/data-collection/deploy/source/step-functions/general-datasets-state-machine-v1.json b/data-collection/deploy/source/step-functions/general-datasets-state-machine-v1.json
new file mode 100644
index 00000000..065a469e
--- /dev/null
+++ b/data-collection/deploy/source/step-functions/general-datasets-state-machine-v1.json
@@ -0,0 +1,43 @@
+{
+  "Comment": "Orchestrate the collection of ${Module} data",
+  "StartAt": "InvokeModuleLambda",
+  "States": {
+    "InvokeModuleLambda": {
+      "Type": "Task",
+      "Resource": "arn:aws:states:::lambda:invoke",
+      "OutputPath": "$.Payload",
+      "Parameters": {
+        "Payload": {
+          "params": "${Params}"
+        },
+        "FunctionName": "${ModuleLambdaARN}"
+      },
+      "Retry": [
+        {
+          "ErrorEquals": [
+            "Lambda.ServiceException",
+            "Lambda.AWSLambdaException",
+            "Lambda.SdkClientException",
+            "Lambda.TooManyRequestsException"
+          ],
+          "IntervalSeconds": 2,
+          "MaxAttempts": 6,
+          "BackoffRate": 2
+        }
+      ],
+      "Next": "CrawlerStepFunctionStartExecution"
+    },
+    "CrawlerStepFunctionStartExecution": {
+      "Type": "Task",
+      "Resource": "arn:aws:states:::states:startExecution.sync:2",
+      "Parameters": {
+        "StateMachineArn": "arn:aws:states:${DeployRegion}:${Account}:stateMachine:${Prefix}CrawlerExecution-StateMachine",
+        "Input": {
+          "crawlers": ${Crawlers}
+        }
+      },
+      "End": true
+    }
+  },
+  "TimeoutSeconds": 10800
+}