Skip to content

Commit 432de10

Browse files
authored
Fix lambda data (#167)
* fix lambda inventory as Environment breaks crawler
* rename lambda to lambda_functions
* rename lambda to lambda_functions
* lint and refactor
1 parent beb717f commit 432de10

File tree

5 files changed

+90
-98
lines changed

5 files changed

+90
-98
lines changed

data-collection/deploy/module-budgets.yaml

Lines changed: 49 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -98,39 +98,41 @@ Resources:
9898
Properties:
9999
FunctionName: !Sub '${ResourcePrefix}${CFDataName}-Lambda'
100100
Description: !Sub "Lambda function to retrieve ${CFDataName}"
101-
Runtime: python3.10
101+
Runtime: python3.12
102102
Architectures: [x86_64]
103103
Code:
104104
ZipFile: |
105-
#Author Stephanie Gooch 2021
106-
#Mohideen - Added Budgets tag collection module
105+
#Authors:
106+
# Stephanie Gooch - initial version
107+
# Mohideen - Added Budgets tag collection module
107108
import os
108109
import json
109110
import logging
110111
import datetime
111112
from json import JSONEncoder
112113
import sys
114+
115+
# update boto3 for list_tags_for_resource api
113116
from pip._internal import main
114117
main(['install', '-I', '-q', 'boto3', '--target', '/tmp/', '--no-cache-dir', '--disable-pip-version-check'])
115118
sys.path.insert(0,'/tmp/')
116119
117-
import boto3
120+
import boto3 #pylint: disable=C0413
118121
119122
BUCKET = os.environ["BUCKET_NAME"]
120123
PREFIX = os.environ["PREFIX"]
121-
ROLE_NAME = os.environ['ROLENAME']
124+
ROLE_NAME = os.environ['ROLE_NAME']
122125
TMP_FILE = "/tmp/data.json"
123-
REGIONS = ["us-east-1"]
124126
125127
logger = logging.getLogger(__name__)
126128
logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
127129
128-
# subclass JSONEncoder
129130
class DateTimeEncoder(JSONEncoder):
130-
# Override the default method
131-
def default(self, obj):
132-
if isinstance(obj, (datetime.date, datetime.datetime)):
133-
return obj.isoformat()
131+
"""encoder for json with time object"""
132+
def default(self, o):
133+
if isinstance(o, (datetime.date, datetime.datetime)):
134+
return o.isoformat()
135+
return None
134136
135137
def assume_role(account_id, service, region):
136138
cred = boto3.client('sts', region_name=region).assume_role(
@@ -144,63 +146,54 @@ Resources:
144146
aws_session_token=cred['SessionToken']
145147
)
146148
147-
def lambda_handler(event, context):
149+
def lambda_handler(event, context): #pylint: disable=W0613
148150
logger.info(f"Event data {json.dumps(event)}")
149151
if 'account' not in event:
150152
raise ValueError(
151153
"Please do not trigger this Lambda manually."
152154
"Find the corresponding state machine in Step Functions and Trigger from there."
153155
)
154156
collection_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
155-
try:
156-
account = json.loads(event["account"])
157-
account_id = account["account_id"]
158-
account_name = account["account_name"]
159-
payer_id = account["payer_id"]
160-
logger.info(f"Collecting data for account: {account_id}")
161-
budgets_client = assume_role(account_id, "budgets", REGIONS[0])
162-
paginator = budgets_client.get_paginator("describe_budgets") #Paginator for a large list of accounts
163-
response_iterator = paginator.paginate(AccountId=account_id)
164-
count = 0
165-
with open(TMP_FILE, "w") as f:
166-
for budgets in response_iterator:
167-
if not 'Budgets' in budgets: continue
168-
for budget in budgets['Budgets']:
169-
count += 1
170-
budget['collection_time'] = collection_time
171-
logger.debug(budget)
172-
# Fetch tags for the budget using List tag for resource API
173-
budget_name = budget['BudgetName']
174-
resource_arn = f"arn:aws:budgets::{account_id}:budget/{budget_name}"
175-
budget_tag = budgets_client.list_tags_for_resource(
176-
ResourceARN=f"{resource_arn}"
177-
)
178-
if budget_tag['ResourceTags'] is not None:
179-
budget.update({'Account_ID': account_id, 'Account_Name': account_name, 'Tags': budget_tag['ResourceTags']})
180-
else:
181-
budget.update({'Account_ID': account_id, 'Account_Name': account_name})
182-
# Fetch CostFilters if available
183-
if 'CostFilters' not in budget or len(budget['CostFilters']) == 0 or 'PlannedBudgetLimits' not in budget:
184-
budget.update({'CostFilters': {'Filter': ['None']}})
185-
dataJSONData = json.dumps(budget, cls=DateTimeEncoder)
186-
f.write(dataJSONData)
187-
f.write("\n")
188-
logger.info(f"Budgets collected: {count}")
189-
s3_upload(account_id, payer_id)
190-
except Exception as e:
191-
logger.warning(f"Error: {type(e)} {e}")
157+
aws_partition = boto3.session.Session().get_partition_for_region(boto3.session.Session().region_name)
158+
account = json.loads(event["account"])
159+
account_id = account["account_id"]
160+
account_name = account["account_name"]
161+
payer_id = account["payer_id"]
162+
163+
logger.info(f"Collecting data for account: {account_id}")
164+
budgets_client = assume_role(account_id, "budgets", "us-east-1") # must be us-east-1
165+
count = 0
166+
with open(TMP_FILE, "w", encoding='utf-8') as f:
167+
for budget in budgets_client.get_paginator("describe_budgets").paginate(AccountId=account_id).search('Budgets'):
168+
budget['collection_time'] = collection_time
169+
170+
# Fetch tags for the budget using List tag for resource API
171+
budget_name = budget['BudgetName']
172+
budget_tags = budgets_client.list_tags_for_resource(ResourceARN=f"arn:{aws_partition}:budgets::{account_id}:budget/{budget_name}")
173+
budget.update({
174+
'Account_ID': account_id,
175+
'Account_Name': account_name,
176+
'Tags': budget_tags.get('ResourceTags') or []
177+
})
178+
179+
# Fetch CostFilters if available
180+
if 'CostFilters' not in budget or len(budget['CostFilters']) == 0 or 'PlannedBudgetLimits' not in budget:
181+
budget.update({'CostFilters': {'Filter': ['None']}})
182+
183+
f.write(json.dumps(budget, cls=DateTimeEncoder) + "\n")
184+
count += 1
185+
logger.info(f"Budgets collected: {count}")
186+
s3_upload(account_id, payer_id)
187+
192188
193189
def s3_upload(account_id, payer_id):
194190
if os.path.getsize(TMP_FILE) == 0:
195191
logger.info(f"No data in file for {PREFIX}")
196192
return
197193
key = datetime.datetime.now().strftime(f"{PREFIX}/{PREFIX}-data/payer_id={payer_id}/year=%Y/month=%m/budgets-{account_id}.json")
198-
try:
199-
res = boto3.client('s3').upload_file(TMP_FILE, BUCKET, key)
200-
logger.info(f'res={res}')
201-
logger.info(f"Budget data for {account_id} stored at s3://{BUCKET}/{key}")
202-
except Exception as exc:
203-
logger.warning(exc)
194+
boto3.client('s3').upload_file(TMP_FILE, BUCKET, key)
195+
logger.info(f"Budget data for {account_id} stored at s3://{BUCKET}/{key}")
196+
204197
Handler: 'index.lambda_handler'
205198
MemorySize: 2688
206199
Timeout: 300
@@ -209,7 +202,7 @@ Resources:
209202
Variables:
210203
BUCKET_NAME: !Ref DestinationBucket
211204
PREFIX: !Ref CFDataName
212-
ROLENAME: !Ref MultiAccountRoleName
205+
ROLE_NAME: !Ref MultiAccountRoleName
213206

214207
Metadata:
215208
cfn_nag:

data-collection/deploy/module-inventory.yaml

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Parameters:
5656
Description: ARN of a Lambda for Managing GlueTable
5757
AwsObjects:
5858
Type: CommaDelimitedList
59-
Default: OpensearchDomains, ElasticacheClusters, RdsDbInstances, EBS, AMI, Snapshot, Ec2Instances, VpcInstances, RdsDbSnapshots, EKSClusters, AWSLambda
59+
Default: OpensearchDomains, ElasticacheClusters, RdsDbInstances, EBS, AMI, Snapshot, Ec2Instances, VpcInstances, RdsDbSnapshots, EKSClusters, LambdaFunctions
6060
Description: Services for pulling price data
6161

6262
Mappings:
@@ -798,10 +798,10 @@ Mappings:
798798
paths: Arn,Name,CreatedAt,Version,accountid,collection_date,region
799799
SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
800800
TableType: EXTERNAL_TABLE
801-
AWSLambda:
802-
path: lambda
801+
LambdaFunctions:
802+
path: lambda-functions
803803
table:
804-
- Name: inventory_lambda_data
804+
- Name: inventory_lambda_functions_data
805805
Parameters: { "classification" : "json", "compressionType": "none" }
806806
PartitionKeys:
807807
- Name: payer_id
@@ -863,7 +863,7 @@ Mappings:
863863
- Name: vpcconfig
864864
Type: struct<subnetids:array<string>,securitygroupids:array<string>,vpcid:string,ipv6allowedfordualstack:boolean>
865865
InputFormat: org.apache.hadoop.mapred.TextInputFormat
866-
Location: !Sub s3://${DestinationBucket}/inventory/inventory-lambda-data/
866+
Location: !Sub s3://${DestinationBucket}/inventory/inventory-lambda-functions-data/
867867
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
868868
SerdeInfo:
869869
Parameters:
@@ -970,9 +970,7 @@ Resources:
970970
session = assume_session(account_id, region)
971971
client = session.client(service, region_name=region)
972972
try:
973-
paginator = client.get_paginator(function_name)
974-
for obj in paginator.paginate(**(params or {})).search(obj_name):
975-
yield obj
973+
yield from client.get_paginator(function_name).paginate(**(params or {})).search(obj_name)
976974
except Exception as exc: #pylint: disable=broad-exception-caught
977975
logger.info(f'Error in scan {function_name}/{account_id}: {exc}')
978976
@@ -996,32 +994,32 @@ Resources:
996994
logger.info(f'scan {service}/{account_id}/{region}: {exc}')
997995
998996
def eks_clusters_scan(account_id, region):
999-
"""special function to scan EKS clusters"""
1000-
service = "eks"
1001-
session = assume_session(account_id, region)
1002-
client = session.client(service, region_name=region)
1003-
try:
1004-
for cluster_name in (
1005-
client.get_paginator("list_clusters")
1006-
.paginate(
1007-
PaginationConfig={
1008-
"PageSize": 100,
1009-
}
1010-
)
1011-
.search("clusters")
1012-
):
1013-
cluster = client.describe_cluster(name=cluster_name)
1014-
yield {
1015-
"Arn": cluster["cluster"]["arn"],
1016-
"Name": cluster["cluster"]["name"],
1017-
"CreatedAt": datetime.strftime(
1018-
cluster["cluster"]["createdAt"].astimezone(tz=timezone.utc), "%Y-%m-%dT%H:%M:%SZ"
1019-
),
1020-
"Version": cluster["cluster"]["version"],
1021-
}
1022-
except Exception as exc:
1023-
logger.error(f"Cannot get info from {account_id}/{region}: {type(exc)}-{exc}")
1024-
return []
997+
"""special function to scan EKS clusters"""
998+
service = "eks"
999+
session = assume_session(account_id, region)
1000+
client = session.client(service, region_name=region)
1001+
try:
1002+
for cluster_name in (
1003+
client.get_paginator("list_clusters")
1004+
.paginate(
1005+
PaginationConfig={
1006+
"PageSize": 100,
1007+
}
1008+
)
1009+
.search("clusters")
1010+
):
1011+
cluster = client.describe_cluster(name=cluster_name)
1012+
yield {
1013+
"Arn": cluster["cluster"]["arn"],
1014+
"Name": cluster["cluster"]["name"],
1015+
"CreatedAt": datetime.strftime(
1016+
cluster["cluster"]["createdAt"].astimezone(tz=timezone.utc), "%Y-%m-%dT%H:%M:%SZ"
1017+
),
1018+
"Version": cluster["cluster"]["version"],
1019+
}
1020+
except Exception as exc: #pylint: disable=W0718
1021+
logger.error(f"Cannot get info from {account_id}/{region}: {type(exc)}-{exc}")
1022+
return []
10251023
10261024
def lambda_handler(event, context): #pylint: disable=unused-argument
10271025
""" this lambda collects ami, snapshots and volumes from linked accounts
@@ -1085,7 +1083,7 @@ Resources:
10851083
service='ec2',
10861084
function_name='describe_vpcs'
10871085
),
1088-
'lambda' : partial(
1086+
'lambda-functions' : partial(
10891087
paginated_scan,
10901088
service='lambda',
10911089
function_name='list_functions',
@@ -1114,7 +1112,7 @@ Resources:
11141112
obj[f"tag_{tag['Key']}"] = tag["Value"]
11151113
obj['collection_date'] = collection_date
11161114
obj['region'] = region
1117-
if 'Environment' in obj and name == 'lambda':
1115+
if 'Environment' in obj and name == 'lambda_functions':
11181116
obj['Environment'] = to_json(obj['Environment']) # this property breaks crawler as it has a different key structure
11191117
file_.write(to_json(obj) + "\n")
11201118
logger.info(f"Collected {counter} total {name} instances")

data-collection/test/run-test-from-scratch.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# vars
55
account_id=$(aws sts get-caller-identity --query "Account" --output text )
66
bucket=cid-$account_id-test
7+
export bucket
78

89
# upload files
910
./data-collection/utils/upload.sh "$bucket"

data-collection/test/test_from_scratch.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,13 @@ def test_inventory_vpc_data(athena):
8282
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."inventory_vpc_data" LIMIT 10;')
8383
assert len(data) > 0, 'inventory_vpc_data is empty'
8484

85-
def test_inventory_rds_snaphot_data(athena):
85+
def test_inventory_rds_snapshot_data(athena):
8686
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."inventory_rds_db_snapshots_data" LIMIT 10;')
8787
assert len(data) > 0, 'inventory_rds_db_snapshots_data is empty'
8888

89-
def test_inventory_lambda_data(athena):
90-
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."inventory_lambda_data" LIMIT 10;')
91-
assert len(data) > 0, 'inventory_lambda_data is empty'
89+
def test_inventory_lambda_functions_data(athena):
90+
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."inventory_lambda_functions_data" LIMIT 10;')
91+
assert len(data) > 0, 'inventory_lambda_functions_data is empty'
9292

9393
def test_rds_usage_data(athena):
9494
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."rds_usage_data" LIMIT 10;')
@@ -144,7 +144,7 @@ def test_pricing_rds_data(athena):
144144

145145
def test_pricing_lambda_data(athena):
146146
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."pricing_lambda_data" LIMIT 10;')
147-
assert len(data) > 0, 'pricing_awslambda_data is empty'
147+
assert len(data) > 0, 'pricing_lambda_data is empty'
148148

149149
def test_pricing_regionnames_data(athena):
150150
data = athena_query(athena=athena, sql_query='SELECT * FROM "optimization_data"."pricing_regionnames_data" LIMIT 10;')

data-collection/test/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ def trigger_update(account_id):
346346
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}inventory-Ec2Instances-StateMachine',
347347
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}inventory-VpcInstances-StateMachine',
348348
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}inventory-RdsDbSnapshots-StateMachine',
349-
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}inventory-AWSLambda-StateMachine',
349+
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}inventory-LambdaFunctions-StateMachine',
350350
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}rds-usage-StateMachine',
351351
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}transit-gateway-StateMachine',
352352
f'arn:aws:states:{region}:{account_id}:stateMachine:{PREFIX}trusted-advisor-StateMachine',

0 commit comments

Comments (0)