Skip to content

Commit 7b0622c

Browse files
committed
merge main
2 parents 7e52dca + 5651e4e commit 7b0622c

13 files changed

+326
-37
lines changed

data-collection/deploy/deploy-data-collection.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: '2010-09-09'
2-
Description: CID Data Collection Stack v3.6.1
2+
Description: CID Data Collection Stack v3.7.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:

data-collection/deploy/deploy-data-read-permissions.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: "2010-09-09"
2-
Description: CID Data Collection - All-in-One for Management Account v3.6.1
2+
Description: CID Data Collection - All-in-One for Management Account v3.7.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:
@@ -214,7 +214,7 @@ Resources:
214214
DataCollectorOrgAccountModulesReadStackSet:
215215
Type: AWS::CloudFormation::StackSet
216216
Properties:
217-
Description: "StackSet in charge of deploying read roles across organization accounts v3.6.1"
217+
Description: "StackSet in charge of deploying read roles across organization accounts v3.7.0"
218218
PermissionModel: SERVICE_MANAGED
219219
AutoDeployment:
220220
Enabled: true

data-collection/deploy/deploy-in-linked-account.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: '2010-09-09'
2-
Description: CID Data Collection - Role for Linked Account v3.6.1
2+
Description: CID Data Collection - Role for Linked Account v3.7.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:
@@ -241,6 +241,9 @@ Resources:
241241
- "eks:ListNodegroups"
242242
- "eks:DescribeNodegroup"
243243
- "lambda:ListFunctions"
244+
- "workspaces:DescribeWorkspaces"
245+
- "workspaces:DescribeWorkspaceDirectories"
246+
- "workspaces:DescribeWorkspacesConnectionStatus"
244247
Resource: "*" ## Policy is used for scanning of a wide range of resources
245248
Roles:
246249
- Ref: LambdaRole

data-collection/deploy/deploy-in-management-account.yaml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
AWSTemplateFormatVersion: '2010-09-09'
2-
Description: CID Data Collection - Role for Management Account v3.6.1
2+
Description: CID Data Collection - Role for Management Account v3.7.0
33
Metadata:
44
AWS::CloudFormation::Interface:
55
ParameterGroups:
@@ -313,11 +313,6 @@ Resources:
313313
- "rds:DescribeDBInstances"
314314
- "rds:DescribeDBClusters"
315315
Resource: "*"
316-
- Effect: "Allow"
317-
Action:
318-
- "compute-optimizer:GetIdleRecommendations"
319-
- "compute-optimizer:ExportIdleRecommendations"
320-
Resource: "*"
321316
Roles:
322317
- Ref: LambdaRole
323318
Metadata:

data-collection/deploy/module-compute-optimizer.yaml

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ Resources:
142142
- iam:GetRolePolicy
143143
- iam:PutRolePolicy
144144
- iam:TagRole
145+
- iam:UntagRole
145146
Resource:
146147
- !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/${ResourcePrefix}Compute-Optimizer-Replication-*'
147148
- !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/StackSet-${ResourcePrefix}ComputeOptimizer*' # For Compatibility with older versions: Shorter version of StackSetName
@@ -278,17 +279,17 @@ Resources:
278279
- s3:GetObjectVersionTagging
279280
Effect: 'Allow'
280281
Resource: !Sub arn:${AWS::Partition}:s3:::${DestinationBucket}/*
281-
- !If
282-
- NeedDataBucketsKms
283-
- PolicyName: "KMS"
284-
PolicyDocument:
285-
Version: "2012-10-17"
286-
Statement:
287-
- Effect: "Allow"
288-
Action:
289-
- "kms:GenerateDataKey"
290-
Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ]
291-
- !Ref AWS::NoValue
282+
- !If
283+
- NeedDataBucketsKms
284+
- PolicyName: "KMS"
285+
PolicyDocument:
286+
Version: "2012-10-17"
287+
Statement:
288+
- Effect: "Allow"
289+
Action:
290+
- "kms:GenerateDataKey"
291+
Resource: !Split [ ',', !Ref DataBucketsKmsKeysArns ]
292+
- !Ref AWS::NoValue
292293
S3Bucket:
293294
Type: AWS::S3::Bucket
294295
DeletionPolicy: Delete
@@ -329,7 +330,6 @@ Resources:
329330
Status: Enabled
330331
NoncurrentVersionExpiration:
331332
NoncurrentDays: 1
332-
NewerNoncurrentVersions: 1
333333
Tags: # Hacky way to manage dependencies
334334
- Key: IgnoreMeIamOnlyWorkaround
335335
Value: !GetAtt StackSetExecutionRole.Arn
@@ -385,7 +385,6 @@ Resources:
385385
import logging
386386
from datetime import date
387387
from functools import partial
388-
import boto3
389388
390389
BUCKET_PREFIX = os.environ["BUCKET_PREFIX"]
391390
INCLUDE_MEMBER_ACCOUNTS = os.environ.get("INCLUDE_MEMBER_ACCOUNTS", 'yes').lower() == 'yes'
@@ -395,6 +394,12 @@ Resources:
395394
396395
logger = logging.getLogger(__name__)
397396
logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
397+
#ensure we get latest boto3
398+
from pip._internal.cli.main import main, sys
399+
logging.getLogger('pip').setLevel(logging.ERROR) # Silence pip's logger
400+
main(['install', '-I', 'boto3', '--target', '/tmp/', '--no-cache-dir', '--disable-pip-version-check'])
401+
sys.path.insert(0,'/tmp/')
402+
import boto3 #pylint: disable=wrong-import-position
398403
399404
def lambda_handler(event, context): #pylint: disable=unused-argument
400405
logger.info(f"Event data {json.dumps(event)}")

data-collection/deploy/module-health-events.yaml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ Resources:
143143
from datetime import date, datetime, timedelta, timezone
144144
145145
import boto3
146+
from botocore.config import Config
146147
147148
logger = logging.getLogger()
148149
logger.setLevel(getattr(logging, os.environ.get('LOG_LEVEL', 'INFO').upper(), logging.INFO))
@@ -156,6 +157,9 @@ Resources:
156157
LOOKBACK = int(os.environ['LOOKBACK'])
157158
DETAIL_SM_ARN = os.environ['DETAIL_SM_ARN']
158159
TMP_FILE = "/tmp/data.json"
160+
MAX_RETRIES = int(os.environ.get('MAX_RETRIES', "10"))
161+
162+
config = Config(retries={"max_attempts": MAX_RETRIES, "mode": "adaptive"})
159163
160164
mapping = {
161165
'payer_account_id': 'payer_account_id',
@@ -341,6 +345,7 @@ Resources:
341345
)['Credentials']
342346
health_client = boto3.client(
343347
'health',
348+
config=config,
344349
region_name=region,
345350
aws_access_key_id=creds['AccessKeyId'],
346351
aws_secret_access_key=creds['SecretAccessKey'],
@@ -349,7 +354,7 @@ Resources:
349354
350355
count = 0
351356
if is_summary_mode:
352-
start_from, start_to = calculate_dates(BUCKET_NAME, f"{PREFIX}/{PREFIX}-summary-data/payer_id={account_id}")
357+
start_from, start_to = calculate_dates(BUCKET_NAME, f"{PREFIX}/{PREFIX}-detail-data/payer_id={account_id}")
353358
logger.info(f"Collecting events from {start_from} to {start_to}")
354359
args = {
355360
'maxResults':100,
@@ -415,7 +420,7 @@ Resources:
415420
if count > 0:
416421
rand = uuid.uuid4()
417422
key = ingestion_time.strftime(f"{PREFIX}/{PREFIX}-detail-data/payer_id={account_id}/year=%Y/month=%m/day=%d/%Y-%m-%d-%H-%M-%S-{rand}.json")
418-
boto3.client('s3').upload_file(TMP_FILE, BUCKET_NAME, key)
423+
boto3.client('s3', config=config).upload_file(TMP_FILE, BUCKET_NAME, key)
419424
logger.info(f'Uploaded {count} summary records to s3://{BUCKET_NAME}/{key}')
420425
return {"status":"200","Recorded":f'"{count}"'}
421426
Handler: "index.lambda_handler"

data-collection/deploy/module-inventory.yaml

Lines changed: 163 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ Parameters:
5656
Description: ARN of a Lambda for Managing GlueTable
5757
AwsObjects:
5858
Type: CommaDelimitedList
59-
Default: OpensearchDomains, ElasticacheClusters, RdsDbInstances, EBS, AMI, Snapshot, Ec2Instances, VpcInstances, RdsDbSnapshots, EKSClusters, LambdaFunctions, RdsDbClusters
59+
Default: OpensearchDomains, ElasticacheClusters, RdsDbInstances, EBS, AMI, Snapshot, Ec2Instances, VpcInstances, RdsDbSnapshots, EKSClusters, LambdaFunctions, RdsDbClusters, WorkSpaces
6060
Description: Services for pulling price data
6161
DataBucketsKmsKeysArns:
6262
Type: String
@@ -1011,6 +1011,73 @@ Mappings:
10111011
SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
10121012
TableType: EXTERNAL_TABLE
10131013

1014+
WorkSpaces:
1015+
path: workspaces
1016+
table:
1017+
- Name: inventory_workspaces_data
1018+
Parameters:
1019+
classification: json
1020+
compressionType: none
1021+
PartitionKeys:
1022+
- Name: payer_id
1023+
Type: string
1024+
- Name: year
1025+
Type: string
1026+
- Name: month
1027+
Type: string
1028+
- Name: day
1029+
Type: string
1030+
StorageDescriptor:
1031+
Columns:
1032+
- Name: workspaceid
1033+
Type: string
1034+
- Name: username
1035+
Type: string
1036+
- Name: computetype
1037+
Type: string
1038+
- Name: directoryid
1039+
Type: string
1040+
- Name: directoryname
1041+
Type: string
1042+
- Name: directorytype
1043+
Type: string
1044+
- Name: directoryalias
1045+
Type: string
1046+
- Name: directorymaintenance
1047+
Type: string
1048+
- Name: state
1049+
Type: string
1050+
- Name: connectionstatus
1051+
Type: string
1052+
- Name: lastconnected
1053+
Type: string
1054+
- Name: lastinventoryrun
1055+
Type: string
1056+
- Name: runningmode
1057+
Type: string
1058+
- Name: operatingsystemname
1059+
Type: string
1060+
- Name: protocol
1061+
Type: string
1062+
- Name: computername
1063+
Type: string
1064+
- Name: ipaddress
1065+
Type: string
1066+
- Name: accountid
1067+
Type: string
1068+
- Name: collection_date
1069+
Type: string
1070+
- Name: region
1071+
Type: string
1072+
InputFormat: org.apache.hadoop.mapred.TextInputFormat
1073+
Location: !Sub s3://${DestinationBucket}/inventory/inventory-workspaces-data/
1074+
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
1075+
SerdeInfo:
1076+
Parameters:
1077+
paths: WorkspaceId,UserName,ComputeType,DirectoryId,DirectoryName,DirectoryType,DirectoryAlias,DirectoryMaintenance,State,ConnectionStatus,LastConnected,LastInventoryRun,RunningMode,OperatingSystemName,Protocol,ComputerName,IPAddress,accountid,collection_date,region
1078+
SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
1079+
TableType: EXTERNAL_TABLE
1080+
10141081
Resources:
10151082
LambdaRole:
10161083
Type: AWS::IAM::Role
@@ -1196,6 +1263,68 @@ Resources:
11961263
logger.error(f"Cannot get info from {account_id}/{region}: {type(exc)}-{exc}")
11971264
return []
11981265
1266+
def workspaces_scan(account_id, region):
1267+
"""Special function to scan AWS WorkSpaces resources"""
1268+
# Define WorkSpaces supported regions
1269+
WORKSPACES_REGIONS = [
1270+
'us-east-1', 'us-west-2', 'ap-south-1', 'ap-northeast-2',
1271+
'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1', 'ca-central-1',
1272+
'eu-central-1', 'eu-west-1', 'eu-west-2', 'sa-east-1']
1273+
# Skip if region is not supported by WorkSpaces
1274+
if region not in WORKSPACES_REGIONS:
1275+
logger.info(f"WorkSpaces not supported in region {region}. Skipping.")
1276+
return
1277+
try:
1278+
session = assume_session(account_id, region)
1279+
client = session.client('workspaces', region_name=region)
1280+
# Get WorkSpaces data
1281+
workspaces_data = list(client.get_paginator('describe_workspaces').paginate().search('Workspaces[*]'))
1282+
if not workspaces_data:
1283+
logger.info(f"No WorkSpaces found in {account_id}/{region}")
1284+
return
1285+
# Get connection status and directories for lookup
1286+
connection_status = client.describe_workspaces_connection_status()['WorkspacesConnectionStatus']
1287+
directories = client.describe_workspace_directories()['Directories']
1288+
# Create lookup dictionaries
1289+
connection_lookup = {conn['WorkspaceId']: conn for conn in connection_status}
1290+
directory_lookup = {d['DirectoryId']: d for d in directories}
1291+
# Process workspaces
1292+
for workspace in workspaces_data:
1293+
workspace_id = workspace['WorkspaceId']
1294+
dir_id = workspace['DirectoryId']
1295+
1296+
connection_info = connection_lookup.get(workspace_id, {})
1297+
dir_info = directory_lookup.get(dir_id, {})
1298+
1299+
workspace_info = {
1300+
'accountid': account_id,
1301+
'region': region,
1302+
'ResourceType': 'WorkSpace',
1303+
'WorkspaceId': workspace_id,
1304+
'UserName': workspace['UserName'],
1305+
'DirectoryId': dir_id,
1306+
'State': workspace['State'],
1307+
'ConnectionStatus': connection_info.get('ConnectionState', 'N/A'),
1308+
'LastConnected': connection_info.get('LastKnownUserConnectionTimestamp', 'Never').isoformat() if isinstance(connection_info.get('LastKnownUserConnectionTimestamp'), datetime) else "01/01/01 00:00:00",
1309+
'LastInventoryRun': time.strftime('%x %X'),
1310+
'RunningMode': workspace.get("WorkspaceProperties", {}).get("RunningMode", "N/A"),
1311+
'ComputeType': workspace.get("WorkspaceProperties", {}).get("ComputeTypeName", "N/A"),
1312+
'OperatingSystem': workspace.get("WorkspaceProperties", {}).get("OperatingSystemName", "N/A"),
1313+
'Protocol': ''.join(
1314+
c for c in str(workspace.get("WorkspaceProperties", {}).get("Protocols", "N/A"))
1315+
if c.isalnum() or c.isspace()
1316+
),
1317+
'DirectoryName': dir_info.get('DirectoryName', 'N/A'),
1318+
'DirectoryAlias': dir_info.get('Alias', 'N/A'),
1319+
'DirectoryType': dir_info.get('DirectoryType', 'N/A'),
1320+
'DirectoryMaintenance': dir_info.get("WorkspaceCreationProperties", {}).get("EnableMaintenanceMode", "N/A"),
1321+
'ComputerName': workspace.get("ComputerName", ""),
1322+
'IPAddress': workspace.get("IpAddress", "")
1323+
}
1324+
yield workspace_info
1325+
except Exception as exc:
1326+
logger.error(f"Cannot get WorkSpaces info from {account_id}/{region}: {type(exc)}-{exc}")
1327+
11991328
def lambda_handler(event, context): #pylint: disable=unused-argument
12001329
""" this lambda collects ami, snapshots and volumes from linked accounts
12011330
and must be called from the corresponding Step Function to orchestrate
@@ -1270,7 +1399,8 @@ Resources:
12701399
function_name='list_functions',
12711400
obj_name='Functions[*]'
12721401
),
1273-
'eks': eks_clusters_scan
1402+
'eks': eks_clusters_scan,
1403+
'workspaces': workspaces_scan
12741404
}
12751405
12761406
account = json.loads(event["account"])
@@ -1799,6 +1929,37 @@ Resources:
17991929
gp3_gb_cost + gp3_iops_cost + gp3_throughput_cost AS gp3_total_cost,
18001930
(current_gb_cost + current_iops_cost) - (gp3_gb_cost + gp3_iops_cost + gp3_throughput_cost) as gp3_saving
18011931
FROM inventory_ebs_data
1932+
1933+
EUCInventoryView:
1934+
Type: AWS::Athena::NamedQuery
1935+
Properties:
1936+
Database: !Ref DatabaseName
1937+
Description: EUC Inventory View for WorkSpaces inventory data
1938+
Name: create_euc_inventory_view
1939+
QueryString: !Sub |
1940+
CREATE OR REPLACE VIEW "euc_inventory_view" AS
1941+
SELECT
1942+
"workspaceid"
1943+
, "username"
1944+
, "accountid"
1945+
, "computetype"
1946+
, "directoryid"
1947+
, "directoryname"
1948+
, "directoryalias"
1949+
, "directorytype"
1950+
, "directorymaintenance"
1951+
, "region"
1952+
, "state"
1953+
, "connectionstatus"
1954+
, CAST(parse_datetime(lastconnected, 'MM/dd/yy HH:mm:ss') AS timestamp) lastconnected
1955+
, "lastinventoryrun"
1956+
, "runningmode"
1957+
, "operatingsystemname"
1958+
, "protocol"
1959+
, "computername"
1960+
, "ipaddress"
1961+
FROM
1962+
"inventory_workspaces_data"
18021963
18031964
AthenaBackwardCompatPricingRegionNames:
18041965
Type: AWS::Athena::NamedQuery

0 commit comments

Comments (0)