Skip to content

Commit 6a42bb1

Browse files
committed
feat: Add database dimension support to RDS multitenant collector
- Add engine detection to skip the database dimension for Oracle/SQL Server
- Collect db.load metrics by both user and database dimensions
- Add dimension_type field to distinguish between user and database metrics
- Fix Parquet field names to use underscores instead of dots for valid column names
- Update Athena view to handle both dimension types with proper CASE statements
- Fix view JOIN order to prevent row duplication
- Maintain backward compatibility with existing data structure
1 parent 16e324c commit 6a42bb1

File tree

1 file changed

+57
-24
lines changed

1 file changed

+57
-24
lines changed

data-collection/deploy/module-rds-multitenant.yaml

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,13 @@ Resources:
194194
for instance in rds_instances:
195195
instance_id = instance['DbiResourceId']
196196
instance_arn = instance['DBInstanceArn']
197-
print(f"Processing metrics for instance {instance_arn} in region {region}")
197+
engine = instance.get('Engine', '')
198+
print(f"Processing metrics for instance {instance_arn} (engine: {engine}) in region {region}")
198199
199200
# Check if Performance Insights is enabled
200201
if instance.get('PerformanceInsightsEnabled', False):
201202
# Get Performance Insights metrics
202-
metrics = get_performance_metrics(pi_client, instance_id)
203+
metrics = get_performance_metrics(pi_client, instance_id, engine)
203204
204205
# Collect metrics for this instance
205206
region_metrics[region].extend(
@@ -234,26 +235,43 @@ Resources:
234235
return instances
235236
236237
237-
def get_performance_metrics(pi_client, instance_id):
238+
def should_collect_database_metrics(engine):
    """Return True when the db.name dimension should be collected for this engine.

    Performance Insights exposes per-database (db.name) grouping only for
    engines such as PostgreSQL/MySQL/Aurora; Oracle and SQL Server support
    per-user load only, so the database dimension must be skipped for them.

    Args:
        engine: RDS engine identifier string (e.g. 'postgres', 'oracle-ee').

    Returns:
        bool: True if db.load.avg may be grouped by 'db' / 'db.name'.
    """
    normalized = engine.lower()
    # Substring match instead of a fixed list: a hard-coded enumeration misses
    # engine values like 'oracle-se2-cdb' and 'custom-sqlserver-ee', which are
    # still Oracle/SQL Server and equally lack the db.name dimension.
    return not ('oracle' in normalized or 'sqlserver' in normalized)
242+
243+
def get_performance_metrics(pi_client, instance_id, engine):
238244
current_time = datetime.utcnow()
239245
end_time = current_time.replace(minute=0, second=0, microsecond=0)
240246
start_time = end_time - timedelta(hours=hour_delta)
241247
248+
metric_queries = [
249+
{
250+
'Metric': 'os.general.numVCPUs.avg'
251+
},
252+
{
253+
'Metric': 'db.load.avg',
254+
'GroupBy': {
255+
'Group': 'db.user',
256+
'Dimensions': ['db.user.name']
257+
}
258+
}
259+
]
260+
261+
# Add database dimension query if engine supports it
262+
if should_collect_database_metrics(engine):
263+
metric_queries.append({
264+
'Metric': 'db.load.avg',
265+
'GroupBy': {
266+
'Group': 'db',
267+
'Dimensions': ['db.name']
268+
}
269+
})
270+
242271
response = pi_client.get_resource_metrics(
243272
ServiceType='RDS',
244273
Identifier=instance_id,
245-
MetricQueries=[
246-
{
247-
'Metric': 'os.general.numVCPUs.avg'
248-
},
249-
{
250-
'Metric': 'db.load.avg',
251-
'GroupBy': {
252-
'Group': 'db.user',
253-
'Dimensions': ['db.user.name']
254-
}
255-
},
256-
],
274+
MetricQueries=metric_queries,
257275
StartTime=start_time,
258276
EndTime=end_time,
259277
PeriodInSeconds=metrics_period_in_seconds
@@ -280,13 +298,25 @@ Resources:
280298
break
281299
282300
if "Dimensions" in metric["Key"]:
301+
dimensions = metric["Key"]["Dimensions"]
302+
303+
# Determine dimension type based on available dimensions
304+
if 'db.user.name' in dimensions:
305+
dimension_type = 'user'
306+
elif 'db.name' in dimensions:
307+
dimension_type = 'database'
308+
else:
309+
dimension_type = 'unknown'
310+
283311
base_entry = {
284312
"metric": metric["Key"]["Metric"],
285313
"resourcearn": instance_arn,
286314
"instance_id": instance_id,
287-
"num_vcpus": num_cpus
315+
"num_vcpus": num_cpus,
316+
"dimension_type": dimension_type,
317+
"db_user_name": dimensions.get('db.user.name', None),
318+
"db_database_name": dimensions.get('db.name', None)
288319
}
289-
base_entry.update(metric["Key"]["Dimensions"])
290320
291321
for datapoint in metric["DataPoints"]:
292322
flattened_entry = base_entry.copy()
@@ -530,28 +560,31 @@ Resources:
530560
SELECT
531561
timestamp,
532562
resourcearn,
563+
dimension_type,
533564
AVG(num_vcpus) AS num_vcpus,
534565
SUM(value) AS total_db_load,
535566
greatest(AVG(num_vcpus), SUM(value)) total_compute_power,
536-
count(1) AS num_users
567+
count(1) AS num_entities
537568
FROM "AwsDataCatalog"."{event['ResourceProperties']['GlueDatabase']}"."hourly_rds_multitenant"
538-
GROUP BY 1, 2
569+
GROUP BY 1, 2, 3
539570
)
540571
SELECT
541572
b.timestamp,
542573
b.account_id,
543574
b.resourcearn,
544575
b.num_vcpus,
545-
b."db.user.name" as user_name,
576+
b.dimension_type,
577+
CASE WHEN b.dimension_type = 'user' THEN b.db_user_name END as user_name,
578+
CASE WHEN b.dimension_type = 'database' THEN b.db_database_name END as database_name,
546579
b.value db_load,
547580
a.total_db_load,
548581
a.total_compute_power,
549-
a.num_users distinct_users,
582+
a.num_entities,
550583
case when a.total_db_load = 0 then 0 else b.value / a.total_db_load end AS perc_utilization,
551584
(b.value / a.total_compute_power) perc_utilization_rebased
552-
FROM aggregate_load_data a
553-
JOIN "AwsDataCatalog"."{event['ResourceProperties']['GlueDatabase']}"."hourly_rds_multitenant" b
554-
ON a.timestamp = b.timestamp AND a.resourcearn = b.resourcearn
585+
FROM "AwsDataCatalog"."{event['ResourceProperties']['GlueDatabase']}"."hourly_rds_multitenant" b
586+
JOIN aggregate_load_data a
587+
ON a.timestamp = b.timestamp AND a.resourcearn = b.resourcearn AND a.dimension_type = b.dimension_type
555588
"""
556589
557590
response = athena.start_query_execution(

0 commit comments

Comments
 (0)