
Commit 0e90a18

bvliu authored and copybara-github committed
Add Azure Flexible Server metrics implementation.
PiperOrigin-RevId: 846734952
1 parent 7fdc40c commit 0e90a18

File tree

3 files changed: 233 additions, 0 deletions

perfkitbenchmarker/providers/azure/azure_flexible_server.py (+125)
perfkitbenchmarker/providers/azure/util.py (+8)
tests/providers/azure/azure_flexible_server_test.py (+100)

perfkitbenchmarker/providers/azure/azure_flexible_server.py

Lines changed: 125 additions & 0 deletions
@@ -20,6 +20,7 @@
 """
 
 import datetime
+import json
 import logging
 import time
 from typing import Any, Tuple
@@ -28,11 +29,13 @@
 from perfkitbenchmarker import errors
 from perfkitbenchmarker import provider_info
 from perfkitbenchmarker import relational_db
+from perfkitbenchmarker import sample
 from perfkitbenchmarker import sql_engine_utils
 from perfkitbenchmarker import vm_util
 from perfkitbenchmarker.providers import azure
 from perfkitbenchmarker.providers.azure import azure_disk
 from perfkitbenchmarker.providers.azure import azure_relational_db
+from perfkitbenchmarker.providers.azure import util
 
 DEFAULT_DATABASE_NAME = 'database'
 
@@ -70,6 +73,9 @@ class AzureFlexibleServer(azure_relational_db.AzureRelationalDb):
       sql_engine_utils.FLEXIBLE_SERVER_POSTGRES,
       sql_engine_utils.FLEXIBLE_SERVER_MYSQL,
   ]
+  # Metrics are processed in 5 minute batches according to
+  # https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-monitoring.
+  METRICS_COLLECTION_DELAY_SECONDS = 300
 
   def __init__(self, relational_db_spec: Any):
     super().__init__(relational_db_spec)
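Note on the 300-second constant above: Azure Monitor only exposes a datapoint after its batch window closes, so callers need to wait out the delay before querying. A minimal usage sketch, assuming a caller honors the delay (the CollectMetrics signature is taken from the test file below; the helper name here is illustrative, not part of the change):

import time

def collect_run_metrics(server, run_start, run_end):
  # Wait for Azure Monitor's 5-minute batching window to flush before querying,
  # otherwise the tail of the run may be missing datapoints.
  time.sleep(server.METRICS_COLLECTION_DELAY_SECONDS)
  return server.CollectMetrics(run_start, run_end)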
@@ -277,3 +283,122 @@ def _ApplyDbFlags(self) -> None:
     )
 
     self._Reboot()
+
+  def _GetResourceProvider(self) -> str:
+    if self.spec.engine == sql_engine_utils.FLEXIBLE_SERVER_MYSQL:
+      return 'Microsoft.DBforMySQL'
+    elif self.spec.engine == sql_engine_utils.FLEXIBLE_SERVER_POSTGRES:
+      return 'Microsoft.DBforPostgreSQL'
+    else:
+      raise NotImplementedError(f'Unsupported engine {self.spec.engine}')
+
+  def _GetResourceId(self) -> str:
+    return (
+        f'/subscriptions/{util.GetSubscriptionId()}/resourceGroups/'
+        f'{self.resource_group.name}/providers/'
+        f'{self._GetResourceProvider()}/flexibleServers/{self.instance_id}'
+    )
+
+  def _GetMetricsToCollect(self) -> list[relational_db.MetricSpec]:
+    """Returns a list of metrics to collect."""
+    # pyformat: disable
+    if self.spec.engine == sql_engine_utils.FLEXIBLE_SERVER_MYSQL:
+      return [
+          relational_db.MetricSpec('cpu_percent', 'cpu_utilization', '%', None),
+          relational_db.MetricSpec('io_consumption_percent', 'io_consumption_percent', '%', None),
+          relational_db.MetricSpec('storage_io_count', 'storage_io_count', 'iops', None),
+          relational_db.MetricSpec('storage_used', 'disk_bytes_used', 'GB', lambda x: x / (1024 * 1024 * 1024)),
+      ]
+    else:
+      return [
+          relational_db.MetricSpec('cpu_percent', 'cpu_utilization', '%', None),
+          relational_db.MetricSpec('read_iops', 'disk_read_iops', 'iops', None),
+          relational_db.MetricSpec('write_iops', 'disk_write_iops', 'iops', None),
+          relational_db.MetricSpec('read_throughput', 'disk_read_throughput', 'MB/s', lambda x: x / (1024 * 1024)),
+          relational_db.MetricSpec('write_throughput', 'disk_write_throughput', 'MB/s', lambda x: x / (1024 * 1024)),
+          relational_db.MetricSpec('storage_used', 'disk_bytes_used', 'GB', lambda x: x / (1024 * 1024 * 1024)),
+      ]
+    # pyformat: enable
+
+  @vm_util.Retry(poll_interval=60, max_retries=5, retryable_exceptions=KeyError)
+  def _CollectProviderMetric(
+      self,
+      metric: relational_db.MetricSpec,
+      start_time: datetime.datetime,
+      end_time: datetime.datetime,
+      collect_percentiles: bool = False,
+  ) -> list[sample.Sample]:
+    """Collects metrics from Azure Monitor."""
+    if end_time - start_time < datetime.timedelta(minutes=1):
+      logging.warning(
+          'Not collecting metrics since end time %s is within 1 minute of start'
+          ' time %s.',
+          end_time,
+          start_time,
+      )
+      return []
+    metric_name = metric.provider_name
+    logging.info(
+        'Collecting metric %s for instance %s', metric_name, self.instance_id
+    )
+    cmd = [
+        azure.AZURE_PATH,
+        'monitor',
+        'metrics',
+        'list',
+        '--resource',
+        self._GetResourceId(),
+        '--metric',
+        metric_name,
+        '--start-time',
+        start_time.astimezone(datetime.timezone.utc).strftime(
+            relational_db.METRICS_TIME_FORMAT
+        ),
+        '--end-time',
+        end_time.astimezone(datetime.timezone.utc).strftime(
+            relational_db.METRICS_TIME_FORMAT
+        ),
+        '--interval',
+        'pt1m',
+        '--aggregation',
+        'Average',
+    ]
+    try:
+      stdout, _ = vm_util.IssueRetryableCommand(cmd)
+    except errors.VmUtil.IssueCommandError as e:
+      logging.warning(
+          'Could not collect metric %s for instance %s: %s',
+          metric.provider_name,
+          self.instance_id,
+          e,
+      )
+      return []
+    response = json.loads(stdout)
+    if (
+        not response
+        or not response['value']
+        or not response['value'][0]['timeseries']
+    ):
+      logging.warning('No timeseries for metric %s', metric_name)
+      return []
+
+    datapoints = response['value'][0]['timeseries'][0]['data']
+    if not datapoints:
+      logging.warning('No datapoints for metric %s', metric_name)
+      return []
+
+    points = []
+    for dp in datapoints:
+      if dp['average'] is None:
+        continue
+      value = dp['average']
+      if metric.conversion_func:
+        value = metric.conversion_func(value)
+      points.append((
+          datetime.datetime.fromisoformat(dp['timeStamp']),
+          value,
+      ))
+
+    return self._CreateSamples(
+        points, metric.sample_name, metric.unit, collect_percentiles
+    )
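For reference, a minimal standalone sketch of the parsing step in _CollectProviderMetric, using the same `az monitor metrics list` response shape that the unit test below mocks. The sample values are illustrative, and parsing the trailing 'Z' with fromisoformat assumes Python 3.11+:

import datetime
import json

raw = json.dumps({
    'value': [{
        'timeseries': [{
            'data': [
                {'timeStamp': '2025-11-26T10:00:00Z', 'average': 10.0},
                {'timeStamp': '2025-11-26T10:01:00Z', 'average': None},
            ]
        }]
    }]
})

response = json.loads(raw)
datapoints = response['value'][0]['timeseries'][0]['data']
# Mirror the loop above: skip empty buckets, keep (timestamp, value) pairs.
points = [
    (datetime.datetime.fromisoformat(dp['timeStamp']), dp['average'])
    for dp in datapoints
    if dp['average'] is not None
]
print(points)  # only the (2025-11-26 10:00:00+00:00, 10.0) pair survives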

perfkitbenchmarker/providers/azure/util.py

Lines changed: 8 additions & 0 deletions
@@ -280,3 +280,11 @@ def GetMachineSeriesNumber(machine_type: str) -> int:
     return int(match.group(1))
   # Azure only adds a v after the first series,
   return 1
+
+
+def GetSubscriptionId():
+  """Gets the current Azure subscription ID."""
+  stdout, _ = vm_util.IssueRetryableCommand(
+      [AZURE_PATH, 'account', 'show', '--query', 'id', '-o', 'tsv']
+  )
+  return stdout.strip()
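GetSubscriptionId just wraps an Azure CLI call through PKB's retryable command helper. Outside PKB, the equivalent is a one-liner with subprocess; a minimal sketch assuming the `az` CLI is installed and logged in:

import subprocess

def get_subscription_id() -> str:
  # Same invocation the new helper issues: az account show --query id -o tsv
  result = subprocess.run(
      ['az', 'account', 'show', '--query', 'id', '-o', 'tsv'],
      check=True,
      capture_output=True,
      text=True,
  )
  return result.stdout.strip()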

tests/providers/azure/azure_flexible_server_test.py

Lines changed: 100 additions & 0 deletions
@@ -1,4 +1,6 @@
+import datetime
 import inspect
+import json
 import unittest
 
 from absl import flags
@@ -8,11 +10,109 @@
 from perfkitbenchmarker import sql_engine_utils
 from perfkitbenchmarker import vm_util
 from perfkitbenchmarker.providers.azure import azure_network
+from perfkitbenchmarker.providers.azure import util
 from tests import pkb_common_test_case
 
+
 FLAGS = flags.FLAGS
 
 
+class AzureFlexibleServerMetricsTest(pkb_common_test_case.PkbCommonTestCase):
+
+  def setUp(self):
+    super().setUp()
+    self.enter_context(
+        mock.patch.object(vm_util, 'IssueCommand', return_value=('', '', ''))
+    )
+    FLAGS.run_uri = '123'
+    FLAGS.cloud = provider_info.AZURE
+    FLAGS['db_engine'].parse(sql_engine_utils.FLEXIBLE_SERVER_POSTGRES)
+    test_spec = inspect.cleandoc("""
+    sysbench:
+      relational_db:
+        engine: postgres
+        engine_version: '13'
+        database_username: user
+        database_password: password
+        high_availability: False
+        db_spec:
+          Azure:
+            machine_type: Standard_D2s_v3
+            zone: westus2
+        db_disk_spec:
+          Azure:
+            disk_size: 128
+        vm_groups:
+          clients:
+            vm_spec:
+              Azure:
+                machine_type: Standard_B4ms
+                zone: westus2
+    """)
+    self.spec = pkb_common_test_case.CreateBenchmarkSpecFromYaml(
+        yaml_string=test_spec, benchmark_name='sysbench'
+    )
+    self.spec.ConstructRelationalDb()
+    self.server = self.spec.relational_db
+    self.server.resource_group = mock.Mock()
+    self.server.resource_group.name = 'test-group'
+    self.enter_context(
+        mock.patch.object(util, 'GetSubscriptionId', return_value='test-sub')
+    )
+
+  def testCollectMetrics(self):
+    # Mock the response from Azure Monitor
+    mock_response = {
+        'value': [{
+            'timeseries': [{
+                'data': [
+                    {
+                        'timeStamp': '2025-11-26T10:00:00Z',
+                        'average': 10.0,
+                    },
+                    {
+                        'timeStamp': '2025-11-26T10:01:00Z',
+                        'average': 20.0,
+                    },
+                ]
+            }]
+        }]
+    }
+    self.enter_context(
+        mock.patch.object(
+            vm_util,
+            'IssueRetryableCommand',
+            return_value=(json.dumps(mock_response), ''),
+        )
+    )
+
+    start_time = datetime.datetime(2025, 11, 26, 10, 0, 0)
+    end_time = datetime.datetime(2025, 11, 26, 10, 1, 0)
+    samples = self.server.CollectMetrics(start_time, end_time)
+
+    # Check the number of samples returned (4 per metric * 6 metrics)
+    self.assertLen(samples, 24)
+
+    # Spot check a few sample values
+    sample_names = [s.metric for s in samples]
+    self.assertIn('cpu_utilization_average', sample_names)
+    self.assertIn('cpu_utilization_min', sample_names)
+    self.assertIn('cpu_utilization_max', sample_names)
+    self.assertIn('disk_read_iops_average', sample_names)
+
+    cpu_avg = next(s for s in samples if s.metric == 'cpu_utilization_average')
+    self.assertEqual(cpu_avg.value, 15.0)
+    self.assertEqual(cpu_avg.unit, '%')
+
+    cpu_min = next(s for s in samples if s.metric == 'cpu_utilization_min')
+    self.assertEqual(cpu_min.value, 10.0)
+    self.assertEqual(cpu_min.unit, '%')
+
+    cpu_max = next(s for s in samples if s.metric == 'cpu_utilization_max')
+    self.assertEqual(cpu_max.value, 20.0)
+    self.assertEqual(cpu_max.unit, '%')
+
+
 class AzureFlexibleServerCreateTestCase(pkb_common_test_case.PkbCommonTestCase):
 
   def setUp(self):
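The expected values in testCollectMetrics follow directly from aggregating the two mocked datapoints; a quick sketch of that arithmetic (the _average/_min/_max suffixes are asserted by the test itself, while the fourth per-metric sample implied by the 4 x 6 = 24 count is not named there, so it is not reproduced here):

values = [10.0, 20.0]  # the two mocked 'average' datapoints

assert min(values) == 10.0                 # cpu_utilization_min
assert max(values) == 20.0                 # cpu_utilization_max
assert sum(values) / len(values) == 15.0   # cpu_utilization_average

# Postgres flexible server collects 6 metrics (see _GetMetricsToCollect),
# and the test expects 4 samples per metric: 6 * 4 == 24.
assert 6 * 4 == 24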
