Skip to content

Commit fe8e8cb

Browse files
Ashwin Ramesh authored and dzier committed
Updated detailed report and results columns (#161)
* Updated detailed report
* Review edits, fixed report mode for CPU only
* Review edits - report info
1 parent 44d52b6 commit fe8e8cb

File tree

13 files changed

+234
-101
lines changed

13 files changed

+234
-101
lines changed

docs/metrics.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,4 @@ just metrics) that should be displayed in the output tables.
7878
dynamic batcher
7979
* `satisfies_constraints`: `Yes` if this measurement satisfies constraints, `No`
8080
otherwise.
81-
* `gpu_id`: The id of the GPU this measurement was taken on.
81+
* `gpu_uuid`: The UUID of the GPU this measurement was taken on.

model_analyzer/analyzer.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ class Analyzer:
3535
model_analyzer. Configured with metrics to monitor, exposes profiling and
3636
result writing methods.
3737
"""
38-
3938
def __init__(self, config, server, state_manager):
4039
"""
4140
Parameters
@@ -84,7 +83,8 @@ def profile(self, client):
8483
config=self._config,
8584
client=client,
8685
server=self._server,
87-
result_manager=self._result_manager)
86+
result_manager=self._result_manager,
87+
state_manager=self._state_manager)
8888

8989
self._model_manager = ModelManager(
9090
config=self._config,
@@ -131,8 +131,14 @@ def analyze(self):
131131
f"Expected config of type {ConfigCommandAnalyze}, got {type(self._config)}."
132132
)
133133

134+
gpu_info = self._state_manager.get_state_variable(
135+
'MetricsManager.gpus')
136+
if not gpu_info:
137+
gpu_info = {}
134138
self._report_manager = ReportManager(
135-
config=self._config, result_manager=self._result_manager)
139+
config=self._config,
140+
gpu_info=gpu_info,
141+
result_manager=self._result_manager)
136142

137143
# Create result tables, put top results and get stats
138144
dcgm_metrics, perf_metrics, cpu_metrics = \
@@ -162,8 +168,14 @@ def report(self):
162168
f"Expected config of type {ConfigCommandReport}, got {type(self._config)}."
163169
)
164170

171+
gpu_info = self._state_manager.get_state_variable(
172+
'MetricsManager.gpus')
173+
if not gpu_info:
174+
gpu_info = {}
165175
self._report_manager = ReportManager(
166-
config=self._config, result_manager=self._result_manager)
176+
config=self._config,
177+
result_manager=self._result_manager,
178+
gpu_info=gpu_info)
167179

168180
self._report_manager.create_detailed_reports()
169181
self._report_manager.export_detailed_reports()

model_analyzer/config/input/config_defaults.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,12 @@
8686
'perf_throughput', 'perf_latency', 'cpu_used_ram'
8787
]
8888
DEFAULT_GPU_OUTPUT_FIELDS = [
89-
'model_name', 'gpu_id', 'batch_size', 'concurrency', 'model_config_path',
89+
'model_name', 'gpu_uuid', 'batch_size', 'concurrency', 'model_config_path',
9090
'instance_group', 'dynamic_batch_sizes', 'satisfies_constraints',
9191
'gpu_used_memory', 'gpu_utilization', 'gpu_power_usage'
9292
]
9393
DEFAULT_SERVER_OUTPUT_FIELDS = [
94-
'model_name', 'gpu_id', 'gpu_used_memory', 'gpu_utilization',
94+
'model_name', 'gpu_uuid', 'gpu_used_memory', 'gpu_utilization',
9595
'gpu_power_usage'
9696
]
9797

model_analyzer/plots/plot_manager.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ class PlotManager:
2929
This class manages the construction and arrangement
3030
of plots generated by model analyzer
3131
"""
32-
3332
def __init__(self, config, result_manager):
3433
"""
3534
Parameters
@@ -140,8 +139,8 @@ def create_detailed_plots(self):
140139
model_config_name = model.model_config_name()
141140
self._detailed_plots[model_config_name] = DetailedPlot(
142141
f'latency_breakdown', 'Online Performance')
143-
measurements = self._result_manager.get_model_config_measurements(
144-
model_config_name)[1]
142+
model_config, measurements = self._result_manager.get_model_config_measurements(
143+
model_config_name)
145144

146145
# If model_config_name was present in results
147146
if measurements:
@@ -152,6 +151,10 @@ def create_detailed_plots(self):
152151

153152
# Create the simple plots for the detailed reports
154153
for plot_config in model.plots():
154+
if model_config.cpu_only() and (
155+
plot_config.y_axis().startswith('gpu_')
156+
or plot_config.x_axis().startswith('gpu_')):
157+
continue
155158
self._create_update_simple_plot(plots_key=model_config_name,
156159
plot_config=plot_config,
157160
measurements=measurements,
@@ -183,8 +186,8 @@ def export_detailed_plots(self):
183186
os.makedirs(detailed_model_config_plot_dir, exist_ok=True)
184187
plot.save(detailed_model_config_plot_dir)
185188

186-
simple_model_config_plot_dir = os.path.join(simple_plot_dir,
187-
model_config_name)
189+
simple_model_config_plot_dir = os.path.join(
190+
simple_plot_dir, model_config_name)
188191
os.makedirs(simple_model_config_plot_dir, exist_ok=True)
189192
for plot in self._simple_plots[model_config_name].values():
190193
plot.save(simple_model_config_plot_dir)

model_analyzer/record/metrics_manager.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class MetricsManager:
4444
"gpu_power_usage"
4545
]
4646

47-
def __init__(self, config, client, server, result_manager):
47+
def __init__(self, config, client, server, result_manager, state_manager):
4848
"""
4949
Parameters
5050
----------
@@ -58,19 +58,45 @@ def __init__(self, config, client, server, result_manager):
5858
result_manager : ResultManager
5959
instance that manages the result tables and
6060
adding results
61+
state_manager: AnalyzerStateManager
62+
manages the analyzer state
6163
"""
6264

6365
self._config = config
6466
self._client = client
6567
self._server = server
6668
self._result_manager = result_manager
69+
self._state_manager = state_manager
6770

6871
self._dcgm_metrics, self._perf_metrics, self._cpu_metrics = \
6972
MetricsManager.categorize_metrics()
7073
self._gpus = GPUDeviceFactory.verify_requested_gpus(self._config.gpus)
74+
self._init_state()
7175

72-
self._dcgm_monitor = None
73-
self._cpu_monitor = None
76+
def _init_state(self):
77+
"""
78+
Sets MetricsManager object managed
79+
state variables in AnalyzerState
80+
"""
81+
82+
gpu_info = self._state_manager.get_state_variable(
83+
'MetricsManager.gpu_info')
84+
85+
if self._state_manager.starting_fresh_run() or gpu_info is None:
86+
gpu_info = {}
87+
88+
for i in range(len(self._gpus)):
89+
if self._gpus[i] not in gpu_info:
90+
device_info = {}
91+
device = numba.cuda.list_devices()[i]
92+
device_info['name'] = device.name
93+
with device:
94+
# total memory is stored as reported, in bytes (no GB conversion is applied here)
95+
device_info['total_memory'] = numba.cuda.current_context(
96+
).get_memory_info().total
97+
gpu_info[self._gpus[i]] = device_info
98+
99+
self._state_manager.set_state_variable('MetricsManager.gpus', gpu_info)
74100

75101
@classmethod
76102
def categorize_metrics(cls):
@@ -86,7 +112,7 @@ def categorize_metrics(cls):
86112

87113
dcgm_metrics, perf_metrics, cpu_metrics = [], [], []
88114
# Separates metrics and objectives into related lists
89-
for metric in MetricsManager.get_metric_types(cls.metric_tags):
115+
for metric in MetricsManager.get_metric_types(tags=cls.metric_tags):
90116
if metric in DCGMMonitor.model_analyzer_to_dcgm_field:
91117
dcgm_metrics.append(metric)
92118
elif metric in PerfAnalyzer.perf_metrics:
@@ -147,7 +173,7 @@ def profile_model(self, run_config, perf_output_writer=None):
147173
else:
148174
perf_analyzer_metrics = perf_analyzer_metrics_or_status
149175

150-
# Get metrics for model inference and combine metrics that do not have GPU ID
176+
# Get metrics for model inference and combine metrics that do not have GPU UUID
151177
model_gpu_metrics = {}
152178
if not cpu_only:
153179
model_gpu_metrics = self._get_gpu_inference_metrics()
@@ -273,12 +299,13 @@ def _get_gpu_inference_metrics(self):
273299

274300
records_groupby_gpu = {}
275301
records_groupby_gpu = dcgm_record_aggregator.groupby(
276-
self._dcgm_metrics, lambda record: record.device().device_id())
302+
self._dcgm_metrics, lambda record: str(
303+
record.device().device_uuid(), encoding='ascii'))
277304

278305
gpu_metrics = defaultdict(list)
279306
for _, metric in records_groupby_gpu.items():
280-
for gpu_id, metric_value in metric.items():
281-
gpu_metrics[gpu_id].append(metric_value)
307+
for gpu_uuid, metric_value in metric.items():
308+
gpu_metrics[gpu_uuid].append(metric_value)
282309

283310
return gpu_metrics
284311

model_analyzer/reports/pdf_report.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ class PDFReport(Report):
2323
constructed in html and
2424
written to disk as a PDF
2525
"""
26-
2726
def __init__(self):
2827
self._head = ""
2928
self._body = ""
@@ -72,7 +71,11 @@ def add_subheading(self, subheading):
7271

7372
self._body += f'<h3>{subheading}</h3>'
7473

75-
def add_images(self, images, image_captions, image_width=100):
74+
def add_images(self,
75+
images,
76+
image_captions,
77+
image_width=100,
78+
float="center"):
7679
"""
7780
Parameters
7881
----------
@@ -84,13 +87,15 @@ def add_images(self, images, image_captions, image_width=100):
8487
image_width: int
8588
Percentage of the the row of images
8689
will occupy.
90+
float: str
91+
Alignment of the div containing each image in the row
8792
"""
8893

8994
image_row = ""
9095
for img, caption in zip(images, image_captions):
9196
with open(img, "rb") as image_file:
9297
data_uri = base64.b64encode(image_file.read()).decode('ascii')
93-
image_row += f"<div class=\"image\" style=\"float:center;width:{image_width//len(images)}%\">"
98+
image_row += f"<div class=\"image\" style=\"float:{float};width:{image_width//len(images)}%\">"
9499
image_row += f"<img src=\"data:image/png;base64,{data_uri}\" style=\"width:100%\">"
95100
image_row += f"<center><div style=\"font-weight:bold;font-size:12;padding-bottom:20px\">{caption}</div></center>"
96101
image_row += "</div>"
@@ -101,21 +106,32 @@ def add_paragraph(self, paragraph, font_size=14):
101106
"""
102107
Parameters
103108
----------
104-
title: paragraph
109+
paragraph: str
105110
The text to add to
106111
the report as a paragraph
107112
"""
108113

109114
self._body += f'<div style=\"font-size:{font_size}\"><p>{paragraph}</p></div>'
110115

116+
def add_line_breaks(self, num_breaks=1):
117+
"""
118+
Parameters
119+
----------
120+
num_breaks: int
121+
The number of <br> line breaks
122+
to append to the report body
123+
"""
124+
125+
for _ in range(num_breaks):
126+
self._body += '<br>'
127+
111128
def add_table(self, table):
112129
"""
113130
Parameters
114131
----------
115132
table: ResultTable
116133
The table we want to add
117134
"""
118-
119135
def table_style(border="1px solid black",
120136
padding="5px 10px",
121137
font_size="11pt",

0 commit comments

Comments
 (0)