1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414
15+ from typing import Any , Dict , List , Union , Tuple
1516from model_analyzer .model_analyzer_exceptions \
1617 import TritonModelAnalyzerException
1718from model_analyzer .record .types .perf_latency_avg import PerfLatencyAvg
3334from model_analyzer .record .types .perf_server_compute_output \
3435 import PerfServerComputeOutput
3536
37+ from model_analyzer .record .record import Record
38+ from model_analyzer .record .types .gpu_utilization import GPUUtilization
39+ from model_analyzer .record .types .gpu_power_usage import GPUPowerUsage
40+ from model_analyzer .record .types .gpu_used_memory import GPUUsedMemory
41+ from model_analyzer .record .types .gpu_total_memory import GPUTotalMemory
42+
3643from model_analyzer .constants import \
3744 INTERVAL_SLEEP_TIME , LOGGER_NAME , MEASUREMENT_REQUEST_COUNT_STEP , \
3845 MEASUREMENT_WINDOW_STEP , PERF_ANALYZER_MEASUREMENT_WINDOW , \
@@ -56,23 +63,33 @@ class PerfAnalyzer:
5663 with perf_analyzer.
5764 """
5865
    # Indices into a parsed "UUID:value" pair from a GPU metric CSV cell
    GPU_METRIC_UUID = 0
    GPU_METRIC_VALUE = 1

    #yapf: disable
    # Return codes for a perf_analyzer run
    PA_SUCCESS, PA_FAIL, PA_RETRY = 0, 1, 2

    # Indices into the rows of the metric tables below
    METRIC_TAG, CSV_STRING, RECORD_CLASS, REDUCTION_FACTOR = 0, 1, 2, 3

    # Each row maps a metric tag to its CSV column header, the Record class
    # that holds it, and the divisor applied to the raw CSV value.
    perf_metric_table = [
        ["perf_latency_avg",          "Avg latency",           PerfLatencyAvg,          "1000"],
        ["perf_latency_p90",          "p90 latency",           PerfLatencyP90,          "1000"],
        ["perf_latency_p95",          "p95 latency",           PerfLatencyP95,          "1000"],
        ["perf_latency_p99",          "p99 latency",           PerfLatencyP99,          "1000"],
        ["perf_throughput",           "Inferences/Second",     PerfThroughput,          "1"],
        ["perf_client_send_recv",     "request/response",      PerfClientSendRecv,      "1000"],
        ["perf_client_send_recv",     "send/recv",             PerfClientSendRecv,      "1000"],
        ["perf_client_response_wait", "response wait",         PerfClientResponseWait,  "1000"],
        ["perf_server_queue",         "Server Queue",          PerfServerQueue,         "1000"],
        ["perf_server_compute_infer", "Server Compute Infer",  PerfServerComputeInfer,  "1000"],
        ["perf_server_compute_input", "Server Compute Input",  PerfServerComputeInput,  "1000"],
        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, "1000"]
    ]

    # GPU metrics parsed from perf_analyzer's CSV output; these rows carry
    # no reduction factor (the raw value is used as-is).
    gpu_metric_table = [
        ["gpu_utilization",   "Avg GPU Utilizations",    GPUUtilization],
        ["gpu_power_usage",   "Avg GPU Power Usages",    GPUPowerUsage],
        ["gpu_used_memory",   "Max GPU Memory Usages",   GPUUsedMemory],
        ["gpu_total_memory",  "Total GPU Memory Usages", GPUTotalMemory]
    ]
    #yapf: enable
@@ -84,6 +101,14 @@ def get_perf_metrics():
84101 ]
85102 return perf_metrics
86103
104+ @staticmethod
105+ def get_gpu_metrics ():
106+ gpu_metrics = [
107+ gpu_metric [PerfAnalyzer .RECORD_CLASS ]
108+ for gpu_metric in PerfAnalyzer .gpu_metric_table
109+ ]
110+ return gpu_metrics
111+
87112 def __init__ (self , path , config , max_retries , timeout , max_cpu_util ):
88113 """
89114 Parameters
@@ -402,26 +427,75 @@ def _parse_outputs(self, metrics):
402427 ]:
403428 os .remove (perf_config ['latency-report-file' ])
404429
405- def _extract_metrics_from_row (self , requested_metrics , row_metrics ):
430+ def _extract_metrics_from_row (self , requested_metrics : List [Record ],
431+ row_metrics : Dict [str , str ]) -> List [Record ]:
406432 """
407433 Extracts the requested metrics from the CSV's row and creates a list of Records
408434 """
409- perf_records = []
410- for perf_metric in PerfAnalyzer .perf_metric_table :
411- if self ._is_perf_metric_requested_and_in_row (
412- perf_metric , requested_metrics , row_metrics ):
413- value = float (row_metrics [perf_metric [PerfAnalyzer .CSV_STRING ]]
414- ) / perf_metric [PerfAnalyzer .REDUCTION_FACTOR ]
435+ perf_records = self ._create_records_from_perf_metrics (
436+ requested_metrics , row_metrics )
437+
438+ gpu_records = self ._create_records_from_gpu_metrics (
439+ requested_metrics , row_metrics )
415440
416- perf_records .append (
417- perf_metric [PerfAnalyzer .RECORD_CLASS ](value ))
441+ return perf_records + gpu_records
442+
443+ def _create_records_from_perf_metrics (
444+ self , requested_metrics : List [Record ],
445+ row_metrics : Dict [str , str ]) -> List [Record ]:
446+ perf_records : List [Record ] = []
447+ for perf_metric in PerfAnalyzer .perf_metric_table :
448+ if self ._is_metric_requested_and_in_row (perf_metric ,
449+ requested_metrics ,
450+ row_metrics ):
451+ value = float (row_metrics [str (
452+ perf_metric [PerfAnalyzer .CSV_STRING ])])
453+ reduction_factor = float (
454+ str (perf_metric [PerfAnalyzer .REDUCTION_FACTOR ]))
455+ perf_value = value / reduction_factor
456+
457+ perf_records .append (perf_metric [PerfAnalyzer .RECORD_CLASS ](
458+ perf_value )) # type: ignore
418459
419460 return perf_records
420461
421- def _is_perf_metric_requested_and_in_row (self , perf_metric ,
422- requested_metrics , row_metrics ):
423- tag_match = any (
424- perf_metric [PerfAnalyzer .METRIC_TAG ] in requested_metric .tag
425- for requested_metric in requested_metrics )
462+ def _create_records_from_gpu_metrics (
463+ self , requested_metrics : List [Record ],
464+ row_metrics : Dict [str , str ]) -> List [Record ]:
465+ # GPU metrics have the following format: UUID0:value0;UUID1:value1;...
466+ gpu_records : List [Record ] = []
467+ for gpu_metric in PerfAnalyzer .gpu_metric_table :
468+ if self ._is_metric_requested_and_in_row (gpu_metric ,
469+ requested_metrics ,
470+ row_metrics ):
471+ gpu_metric_string = row_metrics [str (
472+ gpu_metric [PerfAnalyzer .CSV_STRING ])]
473+
474+ # Covers the case where PA didn't provide data
475+ if not gpu_metric_string :
476+ continue
477+
478+ # Needed because PA might terminate substring with a ;
479+ if gpu_metric_string and gpu_metric_string [- 1 ] == ';' :
480+ gpu_metric_string = gpu_metric_string [:- 1 ]
481+
482+ gpu_metric_string_tuples = gpu_metric_string .split (';' )
483+
484+ for gpu_metric_string_tuple in gpu_metric_string_tuples :
485+ gpu_metric_tuple = gpu_metric_string_tuple .split (':' )
486+
487+ gpu_records .append (gpu_metric [PerfAnalyzer .RECORD_CLASS ](
488+ value = float (
489+ gpu_metric_tuple [PerfAnalyzer .GPU_METRIC_VALUE ]),
490+ device_uuid = gpu_metric_tuple [
491+ PerfAnalyzer .GPU_METRIC_UUID ])) # type: ignore
492+
493+ return gpu_records
494+
495+ def _is_metric_requested_and_in_row (self , metric : List [object ],
496+ requested_metrics : List [Record ],
497+ row_metrics : Dict [str , str ]) -> bool :
498+ tag_match = any (metric [PerfAnalyzer .METRIC_TAG ] in requested_metric .tag
499+ for requested_metric in requested_metrics )
426500
427- return tag_match and perf_metric [PerfAnalyzer .CSV_STRING ] in row_metrics
501+ return tag_match and metric [PerfAnalyzer .CSV_STRING ] in row_metrics
0 commit comments