
Commit 70a0eee

test: Tests for Metrics API enhancement to include error counters (#7423)
1 parent 3dbf09e commit 70a0eee

File tree

4 files changed, +144 -3 lines changed


docs/user_guide/metrics.md

Lines changed: 19 additions & 0 deletions
@@ -100,6 +100,25 @@ Count*. The count metrics are illustrated by the following examples:
 | |Execution Count |`nv_inference_exec_count` |Number of inference batch executions (see [Inference Request Metrics](#inference-request-metrics), does not include cached requests)|Per model|Per request|
 | |Pending Request Count |`nv_inference_pending_request_count` |Number of inference requests awaiting execution by a backend. This number is incremented when a request is enqueued to the server (`TRITONSERVER_ServerInferAsync`) and is decremented when a backend is about to start executing the request. More details can be found below. |Per model|Per request|
 
+#### Failure Count Categories
+
+| Failed Request Reason |Description |
+|------------|------------|
+| REJECTED | Number of inference failures due to request timeout in the scheduler. |
+| CANCELED | Number of inference failures due to request cancellation in the core. |
+| BACKEND | Number of inference failures during execution of requests in the backend/model. |
+| OTHER | Number of inference failures due to other uncategorized reasons in the core. |
+
+> **Note**
+>
+> Ensemble failure metrics will reflect the failure counts of their composing models as well as the parent model, but currently do not capture the same granularity for the "reason" label and will default to the "OTHER" reason.
+>
+> For example, if EnsembleA contains ModelA, and ModelA experiences a failed request due to a queue/backlog timeout in the scheduler, ModelA will have a failed request metric reflecting `reason=REJECTED` and `count=1`.
+> Additionally, EnsembleA will have a failed request metric reflecting `reason=OTHER` and `count=2`.
+> The `count=2` reflects 1 from the internally failed request captured by ModelA, as well as 1 from the failed top-level request sent to EnsembleA by the user/client.
+> The `reason=OTHER` reflects the fact that the ensemble doesn't currently capture the specific reason why
+> ModelA's request failed at this time.
+
 #### Pending Request Count (Queue Size) Per-Model
 
 The *Pending Request Count* reflects the number of requests that have been
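As a rough illustration of how a client might consume the new failure counters documented above, here is a minimal sketch (not part of the commit) that scrapes the metrics endpoint used throughout these tests and tallies `nv_inference_request_failure` samples by model and reason. The endpoint URL and label names come from the diff; everything else is an assumption:

```python
import re
from collections import Counter

import requests

# Default Triton metrics endpoint, matching the URL used in the tests below.
METRICS_URL = "http://localhost:8002/metrics"


def failure_counts(metrics_text):
    """Tally nv_inference_request_failure samples by (model, reason)."""
    pattern = (
        r'nv_inference_request_failure'
        r'\{model="([^"]+)",reason="([^"]+)",version="[^"]+"\} (\d+)'
    )
    counts = Counter()
    for model, reason, value in re.findall(pattern, metrics_text):
        counts[(model, reason)] += int(value)
    return counts


if __name__ == "__main__":
    text = requests.get(METRICS_URL).text
    for (model, reason), count in sorted(failure_counts(text).items()):
        print(model, reason, count)  # e.g. wrong_model BACKEND 1
```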

qa/L0_backend_python/lifecycle/lifecycle_test.py

Lines changed: 34 additions & 1 deletion
@@ -27,8 +27,11 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
+import re
 import sys
 
+import requests
+
 sys.path.append("../../common")
 
 import queue
@@ -63,6 +66,29 @@ class LifecycleTest(unittest.TestCase):
     def setUp(self):
         self._shm_leak_detector = shm_util.ShmLeakDetector()
 
+    def _get_metrics(self):
+        metrics_url = "http://localhost:8002/metrics"
+        r = requests.get(metrics_url)
+        r.raise_for_status()
+        return r.text
+
+    def _metrics_before_test(self, model, reason):
+        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
+        metrics = self._get_metrics()
+        match = re.search(pattern, metrics)
+        if match:
+            return int(match.group(1))
+        else:
+            raise Exception(f"Failure metrics for model='{model}' not found")
+
+    def _assert_metrics(
+        self, model_name, reason, expected_count_increase, initial_count
+    ):
+        metrics = self._get_metrics()
+        # Add initial count + expected count for the test
+        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
+        self.assertIn(expected_metric, metrics)
+
     def test_error_code(self):
         model_name = "error_code"
         shape = [1, 1]
@@ -181,7 +207,7 @@ def test_batch_error(self):
     def test_infer_pymodel_error(self):
         model_name = "wrong_model"
         shape = [2, 2]
-
+        initial_metrics_value = self._metrics_before_test(model_name, "BACKEND")
         with self._shm_leak_detector.Probe() as shm_probe:
             with httpclient.InferenceServerClient(
                 f"{_tritonserver_ipaddr}:8000"
@@ -207,6 +233,13 @@ def test_infer_pymodel_error(self):
                 self.assertTrue(
                     False, "Wrong exception raised or did not raise an exception"
                 )
+        expected_count_increase = 1
+        self._assert_metrics(
+            model_name,
+            "BACKEND",
+            expected_count_increase,
+            initial_metrics_value,
+        )
 
 
 if __name__ == "__main__":
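To make the helper's parsing concrete, here is a minimal standalone check (not part of the commit) of the regex used by `_metrics_before_test`, run against an illustrative Prometheus text-format line; the sample value `3` is made up, not real server output:

```python
import re

# Illustrative sample line in Prometheus text format; a real line would be
# scraped from http://localhost:8002/metrics.
sample = 'nv_inference_request_failure{model="wrong_model",reason="BACKEND",version="1"} 3'
pattern = r'nv_inference_request_failure\{model="wrong_model",reason="BACKEND",version="1"\} (\d+)'

match = re.search(pattern, sample)
assert match is not None and int(match.group(1)) == 3
```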

qa/L0_model_queue/model_queue_test.py

Lines changed: 49 additions & 1 deletion
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -30,6 +30,7 @@
 
 sys.path.append("../common")
 
+import re
 import threading
 import time
 import unittest
@@ -38,6 +39,7 @@
 
 import infer_util as iu
 import numpy as np
+import requests
 import test_util as tu
 from tritonclientutils import InferenceServerException
 
@@ -69,6 +71,29 @@ def check_deferred_exception(self):
         _deferred_exceptions.pop(0)
         raise first_exception
 
+    def _get_metrics(self):
+        metrics_url = "http://localhost:8002/metrics"
+        r = requests.get(metrics_url)
+        r.raise_for_status()
+        return r.text
+
+    def _metrics_before_test(self, model, reason):
+        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
+        metrics = self._get_metrics()
+        match = re.search(pattern, metrics)
+        if match:
+            return int(match.group(1))
+        else:
+            raise Exception(f"Failure metrics for model='{model}' not found")
+
+    def _assert_metrics(
+        self, model_name, reason, expected_count_increase, initial_count
+    ):
+        metrics = self._get_metrics()
+        # Add initial count + expected count for the test
+        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
+        self.assertIn(expected_metric, metrics)
+
     def check_response(
         self,
         bs,
@@ -235,6 +260,12 @@ def test_policy_reject(self):
         # requests are sent after 'default_timeout_microseconds'.
         # Expect the first request is timed-out and rejected, which makes the
         # second and third request be batched together and executed.
+        initial_metrics_value_ensemble = self._metrics_before_test(
+            "ensemble_zero_1_float32", "OTHER"
+        )
+        initial_metrics_value_custom = self._metrics_before_test(
+            "custom_zero_1_float32", "REJECTED"
+        )
         dtype = np.float32
         shapes = ([16],)
         for trial in self.trials_:
@@ -283,6 +314,23 @@ def test_policy_reject(self):
                 self.check_deferred_exception()
             except InferenceServerException as ex:
                 self.assertTrue(False, "unexpected error {}".format(ex))
+        expected_count_increase = 4
+        # NOTE: Ensemble failure metrics will reflect the failure counts
+        # of their composing models as well as the parent model, but currently do not capture the same granularity
+        # for the "reason" label and will default to the "OTHER" reason.
+        self._assert_metrics(
+            "ensemble_zero_1_float32",
+            "OTHER",
+            expected_count_increase,
+            initial_metrics_value_ensemble,
+        )
+        expected_count_increase = 4
+        self._assert_metrics(
+            "custom_zero_1_float32",
+            "REJECTED",
+            expected_count_increase,
+            initial_metrics_value_custom,
+        )
 
     def test_timeout_override(self):
         # Send requests with batch sizes 1, 1, 3 where the first request

qa/L0_request_cancellation/scheduler_test.py

Lines changed: 42 additions & 1 deletion
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -27,10 +27,12 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import concurrent.futures
+import re
 import time
 import unittest
 
 import numpy as np
+import requests
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import InferenceServerException
 
@@ -84,6 +86,29 @@ def _assert_streaming_response_is_cancelled(self, response):
             cancelled_count += 1
         self.assertEqual(cancelled_count, 1)
 
+    def _get_metrics(self):
+        metrics_url = "http://localhost:8002/metrics"
+        r = requests.get(metrics_url)
+        r.raise_for_status()
+        return r.text
+
+    def _metrics_before_test(self, model, reason):
+        pattern = rf'nv_inference_request_failure\{{model="{model}",reason="{reason}",version="1"\}} (\d+)'
+        metrics = self._get_metrics()
+        match = re.search(pattern, metrics)
+        if match:
+            return int(match.group(1))
+        else:
+            raise Exception(f"Failure metrics for model='{model}' not found")
+
+    def _assert_metrics(
+        self, model_name, reason, expected_count_increase, initial_count
+    ):
+        metrics = self._get_metrics()
+        # Add initial count + expected count for the test
+        expected_metric = f'nv_inference_request_failure{{model="{model_name}",reason="{reason}",version="1"}} {expected_count_increase + initial_count}'
+        self.assertIn(expected_metric, metrics)
+
     # Test queued requests on dynamic batch scheduler can be cancelled
     def test_dynamic_batch_scheduler_request_cancellation(self):
         model_name = "dynamic_batch"
@@ -114,6 +139,7 @@ def test_dynamic_batch_scheduler_request_cancellation(self):
     # Test backlogged requests on sequence batch scheduler can be cancelled
     def test_sequence_batch_scheduler_backlog_request_cancellation(self):
         model_name = "sequence_direct"
+        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
         with concurrent.futures.ThreadPoolExecutor() as pool:
             # Saturate the single sequence slot
             saturate_thread = pool.submit(
@@ -149,11 +175,26 @@ def test_sequence_batch_scheduler_backlog_request_cancellation(self):
             self._assert_response_is_cancelled(backlog_requests[1]["response"])
             # Join saturating thread
             saturate_thread.result()
+        expected_count_increase = 2
+        self._assert_metrics(
+            model_name,
+            "CANCELED",
+            expected_count_increase,
+            initial_metrics_value,
+        )
 
     # Test queued requests on direct sequence batch scheduler can be cancelled
     def test_direct_sequence_batch_scheduler_request_cancellation(self):
         model_name = "sequence_direct"
+        initial_metrics_value = self._metrics_before_test(model_name, "CANCELED")
         self._test_sequence_batch_scheduler_queued_request_cancellation(model_name)
+        expected_count_increase = 2
+        self._assert_metrics(
+            model_name,
+            "CANCELED",
+            expected_count_increase,
+            initial_metrics_value,
+        )
 
     # Test queued requests on oldest sequence batch scheduler can be cancelled
     def test_oldest_sequence_batch_scheduler_request_cancellation(self):
