Skip to content

Commit 6b17e6c

Browse files
authored
Merge pull request #95 from huggingface/feat/metrics-concurrent-connection
feat(metrics): add /metrics which returns prometheus metrics
2 parents 38a7611 + 05868e5 commit 6b17e6c

File tree

1 file changed

+14
-1
lines changed

1 file changed

+14
-1
lines changed

src/huggingface_inference_toolkit/webservice_starlette.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from starlette.responses import PlainTextResponse, Response
88
from starlette.routing import Route
99

10-
from huggingface_inference_toolkit.async_utils import async_handler_call
10+
from huggingface_inference_toolkit.async_utils import MAX_CONCURRENT_THREADS, MAX_THREADS_GUARD, async_handler_call
1111
from huggingface_inference_toolkit.const import (
1212
HF_FRAMEWORK,
1313
HF_HUB_TOKEN,
@@ -69,6 +69,18 @@ async def health(request):
6969
return PlainTextResponse("Ok")
7070

7171

72+
# Report Prometheus metrics
73+
# inf_batch_current_size: Current number of requests being processed
74+
# inf_queue_size: Number of requests waiting in the queue
75+
async def metrics(request):
    """Serve the current load gauges as plain text for Prometheus scraping.

    The response body holds two ``name value`` lines:

    * ``inf_batch_current_size`` -- ``MAX_CONCURRENT_THREADS`` minus the
      current ``value`` of ``MAX_THREADS_GUARD``, i.e. how many slots are
      taken right now.
    * ``inf_queue_size`` -- the ``tasks_waiting`` figure reported by
      ``MAX_THREADS_GUARD.statistics()``.

    Args:
        request: The incoming request; it is not inspected.

    Returns:
        A ``PlainTextResponse`` with one gauge per line, each terminated
        by a newline.
    """
    busy_slots = MAX_CONCURRENT_THREADS - MAX_THREADS_GUARD.value
    waiting_tasks = MAX_THREADS_GUARD.statistics().tasks_waiting
    gauge_lines = (
        f"inf_batch_current_size {busy_slots}\n",
        f"inf_queue_size {waiting_tasks}\n",
    )
    return PlainTextResponse("".join(gauge_lines))
82+
83+
7284
async def predict(request):
7385
try:
7486
# extracts content from request
@@ -143,6 +155,7 @@ async def predict(request):
143155
Route("/health", health, methods=["GET"]),
144156
Route("/", predict, methods=["POST"]),
145157
Route("/predict", predict, methods=["POST"]),
158+
Route("/metrics", metrics, methods=["GET"]),
146159
],
147160
on_startup=[prepare_model_artifacts],
148161
)

0 commit comments

Comments
 (0)